
    cCi<                        S r SSKJrJrJr  SSKrSSKJr  SSKJr  SSK	J
r
  SSKJr  SSKJrJrJrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  \R4                  " \5      r " S S\R:                  5      r " S S\R:                  5      r " S S\R:                  5      r  " S S\R:                  5      r!\ " S S\
5      5       r"\ " S S\"5      5       r#\" SS9 " S S\"5      5       r$\" SS9 " S S\"\5      5       r%/ S Qr&g)!zPyTorch TextNet model.    )AnyOptionalUnionN)Tensor)PreTrainedModel)ACT2CLS)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)TextNetConfig)logging)BackboneMixin   )auto_docstringc                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )TextNetConvLayer)   configc           	        > [         TU ]  5         UR                  U l        UR                  U l        UR                  U l        [        UR                  [        5      (       a$  UR                  S   S-  UR                  S   S-  4OUR                  S-  n[        R                  " UR                  UR                  UR                  UR                  USS9U l        [        R                  " UR                  UR                   5      U l        [        R$                  " 5       U l        U R                  b  [(        U R                     " 5       U l        g g )Nr         F)kernel_sizestridepaddingbias)super__init__stem_kernel_sizer   stem_strider   stem_act_funcactivation_function
isinstancetuplennConv2dstem_num_channelsstem_out_channelsconvBatchNorm2dbatch_norm_eps
batch_normIdentity
activationr   )selfr   r   	__class__s      f/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/textnet/modeling_textnet.pyr   TextNetConvLayer.__init__*   s   !22((#)#7#7  &11599 "a'););A)>!)CD((A- 	 II$$$$//%%
	 ..)A)A6CXCXY++-##/%d&>&>?ADO 0    hidden_statesreturnc                 h    U R                  U5      nU R                  U5      nU R                  U5      $ N)r)   r,   r.   )r/   r4   s     r1   forwardTextNetConvLayer.forwardE   s-    		-06}--r3   )r.   r"   r,   r)   r   r   )__name__
__module____qualname____firstlineno__r   r   torchr   r8   __static_attributes____classcell__r0   s   @r1   r   r   )   s1    B} B6.U\\ .ell . .r3   r   c            
       ~   ^  \ rS rSrSrS\S\S\S\S\4
U 4S jjrS	\R                  S
\R                  4S jr
SrU =r$ )TextNetRepConvLayerK   a  
This layer supports re-parameterization by combining multiple convolutional branches
(e.g., main convolution, vertical, horizontal, and identity branches) during training.
At inference time, these branches can be collapsed into a single convolution for
efficiency, as per the re-parameterization paradigm.

The "Rep" in the name stands for "re-parameterization" (introduced by RepVGG).
r   in_channelsout_channelsr   r   c           	      P  > [         T	U ]  5         X l        X0l        X@l        XPl        US   S-
  S-  US   S-
  S-  4n[        R                  " 5       U l        [        R                  " UUUUUSS9U l
        [        R                  " X1R                  S9U l        US   S-
  S-  S4nSUS   S-
  S-  4nUS   S:w  aG  [        R                  " UUUS   S4UUSS9U l        [        R                  " X1R                  S9U l        OSu  U l        U l        US   S:w  aG  [        R                  " UUSUS   4UUSS9U l        [        R                  " X1R                  S9U l        OSu  U l        U l        X2:X  a*  US:X  a$  [        R                  " X!R                  S9U l        g S U l        g )Nr   r   r   F)rE   rF   r   r   r   r   )num_featuresepsNN)r   r   num_channelsrF   r   r   r%   ReLUr"   r&   	main_convr*   r+   main_batch_normvertical_convvertical_batch_normhorizontal_convhorizontal_batch_normrbr_identity)
r/   r   rE   rF   r   r   r   vertical_paddinghorizontal_paddingr0   s
            r1   r   TextNetRepConvLayer.__init__U   s   '(&NQ&1,{1~/Aa.GH#%779 #%#
  "~~<MbMbc(^a/A5q9+a.1"4!:;q>Q!#')(^Q/("D (*~~<UjUj'kD$;E8D 8q>Q#%99')A/*$D  *,\WlWl)mD&?I<D $"< *v{ NN9N9NO 	  	r3   r4   r5   c                 x   U R                  U5      nU R                  U5      nU R                  b&  U R                  U5      nU R                  U5      nX#-   nU R                  b&  U R	                  U5      nU R                  U5      nX$-   nU R                  b  U R                  U5      nX%-   nU R                  U5      $ r7   )rM   rN   rO   rP   rQ   rR   rS   r"   )r/   r4   main_outputsvertical_outputshorizontal_outputsid_outs         r1   r8   TextNetRepConvLayer.forward   s    ~~m4++L9 )#11-@#778HI':L +!%!5!5m!D!%!;!;<N!O'<L(&&}5F'0L''55r3   )r"   rR   rQ   r   rN   rM   rK   rF   rS   r   rP   rO   )r:   r;   r<   r=   __doc__r   intr   r>   r   r8   r?   r@   rA   s   @r1   rC   rC   K   sS    7
} 7
3 7
c 7
`c 7
mp 7
r6U\\ 6ell 6 6r3   rC   c                   :   ^  \ rS rSrS\S\4U 4S jjrS rSrU =r	$ )TextNetStage   r   depthc                 r  > [         TU ]  5         UR                  U   nUR                  U   n[	        U5      nUR
                  U   nUR
                  US-      nU/U/US-
  -  -   nU/U-  n	/ n
[        XX45       H  nU
R                  [        U/UQ76 5        M      [        R                  " U
5      U l        g )Nr   )r   r   conv_layer_kernel_sizesconv_layer_strideslenhidden_sizeszipappendrC   r%   
ModuleListstage)r/   r   rb   r   r   
num_layersstage_in_channel_sizestage_out_channel_sizerE   rF   rk   stage_configr0   s               r1   r   TextNetStage.__init__   s    44U;**51%
 & 3 3E :!'!4!4UQY!?,-1G0HJYZN0[[./*<;OLLL,VClCD P]]5)
r3   c                 <    U R                    H  nU" U5      nM     U$ r7   rk   )r/   hidden_stateblocks      r1   r8   TextNetStage.forward   s     ZZE .L  r3   rr   )
r:   r;   r<   r=   r   r^   r   r8   r?   r@   rA   s   @r1   r`   r`      s     *} *S *" r3   r`   c            	       r   ^  \ rS rSrS\4U 4S jjr  S
S\R                  S\\	   S\\	   S\
4S jjrS	rU =r$ )TextNetEncoder   r   c                    > [         TU ]  5         / n[        UR                  5      n[	        U5       H  nUR                  [        X5      5        M     [        R                  " U5      U l	        g r7   )
r   r   rf   rd   rangeri   r`   r%   rj   stages)r/   r   r{   
num_stagesstage_ixr0   s        r1   r   TextNetEncoder.__init__   sU    778
j)HMM,v89 * mmF+r3   rs   output_hidden_statesreturn_dictr5   c                     U/nU R                    H  nU" U5      nUR                  U5        M     U(       d  U4nU(       a  Xd4-   $ U$ [        XS9$ )N)last_hidden_stater4   )r{   ri   r
   )r/   rs   r   r   r4   rk   outputs          r1   r8   TextNetEncoder.forward   s[     &[[E .L  . ! "_F0D6,,P&P-jjr3   )r{   rJ   )r:   r;   r<   r=   r   r   r>   r   r   boolr
   r8   r?   r@   rA   s   @r1   rw   rw      sY    ,} , 04&*	kllk 'tnk d^	k
 
(k kr3   rw   c                   .    \ rS rSr% \\S'   SrSrS rSr	g)TextNetPreTrainedModel   r   textnetpixel_valuesc                 &   [        U[        R                  [        R                  45      (       ak  UR                  R
                  R                  SU R                  R                  S9  UR                  b%  UR                  R
                  R                  5         g g [        U[        R                  5      (       aX  UR                  R
                  R                  S5        UR                  b%  UR                  R
                  R                  5         g g g )Ng        )meanstdg      ?)r#   r%   Linearr&   weightdatanormal_r   initializer_ranger   zero_r*   fill_)r/   modules     r1   _init_weights$TextNetPreTrainedModel._init_weights   s    fryy"))455MM&&CT[[5R5R&S{{&  &&( '//MM$$S){{&  &&( ' 0r3    N)
r:   r;   r<   r=   r   __annotations__base_model_prefixmain_input_namer   r?   r   r3   r1   r   r      s    !$O)r3   r   c                      ^  \ rS rSrU 4S jr\ S	S\S\\   S\\   S\	\
\\\   4   \
\   \4   4S jj5       rSrU =r$ )
TextNetModel   c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        [        R                  " S5      U l        U R                  5         g )N)r   r   )
r   r   r   stemrw   encoderr%   AdaptiveAvgPool2dpooler	post_initr/   r   r0   s     r1   r   TextNetModel.__init__   sD     $V,	%f-**62r3   r   r   r   r5   c                 D   Ub  UOU R                   R                  nUb  UOU R                   R                  nU R                  U5      nU R	                  XBUS9nUS   nU R                  U5      nU(       d  Xg4nU(       a  XS   4-   $ U$ [        UUU(       a  US   S9$ S S9$ )Nr   r   r   r   )r   pooler_outputr4   )r   use_return_dictr   r   r   r   r   )	r/   r   r   r   rs   encoder_outputsr   pooled_outputr   s	            r1   r8   TextNetModel.forward   s     &1%<k$++B]B]$8$D $++JjJj 	 yy.,,Q\ ' 
 ,A.$56'7F5I6Q/11UvU7/'0D/!,
 	
 KO
 	
r3   )r   r   r   rJ   )r:   r;   r<   r=   r   r   r   r   r   r   r$   r   listr   r8   r?   r@   rA   s   @r1   r   r      sg     os
"
:B4.
^fgk^l
	uS$s)^$eCj2ZZ	[
 
r3   r   z
    TextNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc                      ^  \ rS rSrU 4S jr\    S
S\\R                     S\\R                     S\\
   S\\
   S\4
S jj5       rS	rU =r$ )TextNetForImageClassificationi  c                   > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " S5      U l        [
        R                  " 5       U l	        UR                  S:  a.  [
        R                  " UR                  S   UR                  5      O[
        R                  " 5       U l        [
        R                  " U R                  U R                  /5      U l        U R!                  5         g )N)r   r   r   )r   r   
num_labelsr   r   r%   r   avg_poolFlattenflattenr   rg   r-   fcrj   
classifierr   r   s     r1   r   &TextNetForImageClassification.__init__  s      ++#F+,,V4zz|KQK\K\_`K`"))F//3V5F5FGfhfqfqfs --(EF 	r3   r   labelsr   r   r5   c                 Z   Ub  UOU R                   R                  nU R                  XUS9nUS   nU R                   H  nU" U5      nM     U R	                  U5      nSn	Ub  U R                  X(U R                   5      n	U(       d  U4USS -   n
U	b  U	4U
-   $ U
$ [        XUR                  S9$ )a  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Examples:
```python
>>> import torch
>>> import requests
>>> from transformers import TextNetForImageClassification, TextNetImageProcessor
>>> from PIL import Image

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> processor = TextNetImageProcessor.from_pretrained("czczup/textnet-base")
>>> model = TextNetForImageClassification.from_pretrained("czczup/textnet-base")

>>> inputs = processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
...     outputs = model(**inputs)
>>> outputs.logits.shape
torch.Size([1, 2])
```Nr   r   r   )losslogitsr4   )r   r   r   r   r   loss_functionr   r4   )r/   r   r   r   r   outputsr   layerr   r   r   s              r1   r8   %TextNetForImageClassification.forward&  s    B &1%<k$++B]B],,|do,p#AJ__E %&7 8 %*+%%fdkkBDY,F'+'7D7V#CVC3\c\q\qrrr3   )r   r   r   r   r   r   )NNNN)r:   r;   r<   r=   r   r   r   r>   FloatTensor
LongTensorr   r   r8   r?   r@   rA   s   @r1   r   r     s      59-1/3&*0su0010s ))*0s 'tn	0s
 d^0s 
.0s 0sr3   r   zP
    TextNet backbone, to be used with frameworks like DETR and MaskFormer.
    c                   r   ^  \ rS rSrSrU 4S jr\ S
S\S\\	   S\\	   S\
\\   \4   4S jj5       rS	rU =r$ )TextNetBackboneiZ  Fc                    > [         TU ]  U5        [         TU ]	  U5        [        U5      U l        UR
                  U l        U R                  5         g r7   )r   r   _init_backboner   r   rg   rH   r   r   s     r1   r   TextNetBackbone.__init__b  sC     v&#F+"// 	r3   r   r   r   r5   c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nU R                  USUS9nU(       a  UR                  OUS   nSn[        U R                  5       H  u  pxXR                  ;   d  M  XeU   4-  nM      U(       d)  U4n	U(       a  U(       a  UR                  OUS   nX4-  n	U	$ [        UU(       a  UR                  SS9$ SSS9$ )a  
Examples:

```python
>>> import torch
>>> import requests
>>> from PIL import Image
>>> from transformers import AutoImageProcessor, AutoBackbone

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> processor = AutoImageProcessor.from_pretrained("czczup/textnet-base")
>>> model = AutoBackbone.from_pretrained("czczup/textnet-base")

>>> inputs = processor(image, return_tensors="pt")
>>> with torch.no_grad():
>>>     outputs = model(**inputs)
```NTr   r   r   )feature_mapsr4   
attentions)	r   r   r   r   r4   	enumeratestage_namesout_featuresr	   )
r/   r   r   r   r   r4   r   idxrk   r   s
             r1   r8   TextNetBackbone.forwardl  s    . &1%<k$++B]B]$8$D $++JjJj 	 ,,|$T_,`1<--'!*#D$4$45JC)))s!3 55 6 "_F#9D 5 5'RS***M%3G'//
 	
MQ
 	
r3   )rH   r   rJ   )r:   r;   r<   r=   has_attentionsr   r   r   r   r   r   r$   r	   r8   r?   r@   rA   s   @r1   r   r   Z  s^     N os/
"/
:B4./
^fgk^l/
	uU|^+	,/
 /
r3   r   )r   r   r   r   )'r]   typingr   r   r   r>   torch.nnr%   r   transformersr   transformers.activationsr   transformers.modeling_outputsr	   r
   r   r   1transformers.models.textnet.configuration_textnetr   transformers.utilsr   !transformers.utils.backbone_utilsr   utilsr   
get_loggerr:   loggerModuler   rC   r`   rw   r   r   r   r   __all__r   r3   r1   <module>r      s&    ' '    ( ,  L & ; # 
		H	%.ryy .DW6")) W6t299 0kRYY k: )_ ) )  "
) "
 "
J @s$: @s@sF 
=
,m =

=
@ ir3   