
    +h*#                        S SK JrJrJr  S SKrS SKJr  S SKJr  S SKJ	r	  S SK
Jr  S SKJr  SS\R                  S	\R                  S
\\   4S jjr " S S\5      r " S S\R&                  5      r " S S\R&                  5      rg)    )OptionalTupleUnionN)nn)CLIPPreTrainedModel)BaseModelOutputWithPooling)CLIPTextConfig)CLIPEncodermaskdtypetgt_lenc                 2   U R                  5       u  p4Ub  UOUnU SS2SSSS24   R                  USX$5      R                  U5      nSU-
  nUR                  UR                  [        R
                  5      [        R                  " U5      R                  5      $ )zW
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
N   g      ?)sizeexpandtomasked_filltorchboolfinfomin)r   r   r   bszsrc_lenexpanded_maskinverted_masks          n/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py_expand_maskr      s     99;LC ,g'GD$)*11#q'KNNuUM-'M$$]%5%5ejj%A5;;uCUCYCYZZ    c                      ^  \ rS rSr\rS/rS\4U 4S jjr        SS\R                  S\
S\\R                     S\\R                     S	\\R                     S
\\   S\\   S\\   S\\\4   4S jjrSrU =r$ )ContextCLIPTextModel*   CLIPEncoderLayerconfigc                 d   > [         TU ]  U5        [        U5      U l        U R	                  5         g N)super__init__ContextCLIPTextTransformer
text_model	post_init)selfr#   	__class__s     r   r'   ContextCLIPTextModel.__init__/   s&     4V<r   ctx_embeddingsctx_begin_pos	input_idsattention_maskposition_idsoutput_attentionsoutput_hidden_statesreturn_dictreturnc	                 .    U R                  UUUUUUUUS9$ )N)r.   r/   r0   r1   r2   r3   r4   r5   r)   )	r+   r.   r/   r0   r1   r2   r3   r4   r5   s	            r   forwardContextCLIPTextModel.forward5   s2     )')%/!5#  	
 		
r   r8   )NNNNNNNN)__name__
__module____qualname____firstlineno__r	   config_class_no_split_modulesr'   r   Tensorlistr   r   r   r   r   r9   __static_attributes____classcell__r,   s   @r   r    r    *   s    !L+,~  (,",015/3,0/3&*

 
 ELL)	

 !.
 u||,
 $D>
 'tn
 d^
 
u00	1
 
r   r    c                      ^  \ rS rSrS\4U 4S jjr      SS\R                  S\S\	\R                     S\	\R                     S\	\R                     S	\	\
   S
\	\
   S\	\
   S\\\4   4S jjrS rSrU =r$ )r(   L   r#   c                    > [         TU ]  5         Xl        UR                  n[	        U5      U l        [        U5      U l        [        R                  " U5      U l
        g r%   )r&   r'   r#   hidden_sizeContextCLIPTextEmbeddings
embeddingsr
   encoderr   	LayerNormfinal_layer_normr+   r#   	embed_dimr,   s      r   r'   #ContextCLIPTextTransformer.__init__M   sG    &&	3F;"6* "Y 7r   r.   r/   r0   r1   r2   r3   r4   r5   r6   c	           	      h   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [	        S5      eUR                  5       n	UR                  SU	S   5      nU R                  UUUUS9n
U	u  pUb  XR                  S5      -  nU R                  XU
R                  5      R                  U
R                  5      nUb  [        XJR                  5      nU R                  U
UUUUUS9nUS   nU R                  U5      nU[        R                   " UR"                  S   UR                  S9UR                  [        R$                  5      R'                  SS	94   nU(       d
  UU4USS -   $ [)        UUUR*                  UR,                  S
9$ )z
Returns:

Nz$You have to specify either input_ids)r0   r2   r.   r/   r   )inputs_embedsr1   causal_attention_maskr3   r4   r5   r   )devicedim)last_hidden_statepooler_outputhidden_states
attentions)r#   r3   r4   use_return_dict
ValueErrorr   viewrK   _build_causal_attention_maskr   r   rV   r   rL   rN   r   arangeshapeintargmaxr   r[   r\   )r+   r.   r/   r0   r1   r2   r3   r4   r5   input_shaper[   r   seq_lenrU   encoder_outputsrY   pooled_outputs                    r   r9   "ContextCLIPTextTransformer.forwardU   s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]CDDnn&NN2{27	%)'	 ( 
 #%**1--G !% A A#P]PcPc d g g  !
 %).:M:MNN,,')"7/!5# ' 
 ,A. 112CD
 *LL*003I<L<LMLL#**r*24

 %}58KKK)/')77&11	
 	
r   c                     [         R                  " XX#S9nUR                  [         R                  " [         R                  " U5      R
                  5      5        UR                  S5        UR                  S5      nU$ )N)r   r   )r   emptyfill_tensorr   r   triu_	unsqueeze)r+   r   rf   r   r   s        r   r`   7ContextCLIPTextTransformer._build_causal_attention_mask   sV     {{3>

5<<E 2 6 678

1~~a r   )r#   rK   rL   rN   )NNNNNN)r;   r<   r=   r>   r	   r'   r   rA   rB   r   r   r   r   r   r9   r`   rC   rD   rE   s   @r   r(   r(   L   s    8~ 8 -115/3,0/3&*K
K
 K
 ELL)	K

 !.K
 u||,K
 $D>K
 'tnK
 d^K
 
u00	1K
Z r   r(   c                      ^  \ rS rSrS\4U 4S jjr   SS\R                  S\S\	\R                     S\	\R                     S\	\R                     S	\R                  4S
 jjrSrU =r$ )rJ      r#   c                 P  > [         TU ]  5         UR                  n[        R                  " UR
                  U5      U l        [        R                  " UR                  U5      U l        U R                  S[        R                  " UR                  5      R                  S5      5        g )Nr2   )r   rS   )r&   r'   rI   r   	Embedding
vocab_sizetoken_embeddingmax_position_embeddingsposition_embeddingregister_bufferr   ra   r   rO   s      r   r'   "ContextCLIPTextEmbeddings.__init__   sx    &&	!||F,=,=yI"$,,v/M/My"Y 	^U\\&:X:X-Y-`-`ah-ijr   r.   r/   r0   r2   rT   r6   c                    Uc  SnOUR                   S   nUb  UR                   S   OUR                   S   U-   nUc  U R                  S S 2S U24   nUc  U R                  U5      n/ nUR                   S   n	Ubd  [        U	5       H@  n
X*   nXZS U24   nXZUS 24   nUR	                  [
        R                  " XU
   U/SS95        MB     [
        R                  " USS9nU R                  U5      nX^-   nU$ )Nr   r   rS   rW   )	rb   r2   rv   rangeappendr   catstackrx   )r+   r.   r/   r0   r2   rT   ctx_len
seq_lengthinput_embeds_ctxr   icbpprefixsuffixposition_embeddingsrK   s                   r   r9   !ContextCLIPTextEmbeddings.forward   s#    !G$**1-G-6-Bioob)H[H[\^H_cjj
,,Q^<L  00;M  "%%a(C)sA'*C*dsd73F*cd73F$++EIIva?PRX6Y_`,ab $ !&,<! D"55lC"8
r   )rx   rv   )NNN)r;   r<   r=   r>   r	   r'   r   rA   rB   r   
LongTensorr9   rC   rD   rE   s   @r   rJ   rJ      s    k~ k 153704(( ( E,,-	(
 u//0(  -( 
( (r   rJ   r%   )typingr   r   r   r   r   transformersr   transformers.modeling_outputsr   +transformers.models.clip.configuration_clipr	   &transformers.models.clip.modeling_clipr
   rA   r   rc   r   r    Moduler(   rJ    r   r   <module>r      sz    * )   , D F >[u|| [EKK [(3- ["
. 
D] ]@3		 3r   