
    +h_                     @   S SK r S SKJrJrJrJr  S SKrS SKJr  S SK	rS SK
JrJrJr  S SKJr  S SKJr  S SKJr  SSKJrJrJrJr  SS	KJrJrJr  SS
KJr  SSKJ r   SSK!J"r"J#r#  \" 5       (       a  S SK$J%s  J&r'  Sr(OSr( " S S\"5      r) \RT                  " \+5      r,S/r-SS0r.  " S S\5      r/S$S\R`                  S\Rb                  S\\2   4S jjr3 " S S\Rh                  5      r5 " S S\Rh                  5      r6 " S S\5      r7 " S  S!\75      r8 " S" S#\75      r9g)%    N)ListOptionalTupleUnion)PretrainedConfigPreTrainedModelPreTrainedTokenizer)ACT2FN)BaseModelOutput)logging   )AutoencoderKLUNet2DConditionModelUNet2DModelVQModel)DDIMSchedulerLMSDiscreteSchedulerPNDMScheduler)is_torch_xla_available)randn_tensor   )DiffusionPipelineImagePipelineOutputTFc                   r  ^  \ rS rSrSrSrS\\\4   S\	S\
S\\\4   S\\\\4   4
U 4S	 jjr\R$                  " 5                SS
\\\\   4   S\\   S\\   S\\   S\\   S\\   S\\\R0                  \\R0                     4      S\\R2                     S\\   S\S\\\4   4S jj5       rSrU =r$ )LDMTextToImagePipeline)   a  
Pipeline for text-to-image generation using latent diffusion.

This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
implemented for all pipelines (downloading, saving, running on a particular device, etc.).

Parameters:
    vqvae ([`VQModel`]):
        Vector-quantized (VQ) model to encode and decode images to and from latent representations.
    bert ([`LDMBertModel`]):
        Text-encoder model based on [`~transformers.BERT`].
    tokenizer ([`~transformers.BertTokenizer`]):
        A `BertTokenizer` to tokenize text.
    unet ([`UNet2DConditionModel`]):
        A `UNet2DConditionModel` to denoise the encoded image latents.
    scheduler ([`SchedulerMixin`]):
        A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
        [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
zbert->unet->vqvaevqvaebert	tokenizerunet	schedulerc                    > [         TU ]  5         U R                  XX4US9  S[        U R                  R
                  R                  5      S-
  -  U l        g )N)r   r   r   r    r!   r      )super__init__register_moduleslenr   configblock_out_channelsvae_scale_factor)selfr   r   r   r    r!   	__class__s         x/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.pyr%   LDMTextToImagePipeline.__init__@   sN     	E	`ij !c$***;*;*N*N&ORS&S T    promptheightwidthnum_inference_stepsguidance_scaleeta	generatorlatentsoutput_typereturn_dictreturnc                    U=(       d-    U R                   R                  R                  U R                  -  nU=(       d-    U R                   R                  R                  U R                  -  n[	        U[
        5      (       a  SnO8[	        U[        5      (       a  [        U5      nO[        S[        U5       35      eUS-  S:w  d	  US-  S:w  a  [        SU SU S35      eUS:w  aN  U R                  S	/U-  S
SSSS9nU R                  UR                  R                  U R                  5      5      S   nU R                  US
SSSS9nU R                  UR                  R                  U R                  5      5      S   nXR                   R                  R                  US-  US-  4n[	        U[        5      (       a*  [        U5      U:w  a  [        S[        U5       SU S35      eUc   [!        UXpR                  UR"                  S9nO+UR$                  U:w  a  [        SUR$                   SU 35      eUR                  U R                  5      nU R&                  R)                  U5        S[+        [,        R.                  " U R&                  R0                  5      R2                  R5                  5       5      ;   n0 nU(       a  UUS'   U R7                  U R&                  R8                  5       H  nUS:X  a  UnUnO2[:        R<                  " U/S-  5      n[:        R<                  " WU/5      nU R                  UUUS9R>                  nUS:w  a  URA                  S5      u  nnUUUU-
  -  -   nU R&                  R0                  " UUU40 UD6RB                  n[D        (       d  M  [F        RH                  " 5         M     SU RJ                  R                  RL                  -  U-  nU RJ                  RO                  U5      R>                  nUS-  S-   RQ                  SS5      nURS                  5       RU                  SSSS5      RW                  5       nU	S:X  a  U RY                  U5      nU
(       d  U4$ [[        US9$ )a	  
The call function to the pipeline for generation.

Args:
    prompt (`str` or `List[str]`):
        The prompt or prompts to guide the image generation.
    height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
        The height in pixels of the generated image.
    width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
        The width in pixels of the generated image.
    num_inference_steps (`int`, *optional*, defaults to 50):
        The number of denoising steps. More denoising steps usually lead to a higher quality image at the
        expense of slower inference.
    guidance_scale (`float`, *optional*, defaults to 1.0):
        A higher guidance scale value encourages the model to generate images closely linked to the text
        `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
    generator (`torch.Generator`, *optional*):
        A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
        generation deterministic.
    latents (`torch.Tensor`, *optional*):
        Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
        generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
        tensor is generated by sampling using the supplied random `generator`.
    output_type (`str`, *optional*, defaults to `"pil"`):
        The output format of the generated image. Choose between `PIL.Image` or `np.array`.
    return_dict (`bool`, *optional*, defaults to `True`):
        Whether or not to return a [`ImagePipelineOutput`] instead of a plain tuple.

Example:

```py
>>> from diffusers import DiffusionPipeline

>>> # load model and scheduler
>>> ldm = DiffusionPipeline.from_pretrained("CompVis/ldm-text2im-large-256")

>>> # run pipeline in inference (sample random noise and denoise)
>>> prompt = "A painting of a squirrel eating a burger"
>>> images = ldm([prompt], num_inference_steps=50, eta=0.3, guidance_scale=6).images

>>> # save images
>>> for idx, image in enumerate(images):
...     image.save(f"squirrel-{idx}.png")
```

Returns:
    [`~pipelines.ImagePipelineOutput`] or `tuple`:
        If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
        returned where the first element is a list with the generated images.
r#   z2`prompt` has to be of type `str` or `list` but is    r   z7`height` and `width` have to be divisible by 8 but are z and .      ? 
max_lengthM   Tpt)paddingr@   
truncationreturn_tensorsz/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r6   devicedtypezUnexpected latents shape, got z, expected r5   r   )encoder_hidden_statesg      ?r   pil)images).r    r(   sample_sizer*   
isinstancestrlistr'   
ValueErrortyper   r   	input_idsto_execution_devicein_channelsr   rG   shaper!   set_timestepssetinspect	signaturestep
parameterskeysprogress_bar	timestepstorchcatsamplechunkprev_sampleXLA_AVAILABLExm	mark_stepr   scaling_factordecodeclampcpupermutenumpynumpy_to_pilr   )r+   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   kwargs
batch_sizeuncond_inputnegative_prompt_embeds
text_inputprompt_embedslatents_shapeaccepts_etaextra_kwargstlatents_inputcontext
noise_prednoise_pred_uncondnoise_prediction_textimages                              r-   __call__LDMTextToImagePipeline.__call__L   s#   D O499++77$:O:OOM))558M8MMfc""J%%VJQRVW]R^Q_`aaA:?eai1nVW]V^^cdicjjklmm S >>z!<BSWhl * L &*YY|/E/E/H/HI_I_/`%abc%d" ^^FLR\`qu^v
		*"6"6"9"9$:P:P"QRSTU $YY%5%5%A%A6Q;PUYZPZ[i&&3y>Z+GA#i.AQ R&<'gi 
 ?";Q;QYfYlYlG }}- #A'--P[\i[j!kll**T334$$%89 s7#4#4T^^5H5H#I#T#T#Y#Y#[\\"%L""4>>#;#;<A$ ''
 !&		7)a- 8))%;]$KL =!7SZZJ$;E;K;KA;N8!#8.CX[lCl1mm
 nn))*aQLQ]]G}/ =4 djj''666@

!!'*11S''1-		##Aq!Q/557%%%e,E8O"%00r/   )r*   )	NN2   r>           NNrI   T) __name__
__module____qualname____firstlineno____doc__model_cpu_offload_seqr   r   r   r   r	   r   r   r   r   r   r%   r_   no_gradrM   r   r   intfloat	GeneratorTensorboolr   r   r~   __static_attributes____classcell__r,   s   @r-   r   r   )   sr   ( 0
UWm+,
U 
U '	
U
 K!556
U 7KKL
U ]]_ !%#-/*-"MQ*.%* X1c49n%X1 X1 }	X1
 &c]X1 !X1 e_X1 E%//43H"HIJX1 %,,'X1 c]X1 X1 
u))	*X1 X1r/   r   zldm-bertz>https://huggingface.co/valhalla/ldm-bert/blob/main/config.jsonc                   b   ^  \ rS rSrSrS/rSSS.r                 S	U 4S jjrSrU =r	$ )
LDMBertConfig   ldmbertpast_key_valuesencoder_attention_headsd_model)num_attention_headshidden_sizec                    > Xl         X l        Xl        X@l        X0l        XPl        X`l        Xl        Xl        Xl	        Xl
        Xl        Xpl        Xl        UU l        X0l        Xl        ["        TU ]H  " SSU0UD6  g )Npad_token_id )
vocab_sizemax_position_embeddingsr   encoder_ffn_dimencoder_layersr   head_dimdropoutattention_dropoutactivation_dropoutactivation_functioninit_stdencoder_layerdropclassifier_dropout	use_cachenum_hidden_layersscale_embeddingr$   r%   )r+   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rn   r,   s                      r-   r%   LDMBertConfig.__init__  s|    * %'>$.,'>$ !2"4#6  !2"4"!/.=l=f=r/   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )i:w  rA       i   r<   @   r   gelui   g?r   r   g{Gz?r   FTr   )
r   r   r   r   
model_typekeys_to_ignore_at_inferenceattribute_mapr%   r   r   r   s   @r-   r   r      s[    J#4"5,EV_`M  " !"%'> '>r/   r   maskrG   tgt_lenc                 2   U R                  5       u  p4Ub  UOUnU SS2SSSS24   R                  USX$5      R                  U5      nSU-
  nUR                  UR                  [        R
                  5      [        R                  " U5      R                  5      $ )zW
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
Nr#   r>   )sizeexpandrR   masked_fillr_   r   finfomin)r   rG   r   bszsrc_lenexpanded_maskinverted_masks          r-   _expand_maskr   -  s     99;LC ,g'GD$)*11#q'KNNuUM-'M$$]%5%5ejj%A5;;uCUCYCYZZr/   c                     ^  \ rS rSrSr   SS\S\S\S\S\S\4U 4S	 jjjrS
\	R                  S\S\4S jr     SS\	R                  S\\	R                     S\\\	R                        S\\	R                     S\\	R                     S\S\\	R                  \\	R                     \\\	R                        4   4S jjrSrU =r$ )LDMBertAttentioni<  z=Multi-headed attention from 'Attention Is All You Need' paper	embed_dim	num_headsr   r   
is_decoderbiasc                   > [         TU ]  5         Xl        X l        X@l        X0l        X2-  U l        U R
                  S-  U l        XPl        [        R                  " XR                  US9U l        [        R                  " XR                  US9U l        [        R                  " XR                  US9U l        [        R                  " U R                  U5      U l        g )Ng      )r   )r$   r%   r   r   r   r   	inner_dimscalingr   nnLineark_projv_projq_projout_proj)r+   r   r   r   r   r   r   r,   s          r-   r%   LDMBertAttention.__init__?  s     	"" !-}}d*$ii	>>Eii	>>Eii	>>E		$..)<r/   tensorseq_lenr   c                     UR                  X2U R                  U R                  5      R                  SS5      R	                  5       $ )Nr#   r   )viewr   r   	transpose
contiguous)r+   r   r   r   s       r-   _shapeLDMBertAttention._shapeW  s5    {{3GQQRSUVWbbddr/   hidden_stateskey_value_statespast_key_valueattention_masklayer_head_maskoutput_attentionsr:   c                 	   USLnUR                  5       u  pn
U R                  U5      U R                  -  nU(       a  Ub  US   nUS   nGOU(       aE  U R                  U R	                  U5      SU5      nU R                  U R                  U5      SU5      nOUby  U R                  U R	                  U5      SU5      nU R                  U R                  U5      SU5      n[        R                  " US   U/SS9n[        R                  " US   U/SS9nODU R                  U R	                  U5      SU5      nU R                  U R                  U5      SU5      nU R                  (       a  X4nXR                  -  SU R                  4nU R                  XU5      R                  " U6 nUR                  " U6 nUR                  " U6 nUR                  S5      n[        R                  " XR                  SS5      5      nUR                  5       XR                  -  X4:w  a-  [        SXR                  -  X4 SUR                  5        35      eUbv  UR                  5       USX4:w  a"  [        S	USX4 SUR                  5        35      eUR                  XR                  X5      U-   nUR                  XR                  -  X5      n[        R                   R#                  USS9nUb  UR                  5       U R                  4:w  a*  [        S
U R                  4 SUR                  5        35      eUR                  SSSS5      UR                  XR                  X5      -  nUR                  XR                  -  X5      nU(       a;  UR                  XR                  X5      nUR                  XR                  -  X5      nOSn[        R                   R%                  UU R$                  U R&                  S9n[        R                  " UU5      nUR                  5       XR                  -  XR                  4:w  a5  [        SXR                  XR                  4 SUR                  5        35      eUR                  XR                  XR                  5      nUR                  SS5      nUR)                  XU R*                  5      nU R-                  U5      nUUU4$ )z#Input shape: Batch x Time x ChannelNr   r#   r   )dimz$Attention weights should be of size z	, but is z!Attention mask should be of size z/Head mask for a single layer should be of size ptrainingz `attn_output` should be of size )r   r   r   r   r   r   r_   r`   r   r   r   r   bmmr   rO   r   
functionalsoftmaxr   r   reshaper   r   )r+   r   r   r   r   r   r   is_cross_attentionr   r   _query_states
key_statesvalue_states
proj_shaper   attn_weightsattn_weights_reshaped
attn_probsattn_outputs                       r-   forwardLDMBertAttention.forwardZ  s    .T9',,.a {{=1DLL@."<'*J)!,LT[[1A%BBLJ;;t{{3C'Db#NL'T[[%?SIJ;;t{{='A2sKLN1$5z#BJJ 99nQ&7%FANL T[[%?SIJ;;t{{='A2sKL?? )7NNN*B>
{{<#>CCZP__j1
#((*5//!$yy/C/CAq/IJ3#7"JJ6nn8Lg7_6` a %%'(* 
 %""$a(BB 7a8R7SS\]k]p]p]r\st  (,,S..'SVddL',,S>>-A7TL}},,\r,B&##%$..):: Et~~FWEX Y',,./1  +//2q!<|?P?PQTVdVdfm?wwL',,S>>-A7TL
 %1$5$5c>>7$\!055cNN6JG]L$(!]]**<4<<RVR_R_*`
ii
L9#"6!OO2CR_R_3`2a b$$&') 
 "&&sNNG]]S!++Aq1 "))#GmmK01>AAr/   )r   r   r   r   r   r   r   r   r   r   r   )r   FF)NNNNF)r   r   r   r   r   r   r   r   r%   r_   r   r   r   r   r   r   r   r   s   @r-   r   r   <  s<   G  == = 	=
 = = = =0eU\\ eC ec e 488<1526"'oB||oB #5<<0oB !u||!45	oB
 !.oB "%,,/oB  oB 
u||Xell3XeELL>Q5RR	SoB oBr/   r   c                      ^  \ rS rSrS\4U 4S jjr SS\R                  S\R                  S\R                  S\\	   S\
\R                  \\R                     4   4
S	 jjrS
rU =r$ )LDMBertEncoderLayeri  r(   c                 ~  > [         TU ]  5         UR                  U l        [	        U R                  UR
                  UR                  UR                  S9U l        [        R                  " U R                  5      U l        UR                  U l        [        UR                     U l        UR                   U l        [        R"                  " U R                  UR$                  5      U l        [        R"                  " UR$                  U R                  5      U l        [        R                  " U R                  5      U l        g )N)r   r   r   r   )r$   r%   r   r   r   r   r   r   	self_attnr   	LayerNormself_attn_layer_normr   r
   r   activation_fnr   r   r   fc1fc2final_layer_normr+   r(   r,   s     r-   r%   LDMBertEncoderLayer.__init__  s    )nn44__,,	
 %'LL$@!~~#F$>$>?"(";";99T^^V-C-CD99V33T^^D "T^^ <r/   r   r   r   r   r:   c                    UnU R                  U5      nU R                  UUUUS9u  pn[        R                  R	                  XR                  U R
                  S9nXQ-   nUnU R                  U5      nU R                  U R                  U5      5      n[        R                  R	                  XR                  U R
                  S9nU R                  U5      n[        R                  R	                  XR                  U R
                  S9nXQ-   nUR                  [        R                  :X  a  [        R                  " U5      R                  5       (       d)  [        R                   " U5      R                  5       (       aC  [        R"                  " UR                  5      R$                  S-
  n[        R&                  " X* US9nU4n	U(       a  X4-  n	U	$ )aH  
Args:
    hidden_states (`torch.Tensor`): input to the layer of shape `(seq_len, batch, embed_dim)`
    attention_mask (`torch.Tensor`): attention mask of size
        `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
    layer_head_mask (`torch.Tensor`): mask for attention heads in a given layer of size
        `(encoder_attention_heads,)`.
    output_attentions (`bool`, *optional*):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more detail.
)r   r   r   r   r   i  )r   max)r   r   r   r   r   r   r   r   r   r   r   rG   r_   float16isinfanyisnanr   r  ri   )
r+   r   r   r   r   residualr   r   clamp_valueoutputss
             r-   r   LDMBertEncoderLayer.forward  s   $ !11-@)-')+/	 *8 *
&Q --m||VZVcVc-d 0 --m<**488M+BC--m?V?Vaeanan-o/--m||VZVcVc-d 0%--/KK&**,,M0J0N0N0P0P++m&9&9:>>EK!KK<[YM "&Gr/   )	r   r   r   r   r   r   r   r   r   )F)r   r   r   r   r   r%   r_   r   r   r   r   r   r   r   r   s   @r-   r   r     su    =} =, -20||0 0 	0
 $D>0 
u||Xell33	40 0r/   r   c                   >    \ rS rSr\rSrSrSS/rS r	\
S 5       rSrg	)
LDMBertPreTrainedModeli  modelTzencoder\.versionzdecoder\.versionc                 "   U R                   R                  n[        U[        R                  5      (       aW  UR
                  R                  R                  SUS9  UR                  b%  UR                  R                  R                  5         g g [        U[        R                  5      (       ad  UR
                  R                  R                  SUS9  UR                  b2  UR
                  R                  UR                     R                  5         g g g )Nr   )meanstd)r(   r   rL   r   r   weightdatanormal_r   zero_	Embeddingpadding_idx)r+   moduler  s      r-   _init_weights$LDMBertPreTrainedModel._init_weights  s    kk""fbii((MM&&CS&9{{&  &&( '--MM&&CS&9!!-""6#5#56<<> . .r/   c                     U R                   R                  n[        R                  " / SQSSSSU//U R                  S9nUR                  U5      US.nU$ )N)r      
      r   r   r<      r   )rF   )r   rQ   )r(   r   r_   r   rF   ne)r+   	pad_tokenrQ   dummy_inputss       r-   r   #LDMBertPreTrainedModel.dummy_inputs#  sW    KK,,	LL"2Q2q)4L!MVZVaVab	'll95"
 r/   r   N)r   r   r   r   r   config_classbase_model_prefix _supports_gradient_checkpointing"_keys_to_ignore_on_load_unexpectedr  propertyr   r   r   r/   r-   r  r    s8     L'+$*=?R)S&	?  r/   r  c                     ^  \ rS rSrSrS\4U 4S jjrS rS r        SS\	R                  S\\	R                     S	\\	R                     S
\\	R                     S\\	R                     S\\   S\\   S\\   S\\\4   4S jjrSrU =r$ )LDMBertEncoderi.  z
Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
[`LDMBertEncoderLayer`].

Args:
    config: LDMBertConfig
    embed_tokens (nn.Embedding): output embedding
r(   c                 8  > [         TU ]  U5        UR                  U l        UR                  nUR                  U l        UR                  U l        [        R                  " UR                  U5      U l        [        R                  " UR                  U5      U l        [        R                  " [        UR                  5       Vs/ s H  n[!        U5      PM     sn5      U l        [        R$                  " U5      U l        SU l        U R+                  5         g s  snf )NF)r$   r%   r   r   r   r  r   max_source_positionsr   r  r   embed_tokensembed_positions
ModuleListranger   r   layersr   
layer_normgradient_checkpointing	post_init)r+   r(   r   r   r,   s       r-   r%   LDMBertEncoder.__init__8  s     ~~NN	!..$*$B$B!LL):):IF!||F,J,JIVmm%PVPePeJf$gJfQ%8%@Jf$gh,,y1&+# %hs   Dc                     U R                   $ Nr+  )r+   s    r-   get_input_embeddings#LDMBertEncoder.get_input_embeddingsJ  s       r/   c                     Xl         g r5  r6  )r+   values     r-   set_input_embeddings#LDMBertEncoder.set_input_embeddingsM  s    !r/   rQ   r   position_ids	head_maskinputs_embedsr   output_hidden_statesr9   r:   c	                 t   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUb  Ub  [	        S5      eUb&  UR                  5       n	UR                  SU	S   5      nO"Ub  UR                  5       SS n	O[	        S5      eUc  U R                  U5      nU	S   n
Uc=  [        R                  " U
[        R                  UR                  S9R                  S5      nU R                  U5      nX[-   n[        R                  R!                  XR                   U R"                  S9nUb  [%        X%R&                  5      nU(       a  S	OSnU(       a  S	OSnUb`  UR                  5       S
   [)        U R*                  5      :w  a6  [	        S[)        U R*                  5       SUR                  5       S
    S35      e[-        U R*                  5       H  u  nnU(       a  X4-   n[        R.                  " 5       (       a-  U R0                  (       a  U R3                  UUUUb  XO   OS5      nOU" UUUb  XO   OSUS9nUS
   nU(       d  Mw  UUS   4-   nM     U R5                  U5      nU(       a  X4-   nU(       d  [7        S XU4 5       5      $ [9        XUS9$ )a  
Args:
    input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
        Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you
        provide it.

        Indices can be obtained using [`BartTokenizer`]. See [`PreTrainedTokenizer.encode`] and
        [`PreTrainedTokenizer.__call__`] for details.

        [What are input IDs?](../glossary#input-ids)
    attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
        Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

        - 1 for tokens that are **not masked**,
        - 0 for tokens that are **masked**.

        [What are attention masks?](../glossary#attention-mask)
    head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*):
        Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`:

        - 1 indicates the head is **not masked**,
        - 0 indicates the head is **masked**.

    inputs_embeds (`torch.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
        Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
        This is useful if you want more control over how to convert `input_ids` indices into associated vectors
        than the model's internal embedding lookup matrix.
    output_attentions (`bool`, *optional*):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more detail.
    output_hidden_states (`bool`, *optional*):
        Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
        for more detail.
    return_dict (`bool`, *optional*):
        Whether or not to return a [`~utils.BaseModelOutput`] instead of a plain tuple.
NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embedsr#   )rG   rF   )r#   r   r   r   r   z&The head_mask should be specified for z layers, but it is for r=   )r   r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr5  r   ).0vs     r-   	<genexpr>)LDMBertEncoder.forward.<locals>.<genexpr>  s     e$Sq$Ss   	)last_hidden_stater   
attentions)r(   r   r@  use_return_dictrO   r   r   r+  r_   arangelongrF   r   r,  r   r   r   r   r   rG   r'   r/  	enumerateis_grad_enabledr1  _gradient_checkpointing_funcr0  tupler   )r+   rQ   r   r=  r>  r?  r   r@  r9   input_shaper   	embed_posr   encoder_statesall_attentionsidxencoder_layerlayer_outputss                     r-   r   LDMBertEncoder.forwardP  s   ^ 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]  ]%>cdd"#..*K!r;r?;I&',,.s3KTUU  --i8Ma. <<uzz-J^J^_ffgnoL((6	%1--m||VZVcVc-d %).:M:MNN30d  ~~"s4;;'78 <S=M<N O!(+,A/ 
 #,DKK"8C#!/2B!B$$&&4+F+F $ A A!!"'0'<Y^$	! !.!"7@7LY^RV&7	! *!,M  !/=3C2E!E+ #9. 6+.>>Ne]N$Seee+Vd
 	
r/   )r   r,  r+  r1  r0  r/  r*  r  NNNNNNNN)r   r   r   r   r   r   r%   r7  r;  r_   
LongTensorr   r   r   r   r   r   r   r   r   r   s   @r-   r(  r(  .  s    } $!"
 '+1537,004,0/3&*{
##{
 !.{
 u//0	{

 ELL){
  -{
 $D>{
 'tn{
 d^{
 
uo%	&{
 {
r/   r(  c                   N   ^  \ rS rSr/ rS\4U 4S jjr        SS jrSrU =r	$ )LDMBertModeli  r(   c                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  UR                  5      U l        g r5  )	r$   r%   r(  r  r   r   r   r   	to_logitsr   s     r-   r%   LDMBertModel.__init__  s:     #F+
6#5#5v7H7HIr/   c	                 2    U R                  UUUUUUUUS9n	U	$ )N)r   r=  r>  r?  r   r@  r9   )r  )
r+   rQ   r   r=  r>  r?  r   r@  r9   r  s
             r-   r   LDMBertModel.forward  s6     **)%'/!5#  	
 r/   )r  r]  rX  )
r   r   r   r   _no_split_modulesr   r%   r   r   r   r   s   @r-   r[  r[    s;    J} J ! r/   r[  r5  ):rX   typingr   r   r   r   r_   torch.nnr   torch.utils.checkpointtransformersr   r   r	   transformers.activationsr
   transformers.modeling_outputsr   transformers.utilsr   modelsr   r   r   r   
schedulersr   r   r   utilsr   utils.torch_utilsr   pipeline_utilsr   r   torch_xla.core.xla_modelcore	xla_modelre   rd   r   
get_loggerr   logger%LDMBERT_PRETRAINED_MODEL_ARCHIVE_LIST%LDMBERT_PRETRAINED_CONFIG_ARCHIVE_MAPr   r   rG   r   r   Moduler   r   r  r(  r[  r   r/   r-   <module>rv     s*    / /    O O + 9 & O O L L + - C ))MM|1. |1D  
		H	% ) % P) %
 #,>$ ,>^[u|| [EKK [(3- [MBryy MB`B")) BL_ 8]
+ ]
@) r/   