
    +hA                        S SK JrJrJr  S SKrS SKJrJrJrJ	r	J
r
  SSKJr  SSKJr  SSKJrJr  SSKJrJr  SS	KJrJrJr  SS
KJr  SSKJrJrJrJr  SSK J!r!J"r"  SSK#J$r$J%r%J&r&J'r'  SSK J(r(  \RR                  " \*5      r+ SS\RX                  S\\RZ                     S\.4S jjr/ " S S\!5      r0 " S S\!5      r1 " S S\!5      r2 " S S\!5      r3g)    )ListOptionalTupleN)CLIPImageProcessorCLIPTextModelCLIPTextModelWithProjectionCLIPTokenizerCLIPVisionModelWithProjection   )
FrozenDict)ClassifierFreeGuidance)PipelineImageInputVaeImageProcessor) StableDiffusionXLLoraLoaderMixinTextualInversionLoaderMixin)AutoencoderKLImageProjectionUNet2DConditionModel)adjust_lora_scale_text_encoder)USE_PEFT_BACKENDloggingscale_lora_layersunscale_lora_layers   )ModularPipelineBlocksPipelineState)ComponentSpec
ConfigSpec
InputParamOutputParam   ) StableDiffusionXLModularPipelineencoder_output	generatorsample_modec                    [        U S5      (       a!  US:X  a  U R                  R                  U5      $ [        U S5      (       a   US:X  a  U R                  R                  5       $ [        U S5      (       a  U R                  $ [        S5      e)Nlatent_distsampleargmaxlatentsz3Could not access latents of provided encoder_output)hasattrr'   r(   moder*   AttributeError)r#   r$   r%   s      r/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/modular_pipelines/stable_diffusion_xl/encoders.pyretrieve_latentsr/   /   s}     ~}--+2I))00;;		/	/K84K))..00		+	+%%%RSS    c                       \ rS rSrSr\S\4S j5       r\S\\	   4S j5       r
\S\\   4S j5       r\S\\   4S j5       r\SS	 j5       rS
 r\R&                  " 5       S\S\S\4S j5       rSrg)StableDiffusionXLIPAdapterStep<   stable-diffusion-xlreturnc                      g)Na  IP Adapter step that prepares ip adapter image embeddings.
Note that this step only prepares the embeddings - in order for it to work correctly, you need to load ip adapter weights into unet via ModularPipeline.load_ip_adapter() and pipeline.set_ip_adapter_scale().
See [ModularIPAdapterMixin](https://huggingface.co/docs/diffusers/api/loaders/ip_adapter#diffusers.loaders.ModularIPAdapterMixin) for more details selfs    r.   description*StableDiffusionXLIPAdapterStep.description?   s     	
r0   c                     [        S[        5      [        S[        [        SSS.5      SS9[        S[        5      [        S[
        [        S	S
05      SS9/$ )Nimage_encoderfeature_extractor   )size	crop_sizefrom_configconfigdefault_creation_methodunetguiderguidance_scale      @)r   r
   r   r   r   r   r8   s    r.   expected_components2StableDiffusionXLIPAdapterStep.expected_componentsI   sc     /+HI#"!3S"AB(5	 &"67&!#3S"9:(5	
 	
r0   c                 $    [        S[        SSS9/$ )Nip_adapter_imageTz%The image(s) to be used as ip adapter)requiredr:   )r   r   r8   s    r.   inputs%StableDiffusionXLIPAdapterStep.inputs\   s"     ""C	
 	
r0   c                 f    [        S[        R                  SS9[        S[        R                  SS9/$ )Nip_adapter_embedszIP adapter image embeddings	type_hintr:   negative_ip_adapter_embedsz$Negative IP adapter image embeddingsr    torchTensorr8   s    r.   intermediate_outputs3StableDiffusionXLIPAdapterStep.intermediate_outputsg   s5     +u||Qno,,,B
 	
r0   Nc                 d   [        U R                  R                  5       5      R                  n[	        U[
        R                  5      (       d  U R                  USS9R                  nUR                  X%S9nU(       aq  U R                  USS9R                  S   nUR                  USS9nU R                  [
        R                  " U5      SS9R                  S   nUR                  USS9nXg4$ U R                  U5      R                  nUR                  USS9n[
        R                  " U5      n	X4$ )	Npt)return_tensorsdevicedtypeToutput_hidden_statesr   dim)nextr=   
parametersr`   
isinstancerW   rX   r>   pixel_valuestohidden_statesrepeat_interleave
zeros_likeimage_embeds)

componentsimager_   num_images_per_promptrb   r`   image_enc_hidden_statesuncond_image_enc_hidden_statesrn   uncond_image_embedss
             r.   encode_image+StableDiffusionXLIPAdapterStep.encode_imager   sA    Z--88:;AA%..00t0LYYE4&0&>&>u[_&>&`&n&noq&r#&=&O&OPekl&O&m#-7-E-E  'd .F .mB. * .L-]-]%1 .^ .* +JJ%33E:GGL'99:OUV9WL"'"2"2<"@44r0   c                    / nU(       a  / nUGc'  [        U[        5      (       d  U/n[        U5      [        UR                  R                  R
                  5      :w  aB  [        S[        U5       S[        UR                  R                  R
                  5       S35      e[        X!R                  R                  R
                  5       Hi  u  p[        U
[        5      (       + nU R                  XUSU5      u  pUR                  US S S 24   5        U(       d  MQ  WR                  US S S 24   5        Mk     OEU H?  nU(       a$  UR                  S5      u  pWR                  U5        UR                  U5        MA     / n[        U5       Hw  u  p[        R                  " U/U-  SS9nU(       a2  [        R                  " WU   /U-  SS9n[        R                  " X/SS9nUR                  US9nUR                  U5        My     U$ )	NzK`ip_adapter_image` must have same length as the number of IP Adapters. Got z images and z IP Adapters.r!   r   r   rd   )r_   )rh   listlenrF   encoder_hid_projimage_projection_layers
ValueErrorzipr   ru   appendchunk	enumeraterW   catrj   )r9   ro   rM   ip_adapter_image_embedsr_   rq   prepare_unconditional_embedsrn   negative_image_embedssingle_ip_adapter_imageimage_proj_layeroutput_hidden_statesingle_image_embedssingle_negative_image_embedsis                  r.   prepare_ip_adapter_image_embeds>StableDiffusionXLIPAdapterStep.prepare_ip_adapter_image_embeds   s;    '$&!"*.55$4#5 #$JOO,L,L,d,d(ee abefvbwax  yE  FI  JT  JY  JY  Jj  Jj  JB  JB  FC  ED  DQ  R  >A //"B"B"Z"Z>9' +55E*W&W#DHDUDUDWEA# ##$7a$@A//)001MdTUg1VW> (?#/H[HaHabcHdE0)001MN##$78	 (? #%&/&="A"'))-@,ADY,Y_`"a+/4yy:OPQ:R9SVk9kqr/s,&+ii1M0cij&k#"5"8"8"8"G#**+>? '> '&r0   ro   statec           	         U R                  U5      nUR                  R                  S:  Ul        UR                  Ul        U R                  UUR                  S UR
                  SUR                  S9Ul        UR                  (       aa  / Ul	        [        UR                  5       HA  u  pEUR                  S5      u  peUR                  R                  U5        XSR                  U'   MC     U R                  X#5        X4$ )Nr!   )rM   r   r_   rq   r   r   )get_block_staterG   num_conditionsr   _execution_devicer_   r   rM   rR   rU   r   r   r~   set_block_state)r9   ro   r   block_stater   rn   r   s          r.   __call__'StableDiffusionXLIPAdapterStep.__call__   s    **513=3D3D3S3SVW3W0'99(,(L(L(99$(%%"#)4)Q)Q )M )
% 3357K2#,[-J-J#K6B6H6H6K3%66==>ST3?--a0 $L
 	U0  r0   r7   )N)__name__
__module____qualname____firstlineno__
model_namepropertystrr:   r   r   rJ   r   rO   r    rY   staticmethodru   r   rW   no_gradr"   r   r   __static_attributes__r7   r0   r.   r2   r2   <   s    &J
S 
 
 
T-%8 
 
$ 
Z( 
 
 
d;&7 
 
 5 521'f ]]_!#C !M !^k ! !r0   r2   c                      \ rS rSrSr\S\4S j5       r\S\\	   4S j5       r
\S\\   4S j5       r\S\\   4S j5       r\S\\   4S j5       r\S	 5       r\            SS\S\\   S\\R*                     S\S\S\\   S\\   S\\R0                     S\\R0                     S\\R0                     S\\R0                     S\\   S\\   4S jj5       r\R6                  " 5       S\S\S\4S j5       rSrg
) StableDiffusionXLTextEncoderStep   r4   r5   c                     g)NzMText Encoder step that generate text_embeddings to guide the image generationr7   r8   s    r.   r:   ,StableDiffusionXLTextEncoderStep.description   s    ^r0   c                     [        S[        5      [        S[        5      [        S[        5      [        S[        5      [        S[        [        SS05      SS	9/$ )
Ntext_encodertext_encoder_2	tokenizertokenizer_2rG   rH   rI   rB   rC   )r   r   r   r	   r   r   r8   s    r.   rJ   4StableDiffusionXLTextEncoderStep.expected_components   sW     .-8*,GH+}5-7&!#3S"9:(5	
 	
r0   c                     [        SS5      /$ )Nforce_zeros_for_empty_promptT)r   r8   s    r.   expected_configs1StableDiffusionXLTextEncoderStep.expected_configs   s    94@AAr0   c                 ~    [        S5      [        S5      [        S5      [        S5      [        S5      [        S5      /$ )Npromptprompt_2negative_promptnegative_prompt_2cross_attention_kwargs	clip_skip)r   r8   s    r.   rO   'StableDiffusionXLTextEncoderStep.inputs   sA     x z"()*+/0{#
 	
r0   c           
          [        S[        R                  SSS9[        S[        R                  SSS9[        S[        R                  SSS9[        S	[        R                  SS
S9/$ )Nprompt_embedsguider_input_fieldsz2text embeddings used to guide the image generation)rT   kwargs_typer:   negative_prompt_embedsz;negative text embeddings used to guide the image generationpooled_prompt_embedsz9pooled text embeddings used to guide the image generationnegative_pooled_prompt_embedszBnegative pooled text embeddings used to guide the image generationrV   r8   s    r.   rY   5StableDiffusionXLTextEncoderStep.intermediate_outputs   st     ,,1P	 (,,1Y	 &,,1W	 /,,1`	'
 	
r0   c                    U R                   b_  [        U R                   [        5      (       d@  [        U R                   [        5      (       d!  [	        S[        U R                   5       35      eU R                  ba  [        U R                  [        5      (       dA  [        U R                  [        5      (       d!  [	        S[        U R                  5       35      eg g g )Nz2`prompt` has to be of type `str` or `list` but is z4`prompt_2` has to be of type `str` or `list` but is )r   rh   r   rx   r|   typer   )r   s    r.   check_inputs-StableDiffusionXLTextEncoderStep.check_inputs  s    );--s33J{GYGY[_<`<`QRVWbWiWiRjQklmm!!-;//55jI]I]_c>d>dSTXYdYmYmTnSopqq ?e5 .r0   Nr   r   r_   rq   r   r   r   r   r   r   r   
lora_scaler   c                    U=(       d    U R                   nUb  [        U [        5      (       a  Xl        U R                  b8  [
        (       d  [        U R                  U5        O[        U R                  U5        U R                  b8  [
        (       d  [        U R                  U5        O[        U R                  U5        [        U[        5      (       a  U/OUnUb  [        U5      nOUR                  S   nU R                  b  U R                  U R                  /OU R                  /nU R                  b  U R                  U R                  /OU R                  /nUGc  U=(       d    Un[        U[        5      (       a  U/OUn/ nX/n[        UUU5       GH;  u  nnn[        U [        5      (       a  U R!                  UU5      nU" USUR"                  SSS9nUR$                  nU" USSS9R$                  nUR                  S	   UR                  S	   :  ah  [&        R(                  " UU5      (       dL  UR+                  USS2UR"                  S
-
  S	24   5      n[,        R/                  SUR"                   SU 35        U" UR1                  U5      SS9nUS   n
Uc  UR2                  S   nOUR2                  US-   *    nUR5                  U5        GM>     [&        R6                  " US	S9nUSL =(       a    U R8                  R:                  nU(       a8  U	c5  U(       a.  [&        R<                  " U5      n	[&        R<                  " U
5      nGOU(       Gaw  U	Gcs  U=(       d    SnU=(       d    Un[        U[        5      (       a  X/-  OUn[        U[        5      (       a  X/-  OUnUb;  [?        U5      [?        U5      La$  [A        S[?        U5       S[?        U5       S35      eU[        U5      :w  a!  [C        SU S[        U5       SU SU S3	5      eXg/n/ n[        UUU5       H  u  nnn[        U [        5      (       a  U R!                  UU5      nUR                  S
   nU" USUSSS9nU" UR$                  R1                  U5      SS9n	U	S   nU	R2                  S   n	UR5                  U	5        M     [&        R6                  " US	S9n	U R                  b%  UR1                  U R                  RD                  US9nO$UR1                  U RF                  RD                  US9nUR                  u  nnn URI                  S
US
5      nURK                  UU-  US	5      nU(       a  U	R                  S
   nU R                  b%  U	R1                  U R                  RD                  US9n	O$U	R1                  U RF                  RD                  US9n	U	RI                  S
US
5      n	U	RK                  X-  US	5      n	U
RI                  S
U5      RK                  UU-  S	5      n
U(       a%  URI                  S
U5      RK                  UU-  S	5      nU R                  b6  [        U [        5      (       a!  [
        (       a  [M        U R                  U5        U R                  b6  [        U [        5      (       a!  [
        (       a  [M        U R                  U5        XX4$ )a%
  
Encodes the prompt into text encoder hidden states.

Args:
    prompt (`str` or `List[str]`, *optional*):
        prompt to be encoded
    prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
        used in both text-encoders
    device: (`torch.device`):
        torch device
    num_images_per_prompt (`int`):
        number of images that should be generated per prompt
    prepare_unconditional_embeds (`bool`):
        whether to use prepare unconditional embeddings or not
    negative_prompt (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation. If not defined, one has to pass
        `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
        less than `1`).
    negative_prompt_2 (`str` or `List[str]`, *optional*):
        The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
        `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
    prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
        provided, text embeddings will be generated from `prompt` input argument.
    negative_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
        argument.
    pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
        If not provided, pooled text embeddings will be generated from `prompt` input argument.
    negative_pooled_prompt_embeds (`torch.Tensor`, *optional*):
        Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
        weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
        input argument.
    lora_scale (`float`, *optional*):
        A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
    clip_skip (`int`, *optional*):
        Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
        the output of the pre-final layer will be used for computing the prompt embeddings.
Nr   
max_lengthTr\   )paddingr   
truncationr]   longest)r   r]   r!   z\The following part of your input was truncated because CLIP can only handle sequences up to z	 tokens: ra   rc   r   rd    z?`negative_prompt` should be the same type to `prompt`, but got z != .z`negative_prompt`: z has batch size z, but `prompt`: zT. Please make sure that passed `negative_prompt` matches the batch size of `prompt`.)r`   r_   )'r   rh   r   _lora_scaler   r   r   r   r   r   ry   shaper   r   r}   r   maybe_convert_promptmodel_max_length	input_idsrW   equalbatch_decodeloggerwarningrj   rk   r~   concatrD   r   rm   r   	TypeErrorr|   r`   rF   repeatviewr   )!ro   r   r   r_   rq   r   r   r   r   r   r   r   r   r   
batch_size
tokenizerstext_encodersprompt_embeds_listpromptsr   r   text_inputstext_input_idsuntruncated_idsremoved_textzero_out_negative_promptuncond_tokensnegative_prompt_embeds_listr   uncond_inputbs_embedseq_len_s!                                    r.   encode_prompt.StableDiffusionXLTextEncoderStep.encode_prompt'  s    v 7:77 !j=]&^&^%/" &&2''2:3J3JJW%j&=&=zJ((4''2:3L3LjY%j&?&?L'44&&VJ&,,Q/J
 ##/ !!:#9#9:(() 	 &&2 $$j&?&?@++, 	  )6H%/#%>%>zHH "$(G36w
M3Z/	<j*EFF'<<VYOF'((99##' "-!6!6"+FIVZ"["e"e"((,0D0DR0HHQVQ\Q\"OR R $-#9#9/!YMgMgjkMknpMpJp:q#rLNN%667yP
 !-^->->v-F]a b (5Q'7$$$1$?$?$CM %2$?$?)a-@P$QM"))-8E 4[H "LL);DM $3d#:#mz?P?P?m?m ',B,JOg%*%5%5m%D",1,<,<=Q,R)).D.L-3O 1 D_ AK?\_@`@`j+<<fuO4>?PRU4V4V
00\m 
 !d6l$:O&OUVZ[jVkUl mV~Q(  s?33 )/)::J3K_J` ax/
| <33  "1 D*,'<?z[h<i8Lj*EFF&0&E&EoW`&aO*003
(#()##'  *6 **--f5)-*&
 1Gq0I-)?)M)Mb)Q&+223IJ+ =j. &+\\2MSU%V"$$0),,:3L3L3R3R[a,bM),,:??3H3HQW,XM,22'1%,,Q0EqI%**86K+KWVXY',2215G((4)?)B)B$3399& *C *& *@)B)BI^I^gm)B)n&%;%B%B1F[]^%_"%;%@%@Acelnp%q"3::1>STYY,,b 
 (,I,P,PQRTi,j,o,o00"-) "".*&FGGL\L\#J$;$;ZH$$0*&FGGL\L\#J$=$=zJ6Jiir0   ro   r   c                 6   U R                  U5      nU R                  U5        UR                  R                  S:  Ul        UR
                  Ul        UR                  b  UR                  R                  SS 5      OS Ul	        U R                  UUR                  UR                  UR                  SUR                  UR                  UR                  S S S S UR                  UR                  S9u  Ul        Ul        Ul        Ul        U R)                  X#5        X4$ )Nr!   scale)r   r   r   r   r   r   )r   r   rG   r   r   r   r_   r   gettext_encoder_lora_scaler   r   r   r   r   r   r   r   r   r   r   )r9   ro   r   r   s       r.   r   )StableDiffusionXLTextEncoderStep.__call__  s     **51+&3=3D3D3S3SVW3W0'99
 11= ..227DA 	+   44''))#'!%*."::!++  
	
%.,5$ 	U0  r0   r7   )NNr!   TNNNNNNNN) r   r   r   r   r   r   r   r:   r   r   rJ   r   r   r   rO   r    rY   r   r   r   rW   r_   intboolrX   floatr   r   r"   r   r   r   r7   r0   r.   r   r      s   &J_S _ _ 
T-%8 
 
 B$z"2 B B 
Z( 
 
 
d;&7 
 
8 r r  #')-%&-1)-+/049=7;@D&*#'pjpj 3-pj &	pj
  #pj '+pj "#pj $C=pj  -pj !) 6pj 'u||4pj (0'=pj UOpj C=pj pjd ]]_%!#C %!M %!^k %! %!r0   r   c                      \ rS rSrSr\S\4S j5       r\S\\	   4S j5       r
\S\\   4S j5       r\S\\   4S j5       rS\R                   S	\R"                  4S
 jr\R&                  " 5       S\S\S\4S j5       rSrg)StableDiffusionXLVaeEncoderStepiC  r4   r5   c                     g)NzIVae Encoder step that encode the input image into a latent representationr7   r8   s    r.   r:   +StableDiffusionXLVaeEncoderStep.descriptionF  s    Zr0   c           	      X    [        S[        5      [        S[        [        SS05      SS9/$ )Nvaeimage_processorvae_scale_factor   rB   rC   r   r   r   r   r8   s    r.   rJ   3StableDiffusionXLVaeEncoderStep.expected_componentsJ  s7     %/!!!#5q"9:(5	
 	
r0   c                     [        SSS9[        S5      [        S5      [        S5      [        S[        R                  SS	9[        S
[        [           SS	9/$ )Nrp   TrN   heightwidthr$   r`   z Data type of model tensor inputsrS   preprocess_kwargszA kwargs dictionary that if specified is passed along to the `ImageProcessor` as defined under `self.image_processor` in [diffusers.image_processor.VaeImageProcessor])r   rW   r`   r   dictr8   s    r.   rO   &StableDiffusionXLVaeEncoderStep.inputsV  sY     w.x w{#w%++Cef#"4. E
 	
r0   c                 6    [        S[        R                  SS9/$ )Nimage_latentszUThe latents representing the reference image for image-to-image/inpainting generationrS   rV   r8   s    r.   rY   4StableDiffusionXLVaeEncoderStep.intermediate_outputse  s#     ,,s
 	
r0   rp   r$   c                    S =pE[        UR                  R                  S5      (       ag  UR                  R                  R                  bF  [        R
                  " UR                  R                  R                  5      R                  SSSS5      n[        UR                  R                  S5      (       ag  UR                  R                  R                  bF  [        R
                  " UR                  R                  R                  5      R                  SSSS5      nUR                  nUR                  R                  R                  (       a7  UR                  5       nUR                  R                  [        R                  S9  [        U[        5      (       af  [        UR                   S   5       Vs/ s H-  n[#        UR                  R%                  X'US-    5      X7   S9PM/     nn[        R&                  " USS9nO#[#        UR                  R%                  U5      US9nUR                  R                  R                  (       a  UR                  R                  U5        UR                  U5      nUba  Ub^  UR                  UR(                  US	9nUR                  UR(                  US	9nX-
  UR                  R                  R*                  -  U-  nU$ UR                  R                  R*                  U-  nU$ s  snf 
Nlatents_meanr!      latents_stdr`   r   r$   rd   r^   r+   r   rD   r
  rW   tensorr   r  r`   force_upcastr   rj   float32rh   rx   ranger   r/   encoder   r_   scaling_factor	r9   ro   rp   r$   r
  r  r`   r   r  s	            r.   _encode_vae_image1StableDiffusionXLVaeEncoderStep._encode_vae_imageq  sV   %)):>>((.99jnn>S>S>`>`>l <<
(=(=(J(JKPPQRTUWXZ[\L:>>((-88Z^^=R=R=^=^=j,,z~~'<'<'H'HINNqRSUVXYZK>>  --KKMENNEMM2i&& u{{1~..A !!6!6uQ7G!HT]T`a.   "IIm;M,Z^^-B-B5-IU^_M>>  --NNe$%((/#(?'??-2F2Fe?TL%..0D0DE.RK*9Z^^=R=R=a=aadooM  'NN11@@=PM'   34K<ro   r   c                 *   U R                  U5      nUR                  =(       d    0 Ul        UR                  Ul        UR                  b  UR                  OUR
                  R                  Ul        UR                  R                  " UR                  4UR                  UR                  S.UR                  D6nUR                  UR                  UR                  S9nUR                  S   Ul        [        UR                  [         5      (       aR  [#        UR                  5      UR                  :w  a/  [%        S[#        UR                  5       SUR                   S35      eU R'                  XUR                  S9Ul        U R+                  X#5        X4$ )N)r   r  r^   r   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.rp   r$   )r   r  r   r_   r`   r   r   
preprocessrp   r   r  rj   r   r   rh   r$   rx   ry   r|   r  r  r   )r9   ro   r   r   rp   s        r.   r   (StableDiffusionXLVaeEncoderStep.__call__  so   **51(3(E(E(K%'991<1B1B1NK--T^TbTbThTh**55
&1&8&8@Q@Q
U`UrUr
  2 2+:K:KL!&Q k++T22s;;P;P7QU`UkUk7kA#kF[F[B\A] ^'2233su 
 %)$:$::^i^s^s$:$t!U0  r0   r7   N)r   r   r   r   r   r   r   r:   r   r   rJ   r   rO   r    rY   rW   rX   	Generatorr  r   r"   r   r   r   r7   r0   r.   r   r   C  s    &J[S [ [ 	
T-%8 	
 	
 
Z( 
 
 
d;&7 
 
 5<<  EOO  D ]]_!#C !M !^k ! !r0   r   c                      \ rS rSrSr\S\\   4S j5       r\S\	4S j5       r
\S\\   4S j5       r\S\\   4S j5       rS\R                   S	\R"                  4S
 jrS r\R(                  " 5       S\S\S\4S j5       rSrg)&StableDiffusionXLInpaintVaeEncoderStepi  r4   r5   c                     [        S[        5      [        S[        [        SS05      SS9[        S[        [        SSS	S	S
.5      SS9/$ )Nr   r   r   r   rB   rC   mask_processorFT)do_normalizer   do_binarizedo_convert_grayscaler   r8   s    r.   rJ   :StableDiffusionXLInpaintVaeEncoderStep.expected_components  sb     %/!!!#5q"9:(5	  !!%*RVptu )6
 	
r0   c                     g)NzLVae encoder step that prepares the image and mask for the inpainting processr7   r8   s    r.   r:   2StableDiffusionXLInpaintVaeEncoderStep.description  s    ]r0   c                     [        S5      [        S5      [        SSS9[        SSS9[        S5      [        S[        R                  S	S
9[        S5      /$ )Nr   r  rp   Tr   
mask_imagepadding_mask_cropr`   zThe dtype of the model inputsrS   r$   )r   rW   r`   r8   s    r.   rO   -StableDiffusionXLInpaintVaeEncoderStep.inputs  sT     x ww.|d3*+w%++Cbc{#
 	
r0   c           
          [        S[        R                  SS9[        S[        R                  SS9[        S[        R                  SS9[        S[        [        [
        [
        4      S	S9/$ )
Nr  z-The latents representation of the input imagerS   maskz*The mask to use for the inpainting processmasked_image_latentsz^The masked image latents to use for the inpainting process (only for inpainting-specifid unet)crops_coordszPThe crop coordinates to use for the preprocess/postprocess of the image and mask)r    rW   rX   r   r   r   r8   s    r.   rY   ;StableDiffusionXLInpaintVaeEncoderStep.intermediate_outputs  sj     5<<Et %,,Dpq&,,|
 "5c?3n
 	
r0   rp   r$   c                    S =pE[        UR                  R                  S5      (       ag  UR                  R                  R                  bF  [        R
                  " UR                  R                  R                  5      R                  SSSS5      n[        UR                  R                  S5      (       ag  UR                  R                  R                  bF  [        R
                  " UR                  R                  R                  5      R                  SSSS5      nUR                  nUR                  R                  R                  (       a7  UR                  5       nUR                  R                  [        R                  S9  [        U[        5      (       af  [        UR                   S   5       Vs/ s H-  n[#        UR                  R%                  X'US-    5      X7   S9PM/     nn[        R&                  " USS9nO#[#        UR                  R%                  U5      US9nUR                  R                  R                  (       a  UR                  R                  U5        UR                  U5      nUba  Ub^  UR                  UR(                  US	9nUR                  UR(                  US	9nX-
  U R                  R                  R*                  -  U-  nU$ UR                  R                  R*                  U-  nU$ s  snf r	  r  r  s	            r.   r  8StableDiffusionXLInpaintVaeEncoderStep._encode_vae_image  sT   %)):>>((.99jnn>S>S>`>`>l <<
(=(=(J(JKPPQRTUWXZ[\L:>>((-88Z^^=R=R=^=^=j,,z~~'<'<'H'HINNqRSUVXYZK>>  --KKMENNEMM2i&& u{{1~..A !!6!6uQ7G!HT]T`a.   "IIm;M,Z^^-B-B5-IU^_M>>  --NNe$%((/#(?'??-2F2Fe?TL%..0D0DE.RK*9TXX__=[=[[^iiM  'NN11@@=PM'r  c
                    [         R                  R                  R                  X%UR                  -  XaR                  -  4S9nUR                  XS9nUR                  S   U:  aW  XBR                  S   -  S:X  d  [        SU SUR                  S    S35      eUR                  XBR                  S   -  SSS5      nUb  UR                  S   S:X  a  Un
OS n
Ub  U
c  UR                  XS9nU R                  XU	S	9n
U
R                  S   U:  aW  XJR                  S   -  S:X  d  [        S
U SU
R                  S    S35      eU
R                  XJR                  S   -  SSS5      n
U
R                  XS9n
X*4$ )N)r@   r^   r   zvThe passed mask and the required batch size don't match. Masks are supposed to be duplicated to a total batch size of z, but zo masks were passed. Make sure the number of masks that you pass is divisible by the total requested batch size.r!   r  r  zyThe passed images and the required batch size don't match. Images are supposed to be duplicated to a total batch size of zq images were passed. Make sure the number of images that you pass is divisible by the total requested batch size.)
rW   nn
functionalinterpolater   rj   r   r|   r   r  )r9   ro   r.  masked_imager   r   r  r`   r_   r$   r/  s              r.   prepare_mask_latents;StableDiffusionXLInpaintVaeEncoderStep.prepare_mask_latents  s    xx""..*"="==uHcHc?cd / 
 wwfw2 ::a=:%

1-2 ..8\

1 O^^ 
 ;;zZZ]:Aq!DD#(:(:1(=(B#/ #' ##++fJ'+'='=jbk'='l$#))!,z9!$>$>q$AAQF$55?LG[GaGabcGdFe fxx 
 (<'B'B"<"<Q"??Aq($
 $8#:#:&#:#V ))r0   ro   r   c                 :   U R                  U5      nUR                  b  UR                  OUR                  R                  Ul        UR                  Ul        UR
                  c  UR                  Ul        UR                  c  UR                  Ul        UR                  bQ  UR                  R                  UR                  UR                  UR
                  UR                  S9Ul        SUl        OS Ul        SUl        UR                  R!                  UR"                  UR
                  UR                  UR                  UR                  S9nUR%                  [&        R(                  S9nUR                  R!                  UR                  UR
                  UR                  UR                  UR                  S9nXES:  -  Ul        UR,                  S   Ul        UR%                  UR                  UR                  S	9nU R1                  XUR2                  S
9Ul        U R7                  UUUR*                  UR.                  UR
                  UR                  UR                  UR                  UR2                  5	      u  Ul        Ul        U R=                  X#5        X4$ )N)padfilldefault)r   r  r0  resize_moder  )r   r  r?  r0  g      ?r   r^   r  )r   r`   r   r   r_   r   default_heightr  default_widthr+  r"  get_crop_regionr*  r0  r?  r   r  rp   rj   rW   r  r8  r   r   r  r$   r  r9  r.  r/  r   )r9   ro   r   r   rp   r.  s         r.   r   /StableDiffusionXLInpaintVaeEncoderStep.__call__=  sc   **511<1B1B1NK--T^TbTbThTh'99%!+!:!:K$ * 8 8K((4'1'@'@'P'P&&(9(9;;M;MS^SpSp (Q (K$ '-K#'+K$&/K#**55%%##$11#// 6 
 u}}-((33""%%###//$11 4 
 $)3J#7 !&Q 2 2+:K:KL$($:$::^i^s^s$:$t! >B=V=V$$""!!
>
:+: 	U0  r0   r7   N)r   r   r   r   r   r   r   r   rJ   r   r:   r   rO   r    rY   rW   rX   r  r  r9  r   r"   r   r   r   r7   r0   r.   r   r     s    &J
T-%8 
 
& ^S ^ ^ 	
Z( 	
 	
 
d;&7 
 
( 5<<  EOO  H-*^ ]]_9!#C 9!M 9!^k 9! 9!r0   r   )Nr(   )4typingr   r   r   rW   transformersr   r   r   r	   r
   configuration_utilsr   guidersr   r   r   r   loadersr   r   modelsr   r   r   models.lorar   utilsr   r   r   r   modular_pipeliner   r   modular_pipeline_utilsr   r   r   r    r"   
get_loggerr   r   rX   r  r   r/   r2   r   r   r   r7   r0   r.   <module>rO     s    ) (   . - D T J J 9  D W W > 
		H	%
 ck
TLL
T-5eoo-F
T\_
T[!%: [!|f!'< f!Rh!&; h!VI!-B I!r0   