
    cCi                        S r SSKrSSKrSSKJrJr  SSKrSSKJr  SSKJ	r	  SSK
Jr  SSKJr  SS	KJrJrJr  SS
KJr  SSKJr  SSKJr  SSKJrJrJr  SSKJr  SSKJrJ r   SSK!J"r"J#r#J$r$J%r%J&r&  SSK'J(r(  SSK)J*r*  \#" 5       (       a  SSK+J,r,  SSK-J.r.  \&R^                  " \05      r1Sr2 SSK3J4r4  Sr2\1Rk                  S5         " S S\Rr                  5      r:\2(       d  \4r: " S S\Rr                  5      r; " S S \Rr                  5      r< " S! S"\Rr                  5      r= " S# S$\Rr                  5      r> " S% S&\Rr                  5      r? " S' S(\Rr                  5      r@ " S) S*\5      rA\" " S+ S,\5      5       rB " S- S.\B5      rC " S/ S0\Rr                  5      rD\"" S1S29 " S3 S4\B\5      5       rES4S,/rFg! \6 a     N\7 a    \1Rq                  S5         GNf = f)5zPyTorch Pop2Piano model.    N)OptionalUnion)nn)CrossEntropyLoss)GenerationConfig   )ACT2FN)CacheDynamicCacheEncoderDecoderCache)GenerationMixin)AttentionMaskConverter)GradientCheckpointingLayer)BaseModelOutput)BaseModelOutputWithPastAndCrossAttentionsSeq2SeqLMOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringis_torch_flex_attn_availableis_torch_fx_proxyis_torchdynamo_compilinglogging)deprecate_kwarg   )Pop2PianoConfig)	BlockMask)make_flex_block_causal_maskT)FusedRMSNormFzVDiscovered apex.normalization.FusedRMSNorm - will use it instead of Pop2PianoLayerNormzIDiscovered apex but it failed to load, falling back to Pop2PianoLayerNormc                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )Pop2PianoLayerNormA   c                    > [         TU ]  5         [        R                  " [        R
                  " U5      5      U l        X l        g)zZ
Construct a layernorm module in the Pop2Piano style. No bias and no subtraction of mean.
N)super__init__r   	Parametertorchonesweightvariance_epsilon)selfhidden_sizeeps	__class__s      j/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/pop2piano/modeling_pop2piano.pyr&   Pop2PianoLayerNorm.__init__B   s/     	ll5::k#:; #    c                    UR                  [        R                  5      R                  S5      R	                  SSS9nU[        R
                  " X R                  -   5      -  nU R                  R                  [        R                  [        R                  4;   a%  UR                  U R                  R                  5      nU R                  U-  $ )N   T)keepdim)tor(   float32powmeanrsqrtr+   r*   dtypefloat16bfloat16)r,   hidden_statesvariances      r0   forwardPop2PianoLayerNorm.forwardJ   s     !##EMM266q9>>r4>P%H?T?T4T(UU ;; ??),,T[[->->?M{{]**r2   )r+   r*   )gư>)__name__
__module____qualname____firstlineno__r&   rA   __static_attributes____classcell__r/   s   @r0   r"   r"   A   s    $+ +r2   r"   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )Pop2PianoDenseActDense_   configc                 X  > [         TU ]  5         [        R                  " UR                  UR
                  SS9U l        [        R                  " UR
                  UR                  SS9U l        [        R                  " UR                  5      U l
        [        UR                     U l        g NFbias)r%   r&   r   Lineard_modeld_ffwiwoDropoutdropout_ratedropoutr	   dense_act_fnactr,   rM   r/   s     r0   r&   Pop2PianoDenseActDense.__init__`   sn    ))FNNFKKeD))FKKeDzz&"5"56&--.r2   c                    U R                  U5      nU R                  U5      nU R                  U5      n[        U R                  R
                  [        R                  5      (       a  UR                  U R                  R
                  R                  :w  aa  U R                  R
                  R                  [        R                  :w  a/  UR                  U R                  R
                  R                  5      nU R	                  U5      nU$ N)rU   r[   rY   
isinstancerV   r*   r(   Tensorr<   int8r7   )r,   r?   s     r0   rA   Pop2PianoDenseActDense.forwardg   s    ./]3tww~~u||44##tww~~';';;$$

2),,TWW^^-A-ABM.r2   )r[   rY   rU   rV   	rC   rD   rE   rF   r   r&   rA   rG   rH   rI   s   @r0   rK   rK   _   s    / / r2   rK   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )Pop2PianoDenseGatedActDensev   rM   c                   > [         TU ]  5         [        R                  " UR                  UR
                  SS9U l        [        R                  " UR                  UR
                  SS9U l        [        R                  " UR
                  UR                  SS9U l        [        R                  " UR                  5      U l        [        UR                     U l        g rO   )r%   r&   r   rR   rS   rT   wi_0wi_1rV   rW   rX   rY   r	   rZ   r[   r\   s     r0   r&   $Pop2PianoDenseGatedActDense.__init__w   s    IIfnnfkkF	IIfnnfkkF	))FKKeDzz&"5"56&--.r2   c                 8   U R                  U R                  U5      5      nU R                  U5      nX#-  nU R                  U5      n[	        U R
                  R                  [        R                  5      (       a  UR                  U R
                  R                  R                  :w  aa  U R
                  R                  R                  [        R                  :w  a/  UR                  U R
                  R                  R                  5      nU R                  U5      nU$ r_   )r[   ri   rj   rY   r`   rV   r*   r(   ra   r<   rb   r7   )r,   r?   hidden_geluhidden_linears       r0   rA   #Pop2PianoDenseGatedActDense.forward   s    hhtyy78		-0#3]3 tww~~u||44##tww~~';';;$$

2),,TWW^^-A-ABM.r2   )r[   rY   ri   rj   rV   rd   rI   s   @r0   rf   rf   v   s    / / r2   rf   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )Pop2PianoLayerFF   rM   c                   > [         TU ]  5         UR                  (       a  [        U5      U l        O[        U5      U l        [        UR                  UR                  S9U l	        [        R                  " UR                  5      U l        g )Nr.   )r%   r&   is_gated_actrf   DenseReluDenserK   r"   rS   layer_norm_epsilon
layer_normr   rW   rX   rY   r\   s     r0   r&   Pop2PianoLayerFF.__init__   s_    "=f"ED"8"@D,V^^AZAZ[zz&"5"56r2   c                 p    U R                  U5      nU R                  U5      nXR                  U5      -   nU$ r_   )rx   rv   rY   )r,   r?   forwarded_statess      r0   rA   Pop2PianoLayerFF.forward   s;    ??=9../?@%5E(FFr2   )rv   rY   rx   rd   rI   s   @r0   rq   rq      s    7 7 r2   rq   c                      ^  \ rS rSr  SS\S\\   4U 4S jjjrS r\	SS j5       r
SS jr\" SS	S
S9         SS j5       rSrU =r$ )Pop2PianoAttention   rM   	layer_idxc                   > [         TU ]  5         UR                  U l        X l        UR                  U l        UR
                  U l        UR                  U l        UR                  U l        UR                  U l
        UR                  U l        U R                  U R                  -  U l        X0l        Uc>  U R                  (       a-  [        R!                  SU R"                  R$                   S35        [&        R(                  " U R                  U R                  SS9U l        [&        R(                  " U R                  U R                  SS9U l        [&        R(                  " U R                  U R                  SS9U l        [&        R(                  " U R                  U R                  SS9U l        U R                  (       a0  [&        R2                  " U R                  U R                  5      U l        [7        5       U l        SU l        g )NzInstantiating a decoder z without passing `layer_idx` is not recommended and will to errors during the forward call, if caching is used. Please make sure to provide a `layer_idx` when creating this class.FrP   )r%   r&   
is_decoderhas_relative_attention_biasrelative_attention_num_bucketsrelative_attention_max_distancerS   d_kvkey_value_proj_dim	num_headsn_headsrX   rY   	inner_dimr   loggerwarning_oncer/   rC   r   rR   qkvo	Embeddingrelative_attention_biassetpruned_headsgradient_checkpointingr,   rM   r   r   r/   s       r0   r&   Pop2PianoAttention.__init__   so    	 +++F(.4.S.S+/5/U/U,~~"(++''**(?(??"*4>>+B+B*C D, , 4<<eD4<<eD4<<eD4>>4<<eD+++-<<8[8[]a]i]i+jD(E&+#r2   c                 
   [        U5      S:X  a  g [        XR                  U R                  U R                  5      u  p[        U R                  U5      U l        [        U R                  U5      U l        [        U R                  U5      U l        [        U R                  USS9U l	        U R                  [        U5      -
  U l        U R                  U R                  -  U l
        U R                  R                  U5      U l        g )Nr   r   dim)lenr   r   r   r   r   r   r   r   r   r   union)r,   headsindexs      r0   prune_headsPop2PianoAttention.prune_heads   s    u:?7<<!8!8$:K:K
 $DFFE2#DFFE2#DFFE2#DFFEq9||c%j0004<<? --33E:r2   c                 b   SnU(       aC  US-  nX@S:  R                  [        R                  5      U-  -  n[        R                  " U 5      n O,[        R                  " U [        R
                  " U 5      5      * n US-  nX:  nU[        R                  " U R                  5       U-  5      [        R                  " X5-  5      -  X%-
  -  R                  [        R                  5      -   n[        R                  " U[        R                  " XrS-
  5      5      nU[        R                  " X`U5      -  nU$ )aR  
Adapted from Mesh Tensorflow:
https://github.com/tensorflow/mesh/blob/0cb87fe07da627bf0b7e60475d59f95ed6b5be3d/mesh_tensorflow/transformer/transformer_layers.py#L593

Translate relative position to a bucket number for relative attention. The relative position is defined as
memory_position - query_position, i.e. the distance in tokens from the attending position to the attended-to
position. If bidirectional=False, then positive relative positions are invalid. We use smaller buckets for
small absolute relative_position and larger buckets for larger absolute relative_positions. All relative
positions >=max_distance map to the same bucket. All relative positions <=-max_distance map to the same bucket.
This should allow for more graceful generalization to longer sequences than the model has been trained on

Args:
    relative_position: an int32 Tensor
    bidirectional: a boolean - whether the attention is bidirectional
    num_buckets: an integer
    max_distance: an integer

Returns:
    a Tensor with the same shape as relative_position, containing int32 values in the range [0, num_buckets)
r   r4   r   )r7   r(   longabsmin
zeros_likelogfloatmath	full_likewhere)relative_positionbidirectionalnum_bucketsmax_distancerelative_buckets	max_exactis_smallrelative_position_if_larges           r0   _relative_position_bucket,Pop2PianoAttention._relative_position_bucket   s   , AKQ!6 : :5:: F TT %		*; <!&+<e>N>NO`>a!b b  1$	$0 &/II'--/);<hh|/01&( "UZZ.	&"
 &+YY&8RbcTc(d&
" 	EKKE_``r2   c                    Uc   U R                   R                  R                  nUc,  [        R                  " U[        R
                  US9SS2S4   nOUSS2S4   R                  U5      n[        R                  " U[        R
                  US9SSS24   nXe-
  nU R                  UU R                  (       + U R                  U R                  S9nU R                  U5      n	U	R                  / SQ5      R                  S5      n	U	$ )z%Compute binned relative position biasN)r<   device)r   r   r   )r4   r   r   r   )r   r*   r   r(   aranger   r7   r   r   r   r   permute	unsqueeze)
r,   query_length
key_lengthr   cache_positioncontext_positionmemory_positionr   relative_position_bucketvaluess
             r0   compute_biasPop2PianoAttention.compute_bias  s    >1188??F!$||L

SYZ[\^b[bc-ag699&A,,zFSTXZ[T[\+>#'#A#A#.;;==	 $B $
  --.FG	*44Q7r2   past_key_valuepast_key_values4.58new_nameversionc                    UR                   SS u  pUSLnU R                  U5      nUR                  USU R                  U R                  5      R                  SS5      nSn[        U[        5      (       aF  UR                  R                  U R                  5      nU(       a  UR                  nOUR                  nOUnU(       a  UOUnU(       aQ  UbN  U(       aG  UR                  U R                     R                  nUR                  U R                     R                  nOU R!                  U5      nU R#                  U5      nUR                  USU R                  U R                  5      R                  SS5      nUR                  USU R                  U R                  5      R                  SS5      nUbc  U(       d  U
OSn
UR%                  UUU R                  SU
05      u  nnU(       a.  [        U[        5      (       a  SUR                  U R                  '   [&        R(                  " UUR                  SS5      5      nUc  UR                   S	   nUb  UOU
S   S-   nU R*                  (       db  [&        R,                  " SU R                  UU4UR.                  UR0                  S
9nU R2                  (       a  U R4                  (       a  SUl        O.U R9                  UUUR.                  U
S9nUSS2SS2U* S2SS24   nUb#  USS2SS2SS2SUR                   S	   24   nUU-   nU R:                  (       aS  [&        R<                  " UR                   S   5      nSU[?        U R:                  5      '   USS2URA                  5       4   nOUnUU-  n[B        RD                  RG                  URI                  5       SS9RK                  U5      n[B        RD                  RM                  UU RL                  U R4                  S9nUb  UU-  n[&        R(                  " UU5      nUR                  SS5      RO                  5       nUR                  USU RP                  5      nU RS                  U5      nUU4nU	(       a  UU4-   nU$ )zp
Self-attention (if key_value_states is None) or attention over source sentence (provided by key_value_states).
Nr4   r5   r   Fr   Tr   )r   r<   )r   r   r   r   )ptraining)*shaper   viewr   r   	transposer`   r   
is_updatedgetr   cross_attention_cacheself_attention_cachelayerskeysr   r   r   updater(   matmulr   zerosr   r<   r   r   requires_gradr   r   r)   listboolr   
functionalsoftmaxr   type_asrY   
contiguousr   r   )r,   r?   maskkey_value_statesposition_biasr   layer_head_maskr   	use_cacheoutput_attentionsr   
batch_size
seq_lengthis_cross_attentionquery_statesr   curr_past_key_valuecurrent_states
key_statesvalue_statesscoresr   real_seq_lengthcausal_maskposition_bias_maskedattn_weightsattn_outputoutputss                               r0   rA   Pop2PianoAttention.forward  s   & "/!4!4Ra!8
 .T9vvm,#((RtG^G^_iijkmno 
o':;;(3377GJ!&5&K&K#&5&J&J#"1-?)]/"=*,33DNNCHHJ.55dnnELLL/J66.1L#RtG^G^_iijkmnoJ',,ZT\\4KbKbcmmnoqrsL*7It+>+E+Ednn?OQ_>`,(
L &*_FY*Z*ZAEO..t~~> lJ,@,@A,FG #))"-J.:.FlN[]L^abLbO33 %j*=fmm[a[g[g! ..4==26M/ $ 1 1#ZVd !2 ! !.aZKL!.C D"1a,Bj.>.>r.B,B#BC - ;::m11!45D,-Dd''()#0DIIK#@ #0 && }},,V\\^,DLLVT}},,\T\\TXTaTa,b &'/9Lll<>!++Aq1<<>!&&z2t~~Fff[)./Gr2   )rS   rY   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   FN)T       )NN)	NNNNNNFFN)rC   rD   rE   rF   r   r   intr&   r   staticmethodr   r   r   rA   rG   rH   rI   s   @r0   r~   r~      s     %*#'	!,!, C=	!, !,F;  -  - ^( %0A6R m Smr2   r~   c                   h   ^  \ rS rSrS
S\\   4U 4S jjjr\" SSSS9       SS j5       rS	r	U =r
$ )Pop2PianoLayerSelfAttentioni  r   c                    > [         TU ]  5         [        XUS9U l        [	        UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g )Nr   r   rt   )r%   r&   r~   SelfAttentionr"   rS   rw   rx   r   rW   rX   rY   r   s       r0   r&   $Pop2PianoLayerSelfAttention.__init__  sQ    /W`
 -V^^AZAZ[zz&"5"56r2   r   r   r   r   c	                     U R                  U5      n	U R                  U	UUUUUUUS9n
XR                  U
S   5      -   nU4U
SS  -   nU$ )N)r   r   r   r   r   r   r   r   r   )rx   r   rY   )r,   r?   attention_maskr   r   r   r   r   r   normed_hidden_statesattention_outputr   s               r0   rA   #Pop2PianoLayerSelfAttention.forward  st      $}=-- '++/) . 	
 &5Ea5H(II "%5ab%99r2   )r   rY   rx   r   )NNNNFFNrC   rD   rE   rF   r   r   r&   r   rA   rG   rH   rI   s   @r0   r   r     sP    7XVY] 7 7 %0A6R  Sr2   r   c                   j   ^  \ rS rSrS
S\\   4U 4S jjjr\" SSSS9        SS j5       rS	r	U =r
$ )Pop2PianoLayerCrossAttentioni  r   c                    > [         TU ]  5         [        USUS9U l        [	        UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g )NFr   rt   )r%   r&   r~   EncDecAttentionr"   rS   rw   rx   r   rW   rX   rY   )r,   rM   r   r/   s      r0   r&   %Pop2PianoLayerCrossAttention.__init__  sO    1&V[gpq,V^^AZAZ[zz&"5"56r2   r   r   r   r   c                     U R                  U5      nU R                  UUUUUUUUU	U
S9
nXR                  US   5      -   nU4USS  -   nU$ )N)	r   r   r   r   r   r   r   r   r   r   r   )rx   r
  rY   )r,   r?   r   r  r   r   r   r   r   r   r   r  r  layer_outputr   s                  r0   rA   $Pop2PianoLayerCrossAttention.forward  sy      $}=// -'++%/) 0 
 %||4DQ4G'HH/$4QR$88r2   )r
  rY   rx   r_   )NNNNFNFNr  rI   s   @r0   r  r    sR    7(3- 7 7 %0A6R
  Sr2   r  c                   r   ^  \ rS rSrS
S\\   4U 4S jjjr\" SSSS9            SS j5       rS	r	U =r
$ )Pop2PianoBlocki  r   c                 l  > [         TU ]  5         UR                  U l        [        R                  " 5       U l        U R
                  R                  [        XUS95        U R                  (       a"  U R
                  R                  [        XS95        U R
                  R                  [        U5      5        g )Nr   )r   )
r%   r&   r   r   
ModuleListlayerappendr   r  rq   r   s       r0   r&   Pop2PianoBlock.__init__  s     ++]]_


'[d	

 ??JJ:6WX

*623r2   r   r   r   r   c                    U R                   S   " UUUUU	U
UUS9nUS   nUSS  nUR                  [        R                  :X  a  [        R                  " [        R
                  " U5      R                  5       [        R                  " UR                  5      R                  S-
  [        R                  " UR                  5      R                  5      n[        R                  " UU* US9nU R                  =(       a    US LnU(       a  U R                   S   " UUUUUU	US   S-   U
US9	nUS   nUR                  [        R                  :X  a  [        R                  " [        R
                  " U5      R                  5       [        R                  " UR                  5      R                  S-
  [        R                  " UR                  5      R                  5      n[        R                  " UU* US9nUUSS  -   nU R                   S   " U5      nUR                  [        R                  :X  a  [        R                  " [        R
                  " U5      R                  5       [        R                  " UR                  5      R                  S-
  [        R                  " UR                  5      R                  5      n[        R                  " UU* US9nU4nUU-   $ )Nr   )r  r   r   r   r   r   r   r   i  )r   maxr5   )r   r  r   r   r   r   r   r   )r  r<   r(   r=   r   isinfanyfinfor  clampr   )r,   r?   r  r   encoder_hidden_statesencoder_attention_maskencoder_decoder_position_biasr   cross_attn_layer_head_maskr   r   r   return_dictr   self_attention_outputsattention_outputsclamp_valuedo_cross_attentioncross_attention_outputsr   s                       r0   rA   Pop2PianoBlock.forward  sl   " "&A)'++/)	"
 /q12126 %--/++M*..0M//044t;M//044K
 "KKK<[YM!__R1Fd1R&*jjm!65; : /+B/!3#"3
'# 4A6M ""emm3#kkKK.224KK 3 34884?KK 3 3488
 !&M|Q\ ] !24KAB4O O 

2}5 %--/++M*..0M//044t;M//044K
 "KKK<[YM " ''	
r2   )r   r  r   )NNNNNNNNFFTNr  rI   s   @r0   r  r    sa    4XVY] 4 4 %0A6R "#&*#'Q
 SQ
r2   r  c                   H    \ rS rSr% \\S'   SrSrSrSr	S/r
S/rS rS	 rS
rg)Pop2PianoPreTrainedModeliC  rM   transformerFTr  rV   c                 P   U R                   R                  n[        U[        5      (       a)  UR                  R
                  R                  US-  5        g[        U[        5      (       a2  UR                  R                  R
                  R                  SUS-  S9  g[        U[        5      (       a  UR                  R                  R
                  R                  SUS-  S9  [        US5      (       aN  U R                   R                  (       d2  UR                  R                  R
                  R                  SUS-  S9  ggg[        U[        5      (       GaQ  UR                   R                  R
                  R                  SX R                   R"                  S-  -  S9  [        UR                   S5      (       aE  UR                   R$                  b.  UR                   R$                  R
                  R'                  5         UR(                  R                  R
                  R                  SX R                   R*                  S-  -  S9  [        UR(                  S5      (       aG  UR(                  R$                  b/  UR(                  R$                  R
                  R'                  5         ggg[        U[,        5      (       Ga  UR.                  R                  R
                  R                  SX R                   R"                  S-  -  S9  [        UR.                  S5      (       aE  UR.                  R$                  b.  UR.                  R$                  R
                  R'                  5         UR0                  R                  R
                  R                  SX R                   R"                  S-  -  S9  [        UR0                  S5      (       aE  UR0                  R$                  b.  UR0                  R$                  R
                  R'                  5         UR(                  R                  R
                  R                  SX R                   R*                  S-  -  S9  [        UR(                  S5      (       aG  UR(                  R$                  b/  UR(                  R$                  R
                  R'                  5         ggg[        U[2        5      (       GaZ  U R                   R"                  nU R                   R4                  nU R                   R6                  nUR8                  R                  R
                  R                  SX#U-  S-  -  S9  UR:                  R                  R
                  R                  SX#S-  -  S9  UR<                  R                  R
                  R                  SX#S-  -  S9  UR>                  R                  R
                  R                  SX%U-  S-  -  S9  UR@                  (       a4  URB                  R                  R
                  R                  SX#S-  -  S9  ggg)zInitialize the weights      ?        )r:   stdlm_head      rQ   N)"rM   initializer_factorr`   r"   r*   datafill_Pop2PianoConcatEmbeddingToMel	embeddingnormal_!Pop2PianoForConditionalGenerationsharedhasattrtie_word_embeddingsr.  rK   rU   rS   rQ   zero_rV   rT   rf   ri   rj   r~   r   r   r   r   r   r   r   r   )r,   modulefactorrS   r   r   s         r0   _init_weights&Pop2PianoPreTrainedModel._init_weightsN  s   //f011MM$$Vc\2 =>>##((00cv|0L ABB MM  %%--3FSL-Ivy))$++2Q2Q%%**22#2N 3R) 677 II!!))s;;CVCV[_B_8`)avyy&))fiinn.H		##))+II!!))s;;CSCSX\B\8])^vyy&))fiinn.H		##))+ /I) ;<<KK##++&[[EXEX]aDa:b+cv{{F++0@0@0L  %%++-KK##++&[[EXEX]aDa:b+cv{{F++0@0@0L  %%++-II!!))s;;CSCSX\B\8])^vyy&))fiinn.H		##))+ /I) 233 kk))G!%!1!1kk++GHHOO  ((cvL^B^cgAg7h(iHHOO  ((cv$7O(PHHOO  ((cv$7O(PHHOO  ((cvL^B^cgAg7h(i11..55::BBQWhl[lQmBn 2 4r2   c                    U R                   R                  nU R                   R                  nUc  [        S5      e[	        U5      (       aE  [
        R                  " UR                  S S S-   U5      n[
        R                  " XASS S24   /SS9nO=UR                  UR                  5      nUSS S24   R                  5       USSS 24'   X$S'   Uc  [        S5      eUR                  US	:H  U5        U$ )
Nzoself.model.config.decoder_start_token_id has to be defined. In Pop2Piano it is usually set to the pad_token_id.r5   )r   .r   r   ).r   z1self.model.config.pad_token_id has to be defined.)rM   decoder_start_token_idpad_token_id
ValueErrorr   r(   fullr   cat	new_zerosclonemasked_fill_)r,   	input_idsrA  rB  shifted_input_idss        r0   _shift_right%Pop2PianoPreTrainedModel._shift_right|  s    !%!C!C{{//!) B 
 Y'' %

9??3B+?$+FH^ _ %		+<SbS>Q*RXZ [ ) 3 3IOO D)238)<)B)B)Dc12g&(>f%PQQ&&'8D'@,O  r2    N)rC   rD   rE   rF   r   __annotations__base_model_prefixis_parallelizablesupports_gradient_checkpointing_can_compile_fullgraph_no_split_modules_keep_in_fp32_modulesr=  rK  rG   rM  r2   r0   r(  r(  C  s=    %&*#")*!F,o\!r2   r(  c                   (  ^  \ rS rSrSU 4S jjrS r             SS jr SS\\R                  S4   S\R                  S\R                  S	\
S
\4
S jjr\S\R                  S\S\S\R                  S\R                  S\4S j5       rSrU =r$ )Pop2PianoStacki  c                   > [         TU ]  U5        X l        UR                  U l        [        R
                  " [        UR                  5       Vs/ s H  n[        U[        US:H  5      US9PM     sn5      U l
        [        UR                  UR                  S9U l        [        R                  " UR                   5      U l        U R%                  5         SU l        S U l        SU l        g s  snf )Nr   r   rt   F)r%   r&   embed_tokensr   r   r  range
num_layersr  r   blockr"   rS   rw   final_layer_normrW   rX   rY   	post_initmodel_parallel
device_mapr   )r,   rM   rX  ir/   s       r0   r&   Pop2PianoStack.__init__  s     ( ++]] v0011A v4Q<[\]1

 !36>>vG`G` azz&"5"56 	#&+#s   !C*c                     Xl         g r_   )rX  r,   new_embeddingss     r0   set_input_embeddings#Pop2PianoStack.set_input_embeddings  s    *r2   c                 V
   U	b  U	OU R                   R                  n	U
b  U
OU R                   R                  n
Ub  UOU R                   R                  nUb  UOU R                   R                  nUb*  Ub'  U R
                  (       a  SOSn[        SU SU S35      eUb&  UR                  5       nUR                  SUS   5      nO>Ub  UR                  5       S S nO'U R
                  (       a  SOSn[        SU SU S	35      eU R                  (       a/  U R                  (       a  U	(       a  [        R                  S
5        Sn	Uc)  U R                  c  [        S5      eU R                  U5      nUu  nnU	SL a   U R
                  (       d  [        SU  S35      eU R
                  (       ah  U	(       a`  Uc]  U R                   R                  (       a/  [        [!        U R                   S9[!        U R                   S95      nO'[!        U R                   S9nOU R
                  (       d  S nUb  UR#                  5       OSnUc#  [$        R&                  " UUU-   UR(                  S9nUc4  [+        5       (       d%  UU-   n[$        R,                  " UUUR(                  S9nU R                   R
                  (       a7  U R/                  UUU[1        U[        5      (       a  UR2                  OUU
5      nOVUS S 2S S S S 24   nUR5                  UR6                  S9nSU-
  [$        R8                  " UR6                  5      R:                  -  nU R
                  (       aO  UbL  UR                  5       u  nnnUU4nUc  [$        R,                  " UUR(                  S9nU R=                  U5      nOS nU R?                  X`R                   R@                  5      nU R?                  XpR                   R@                  5      nU(       a  SOS nU
(       a  SOS nU
(       a  U R
                  (       a  SOS nS nS nU RC                  U5      n[E        U RF                  5       H  u  n n!UU    n"UU    n#U(       a  UU4-   nU!" UUUUUUU"U#UU	U
US9n$U$S   nU$S   nU R
                  (       a  Ub  U$U
(       a  SOS   nU
(       d  Mc  UU$S   4-   nU R
                  (       d  M  UU$S   4-   nM     U RI                  U5      nU RC                  U5      nU(       a  UU4-   nU(       d  [K        S UUUUU4 5       5      $ [M        UUUUUS9$ )Ndecoder_ zYou cannot specify both zinput_ids and zinputs_embeds at the same timer5   zYou have to specify either zinput_ids or inputs_embedszZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fz<You have to initialize the model with valid token embeddingsTz)`use_cache` can only be set to `True` if z is used as a decoder)rM   r   r   )r<   r+  rM  )r   r  r   r   r   r   r   r   r4      c              3   0   #    U  H  nUc  M  Uv   M     g 7fr_   rM  ).0r   s     r0   	<genexpr>)Pop2PianoStack.forward.<locals>.<genexpr>R  s"      
A  s   	)last_hidden_stater   r?   
attentionscross_attentions)'rM   r   r   output_hidden_statesuse_return_dictr   rC  sizer   r   r   r   r   rX  is_encoder_decoderr   r   get_seq_lengthr(   r   r   r   r)   _update_causal_maskr`   r   r7   r<   r  r   invert_attention_maskget_head_maskrZ  rY   	enumerater[  r\  tupler   )%r,   rI  r  r  r  rj  	head_maskcross_attn_head_maskr   r   r   rt  r   r   err_msg_prefixinput_shaper   r   past_key_values_lengthmask_seq_lengthr   encoder_batch_sizeencoder_sequence_length_encoder_hidden_shapeencoder_extended_attention_maskall_hidden_statesall_attentionsall_cross_attentionsr   r  r?   r`  layer_moduler   r  layer_outputss%                                        r0   rA   Pop2PianoStack.forward  s/     "+!6IDKK<Q<Q	1B1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>+/??ZN*>*:.HXXvw  "#..*K!r;r?;I&',,.s3K+/??ZN:>:J-XfWggtuvv&&4==##p "	   ( !_`` --i8M!,
J?? #LTFRg!hii??_4;;11&9$DKK8,dkk:Z'O '3$++&FO #OETE`!?!?!Afg!"\\&(>(KTaThThN !*B*D*D4zAO"ZZ
OML`L`aN;;!!22o/BCC  44$!K )D$)9:K%..}/B/B.CK,M<O<O0P0T0TTK ??4@=R=W=W=Y: 7$68O#P %-).4HQ^QeQe)f&.2.H.HI_.`+.2+ &&y++2H2HI	#112FH^H^_"6BD0d&7DOOrRV(,%]3(4OA|'lO)=a)@&#$58H$H!(%/- /+E /#"3-M *!,M
 *!,M#8#D0=CTaZ[0\-  !/=3C2E!E???+?=QRCSBU+U(C  5F --m<]3   1]4D D 
 "#%"(
 
 
 9+++%1
 	
r2   r  r   input_tensorr   r   r   c           	         U R                   R                  S:X  a  Ub  US:H  R                  5       (       a  U$ g U R                   R                  S:X  a,  [        U[        R
                  5      (       a  [        U5      nU$ Ub  UR                  5       OSnUb  UR                  OSnU R                   R                  S:X  a5  U(       d.  U(       d'  [        R                  " UUUU R                  S9(       a  g UR                  nUR                  S   n	U(       a  UR                  5       n
O5[        U[        R
                  5      (       a  UR                  S	   OXi-   S-   n
U R                  UU	U
UUUR                  S   S
9nU R                   R                  S:X  aZ  UbW  UR                   R"                  S;   a=  U(       d6  [        R$                  " U5      R&                  n[        R(                  " X5      nU$ )Nflash_attention_2r,  flex_attentionr   Fsdpa)rj  r  is_trainingr   r5   )sequence_lengthtarget_lengthr<   r   r   )cudaxpunpu)rM   _attn_implementationr  r`   r(   ra   r   rx  is_compileabler   _ignore_causal_mask_sdpar   r<   r   get_max_cache_shape5_prepare_4d_causal_attention_mask_with_cache_positionr   typer  r   _unmask_unattended)r,   r  r  r   r   r   past_seen_tokensusing_compilable_cacher<   r  r  r   	min_dtypes                r0   ry  "Pop2PianoStack._update_causal_maskf  s    ;;++/BB)~/D.I.I.K.K%%;;++/??.%,,77!<^!L!!
 @O?Z?99;`aCRC^!?!?di ;;++v5>T]n%>>*'7 MM	 ""&,,Q/!+??AM nell;; $$R(%7!;  PP+')#))!, Q 
 KK,,6*%%**.DD%
 E*..I0CCK[Kr2   r  r  r<   r   c                    U b  U R                  5       S:X  a  U nU$ [        R                  " U5      R                  n[        R                  " X4XUR
                  S9nUS:w  a  [        R                  " USS9nU[        R                  " X$R
                  S9UR                  SS5      :  -  nUSSSS2SS24   R                  USSS5      nU b  UR                  5       nU R                  S   n	USS2SS2SS2SU	24   U SS2SSSS24   R                  UR
                  5      -   n
U
S:H  n
USS2SS2SS2SU	24   R                  X5      USS2SS2SS2SU	24'   U$ )	a  
Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape
`(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing.

Args:
    attention_mask (`torch.Tensor`):
        A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape
        `(batch_size, 1, query_length, key_value_length)`.
    sequence_length (`int`):
        The sequence length being processed.
    target_length (`int`):
        The target length: when generating with static cache, the mask should be as long as the static cache,
        to account for the 0 padding, the part of the cache that is not filled yet.
    dtype (`torch.dtype`):
        The dtype to use for the 4D attention mask.
    cache_position (`torch.Tensor`):
        Indices depicting the position of the input sequence tokens in the sequence.
    batch_size (`torch.Tensor`):
        Batch size.
Nrl  )
fill_valuer<   r   r   )diagonalrk  r5   r   )r   r(   r  r   rD  r   triur   reshapeexpandrG  r   r7   masked_fill)r  r  r  r<   r   r   kwargsr   r  mask_lengthpadding_masks              r0   r  DPop2PianoStack._prepare_4d_causal_attention_mask_with_cache_position  s}   > %.*<*<*>!*C(K* ' E*..I** 0Y\j\q\qK !##jjqA5<<>S>STWeWmWmnprsWtttK%dD!Q&67>>z1bRTUK))//1,2226*1aL[L+@ANSTVZ\`bcScDdDgDg&&E    ,q05@Aq,;,AV5W5c5c 6Aq!\k\12 r2   )r[  r_  rY   rX  r\  r   r   r^  r_   )NNNNNNNNNNNNN)F)rC   rD   rE   rF   r&   re  rA   r   r(   ra   r
   r   ry  r   r   r<   r  rG   rH   rI   s   @r0   rV  rV    s    ,.+
 "#!!p
r #(BellK78B llB 	B
 B  BH 444 4 {{	4
 4 4 4r2   rV  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )r3  i  z'Embedding Matrix for `composer` tokens.c                 ~   > [         TU ]  5         [        R                  " UR                  UR
                  S9U l        g )N)num_embeddingsembedding_dim)r%   r&   r   r   composer_vocab_sizerS   r4  r\   s     r0   r&   &Pop2PianoConcatEmbeddingToMel.__init__  s-    V5O5O_e_m_mnr2   c                 z    X#-
  nU R                  U5      R                  S5      n[        R                  " XQ/SS9nU$ )Nr   r   )r4  r   r(   rE  )r,   featureindex_valueembedding_offsetindex_shiftedcomposer_embeddingrj  s          r0   rA   %Pop2PianoConcatEmbeddingToMel.forward  s>    #6!^^M:DDQG		#5"?QGr2   )r4  )	rC   rD   rE   rF   __doc__r&   rA   rG   rH   rI   s   @r0   r3  r3    s    1o r2   r3  zA
    Pop2Piano Model with a `language modeling` head on top.
    )custom_introc            *          ^  \ rS rSr/ SQrS\4U 4S jjrS rS rS r	 S"S\
R                  S	\S
\S\\
R                     4S jjr\                  S#S\\
R"                     S\\
R                     S\\
R"                     S\\
R$                     S\\
R                     S\\
R                     S\\
R&                     S\\\\
R&                           S\\   S\\
R                     S\\
R                     S\\
R                     S\\
R"                     S\\   S\\   S\\   S\\   S\\
R"                     S\\\
R                     \4   4&S jj5       r\
R4                  " 5          S$U 4S jj5       rS\
R&                  4S  jrS!rU =r$ )%r6  i  )zencoder.embed_tokens.weightzdecoder.embed_tokens.weightzlm_head.weightrM   c                 x  > [         TU ]  U5        Xl        UR                  U l        [
        R                  " UR                  UR                  5      U l        [        U5      U l
        [        R                  " U5      nSUl        SUl        SUl        [!        X R                  5      U l        [        R                  " U5      nSUl        SUl        UR$                  Ul        [!        X0R                  5      U l        [
        R*                  " UR                  UR                  SS9U l        U R/                  5         g )NFTrP   )r%   r&   rM   rS   	model_dimr   r   
vocab_sizer7  r3  mel_conditionercopydeepcopyr   r   tie_encoder_decoderrV  encodernum_decoder_layersrZ  decoderrR   r.  r]  )r,   rM   encoder_configdecoder_configr/   s       r0   r&   *Pop2PianoForConditionalGeneration.__init__  s     ll6#4#4fnnE<VDv.$)!#( -2*%nkkBv.$(!-2*$*$=$=!%nkkByy1B1BO 	r2   c                     U R                   $ r_   )r7  r,   s    r0   get_input_embeddings6Pop2PianoForConditionalGeneration.get_input_embeddings  s    {{r2   c                 |    Xl         U R                  R                  U5        U R                  R                  U5        g r_   )r7  r  re  r  rc  s     r0   re  6Pop2PianoForConditionalGeneration.set_input_embeddings  s+    $)).9)).9r2   c                     U R                   $ r_   )r  r  s    r0   get_encoder-Pop2PianoForConditionalGeneration.get_encoder  s    ||r2   input_featurescomposergeneration_configr  c                    UR                   nX%;  a(  [        S[        UR                  5       5       SU 35      eXR   n[        R
                  " X`R                  S9nUR                  UR                  S   5      n[        UR                  5       5      nU R                  UUUS9nUbK  SXSS2S4   R                  5       ) '   [        R                  " USS2S4   R                  SS	5      U/S	S
9nX4$ US4$ )ak  
This method is used to concatenate mel conditioner tokens at the front of the input_features in order to
control the type of MIDI token generated by the model.

Args:
    input_features (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
        input features extracted from the feature extractor.
    composer (`str`):
        composer token which determines the type of MIDI tokens to be generated.
    generation_config (`~generation.GenerationConfig`):
        The generation is used to get the composer-feature_token pair.
    attention_mask (``, *optional*):
        For batched generation `input_features` are padded to have the same shape across all examples.
        `attention_mask` helps to determine which areas were padded and which were not.
        - 1 for tokens that are **not padded**,
        - 0 for tokens that are **padded**.
zPlease choose a composer from z. Composer received - rk  r   )r  r  r  Nr,  r5   r   )axis)composer_to_feature_tokenrC  r   r   r(   tensorr   repeatr   r   r   r  r   concatenater   )r,   r  r  r  r  r  composer_valuer  s           r0   get_mel_conditioner_outputs=Pop2PianoForConditionalGeneration.get_mel_conditioner_outputs  s   0 %6$O$O!406O6T6T6V1W0XXnownxy  3<n[[I'..~/C/CA/FG8??AB--"&- . 

 %;>N1a4055778 #..q!t0D0I0I"a0PR`/ahijN!11t##r2   rI  decoder_input_idsdecoder_attention_maskr~  decoder_head_maskr  encoder_outputsr   rj  decoder_inputs_embedslabelsr   r   rt  r   r   returnc                    Ub  UOU R                   R                  nUb  UOU R                   R                  nU
b  Ub  [        S5      eUb  U
c  Un
Uc  U R	                  UUU
UUUUS9nORU(       aK  [        U[        5      (       d6  [        US   [        U5      S:  a  US   OS[        U5      S:  a  US   OSS9nUS   nUb  Uc  Uc  U R                  U5      nU R                  UUUU	UUUUUUUUUS9nUS   nU R                   R                  (       a  UU R                  S	-  -  nU R                  U5      nSnUb@  [        S
S9nU" UR                  SUR                  S5      5      UR                  S5      5      nU(       d  U4USS -   U-   nUb  U4U-   $ U$ [!        UUUR"                  UR$                  UR&                  UR(                  UR*                  UR$                  UR&                  S9	$ )aB	  
input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
    Indices of input sequence tokens in the vocabulary. Pop2Piano is a model with relative position embeddings
    so you should be able to pad the inputs on both the right and the left. Indices can be obtained using
    [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for detail.
    [What are input IDs?](../glossary#input-ids) To know more on how to prepare `input_ids` for pretraining
    take a look a [Pop2Piano Training](./Pop2Piano#training).
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Indices of decoder input sequence tokens in the vocabulary. Indices can be obtained using
    [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details.
    [What are decoder input IDs?](../glossary#decoder-input-ids) Pop2Piano uses the `pad_token_id` as the
    starting token for `decoder_input_ids` generation. If `past_key_values` is used, optionally only the last
    `decoder_input_ids` have to be input (see `past_key_values`). To know more on how to prepare
decoder_attention_mask (`torch.BoolTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
    be used by default.
decoder_head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
    Mask to nullify selected heads of the self-attention modules in the decoder. Mask values selected in `[0,
    1]`:
    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
cross_attn_head_mask (`torch.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
    Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in
    `[0, 1]`:
    - 1 indicates the head is **not masked**,
    - 0 indicates the head is **masked**.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[-100, 0, ...,
    config.vocab_size - 1]`. All labels set to `-100` are ignored (masked), the loss is only computed for
    labels in `[0, ..., config.vocab_size]`
NzSBoth `inputs_embeds` and `input_features` received! Please provide only one of them)rI  r  rj  r~  r   rt  r   r   r   r4   )rq  r?   rr  )rI  r  rj  r   r  r  r~  r  r   r   rt  r   r   r/  r@  )ignore_indexr5   )	losslogitsr   decoder_hidden_statesdecoder_attentionsrs  encoder_last_hidden_stater  encoder_attentions)rM   r   ru  rC  r  r`   r   r   rK  r  r9  r  r.  r   r   rv  r   r   r?   rr  rs  rq  )r,   rI  r  r  r  r~  r  r  r  r   rj  r  r  r  r   r   rt  r   r   r?   decoder_outputssequence_output	lm_logitsr  loss_fctoutputs                             r0   rA   )Pop2PianoForConditionalGeneration.forwardP  sA   j "+!6IDKK<Q<Q	%0%<k$++B]B]$)Crss'M,A*M ""ll#-+#"3%9' + O O_!M!M-"1!"4474H14Loa0RV14_1E1I?1-tO (*"3";@U@] $ 1 1& 9 ,,'1/+"/#1'!5/!5#) ' 
  *!,;;** .1EFOLL1	'T:HINN2y~~b/ABFKKPROTD\OAB$77/IF)-)9TGf$EvE+;;"1"?"?.99,==&5&G&G"1"?"?.99

 
	
r2   c                   > Uc  U R                   nUR                  " S	0 UD6  [        US5      (       d  [        S5      e[	        UR
                  5      U R                  R                  :w  a9  [        SU R                  R                   S[	        UR
                  5       S35      eU R                  UUUUS9u  p[        TU ](  " S	SUUUS.UD6$ )
aV  
Generates token ids for midi outputs.

<Tip warning={true}>

Most generation-controlling parameters are set in `generation_config` which, if not passed, will be set to the
model's default generation configuration. You can override any `generation_config` by passing the corresponding
parameters to generate(), e.g. `.generate(inputs, num_beams=4, do_sample=True)`. For an overview of generation
strategies and code examples, check out the [following guide](./generation_strategies).

</Tip>

Parameters:
    input_features (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
        This is the featurized version of audio generated by `Pop2PianoFeatureExtractor`.
    attention_mask:
        For batched generation `input_features` are padded to have the same shape across all examples.
        `attention_mask` helps to determine which areas were padded and which were not.
        - 1 for tokens that are **not padded**,
        - 0 for tokens that are **padded**.
    composer (`str`, *optional*, defaults to `"composer1"`):
        This value is passed to `Pop2PianoConcatEmbeddingToMel` to generate different embeddings for each
        `"composer"`. Please make sure that the composer value is present in `composer_to_feature_token` in
        `generation_config`. For an example please see
        https://huggingface.co/sweetcocoa/pop2piano/blob/main/generation_config.json .
    generation_config (`~generation.GenerationConfig`, *optional*):
        The generation configuration to be used as base parametrization for the generation call. `**kwargs`
        passed to generate matching the attributes of `generation_config` will override them. If
        `generation_config` is not provided, the default will be used, which had the following loading
        priority: 1) from the `generation_config.json` model file, if it exists; 2) from the model
        configuration. Please note that unspecified parameters will inherit [`~generation.GenerationConfig`]'s
        default values, whose documentation should be checked to parameterize generation.
    kwargs:
        Ad hoc parametrization of `generate_config` and/or additional model-specific kwargs that will be
        forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
        specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
Return:
    [`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
    or when `config.return_dict_in_generate=True`) or a `torch.FloatTensor`.
        Since Pop2Piano is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
        [`~utils.ModelOutput`] types are:
            - [`~generation.GenerateEncoderDecoderOutput`],
            - [`~generation.GenerateBeamEncoderDecoderOutput`]
Nr  z`composer_to_feature_token` was not found! Please refer to https://huggingface.co/sweetcocoa/pop2piano/blob/main/generation_config.jsonand parse a dict like that.ztconfig.composer_vocab_size must be same as the number of keys in generation_config.composer_to_feature_token! Found z vs .)r  r  r  r  )inputsrj  r  r  rM  )r  r   r8  rC  r   r  rM   r  r  r%   generate)r,   r  r  r  r  r  r/   s         r0   r  *Pop2PianoForConditionalGeneration.generate  s    l $ $ 6 6  *6* (*EFF.   ::;t{{?^?^^889cBSBmBm>n=oopr  *.)I)I))/	 *J *
& w 
()/	

 
 	
r2   c                 $    U R                  U5      $ r_   )rK  )r,   r  s     r0   %prepare_decoder_input_ids_from_labelsGPop2PianoForConditionalGeneration.prepare_decoder_input_ids_from_labels/  s      ((r2   )rM   r  r  r.  r  r  r7  r_   )NNNNNNNNNNNNNNNNNN)N	composer1N)rC   rD   rE   rF   _tied_weights_keysr   r&   r  re  r  r(   FloatTensorstrr   r   r  r   
LongTensor
BoolTensorra   r}  r
   r   r   r   rA   no_gradr  r  rG   rH   rI   s   @r0   r6  r6    s    j 6:
 7;/$))/$ /$ ,	/$
 !!2!23/$b  156:8<=A159=7;@D+/596:=A-1$(,0/3&*59'B
E,,-B
 !!2!23B
 $E$4$45	B

 !))9)9 :B
 E--.B
 $E$5$56B
 'u||4B
 "%ell(;"<=B
 "%B
   1 12B
 !!2!23B
  ((9(9:B
 ))*B
 D>B
  $D>!B
" 'tn#B
$ d^%B
& !!1!12'B
( 
uU&&'8	9)B
 B
H ]]_ W
 W
r)ELL ) )r2   r6  )Gr  r  r   typingr   r   r(   r   torch.nnr   transformers.generationr   activationsr	   cache_utilsr
   r   r   
generationr   modeling_attn_mask_utilsr   modeling_layersr   modeling_outputsr   r   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   r   r   r   utils.deprecationr   configuration_pop2pianor   !torch.nn.attention.flex_attentionr   integrations.flex_attentionr   
get_loggerrC   r   _load_pop2piano_layer_normapex.normalizationr    infoImportError	ExceptionwarningModuler"   rK   rf   rq   r~   r   r  r  r(  rV  r3  r6  __all__rM  r2   r0   <module>r     s      "   % 4 ! C C ) > 9 k k - Q w w 0 4  !!;J 
		H	%! 	/!&
KKhi+ +2 "%RYY .")) <ryy &f fT"")) "L$299 $Pa
/ a
H P! P! P!fI- IX
BII  
z)(@/ z)
z)z	 /0J
Kw'  	 	
NN^_	s   *F4 4G<GG