
    cCi                      S r SSKJr  SSKrSSKJr  SSKrSSKr	SSK
Jr  SSKJrJr  SSKJrJrJrJrJr  SS	KJrJr  SS
KJrJrJrJr  SSKJr  \R>                  " \ 5      r!Sr"Sr#S r$S r% S@         SAS jjr&SBSCS jjr' " S S\RP                  RR                  5      r* " S S\RP                  RV                  5      r, " S S\RP                  RR                  5      r- " S S\RP                  RR                  5      r. " S S\RP                  RR                  5      r/ " S S\RP                  RR                  5      r0 " S S \RP                  RR                  5      r1 " S! S"\RP                  RR                  5      r2 " S# S$\25      r3 " S% S&\RP                  RR                  5      r4 " S' S(\RP                  RR                  5      r5 " S) S*\RP                  RR                  5      r6 " S+ S,\RP                  RR                  5      r7 " S- S.\RP                  RR                  5      r8 " S/ S0\RP                  RR                  5      r9 " S1 S2\RP                  RR                  5      r:\ " S3 S4\RP                  RR                  5      5       r; " S5 S6\5      r<S7r=S8r>\" S9\=5       " S: S;\<5      5       r?\" S<\=5       " S= S>\<5      5       r@/ S?QrAg)DzTensorFlow Hubert model.    )annotationsN)Any   )get_tf_activation)TFBaseModelOutputTFCausalLMOutput)TFPreTrainedModelget_initializerkeraskeras_serializableunpack_inputs)
shape_liststable_softmax)add_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )HubertConfigr   g    חc                    [         R                  R                  [         R                  R	                  [        U 5      SS5      5      * n[         R                  R                  X-   U5      u  p4U$ )z
Categorical sampling without replacement is currently not implemented. The gumbel-max trick will do for now - see
https://github.com/tensorflow/tensorflow/issues/9260 for more info
r   r   )tfmathlograndomuniformr   nntop_k)distributionnum_samplesz_indicess        g/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/hubert/modeling_tf_hubert.py_sample_without_replacementr$   5   sO    
 
RYY&&z,'?AF	GGA\-{;JAN    c           
        [        U5      n[        R                  " [        R                  " [        R                  " [        R
                  " US   5      SS9U5      SS/5      n[        R                  " [        R                  " U[        R                  " USS/5      /S5      5      n[        R                  " U[        R                  " U S/5      U5      $ )zL
Scatter function as in PyTorch with indices in format (batch_dim, indices)
r   axisr   )	r   r   reshapebroadcast_toexpand_dimsrange	transposeconcat
scatter_nd)valuesbatch_indicesoutput_shapeindices_shapebroad_casted_batch_dimspair_indicess         r#    _scatter_values_on_batch_indicesr7   @   s     }-M jj
rxxa0@'AK][^_ac]d <<		+BBJJ}_`bd^eDf*gij klL==rzz&2$'?NNr%   c           	     r   U u  pEUS:  a  [        S5      e[        R                  R                  UUSU SU S3S9  U[        R                  " U[        R
                  5      -  U-  [        R                  R                  S5      -   n[        R                  " Xc5      n[        R                  " U[        R                  5      n[        R                  R                  XR-  U5      n[        R                  " U5      n[        R                  " XE4[        R                  S9n[        R                  " XEUS-
  -
  45      n[        X5      n	[        R                   " U	S	5      n	[        R"                  " U	SSU45      n	[        R$                  " XXb-  45      n	[        R&                  " U5      [        R(                  [        R(                  S
S
24   n
[        R"                  " XUS45      n
[        R$                  " XXb-  45      n
X-   n	[+        [        R,                  " U	5      U	[        R.                  " U5      5      nU$ )ab  
Computes random mask spans for a given shape

Args:
    shape: the shape for which to compute masks.
        should be of size 2 where first element is batch size and 2nd is timesteps
    attention_mask: optional padding mask of the same size as shape, which will prevent masking padded elements
    mask_prob:
        probability for each token to be chosen as start of the span to be masked. this will be multiplied by
        number of timesteps divided by length of mask span to mask approximately this percentage of all elements.
        however due to overlaps, the actual number will be smaller (unless no_overlap is True)
    mask_length: size of the mask
    min_masks: minimum number of masked spans

Adapted from [fairseq's
data_utils.py](https://github.com/pytorch/fairseq/blob/e0788f7007a8473a76db573985031f3c94201e79/fairseq/data/data_utils.py#L376).
r   z&`mask_length` has to be bigger than 0.zO`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: z and `sequence_length`: `messager   dtyper'   N)
ValueErrorr   	debuggingassert_lesscastfloat32r   r   maximumint32r   minimumsqueezezerosonesr$   r,   tiler*   r-   newaxisr7   	ones_likeshape)rM   	mask_probmask_length	min_masks
batch_sizesequence_lengthnum_masked_spansspec_aug_maskuniform_distspec_aug_mask_idxsoffsetss              r#   _compute_mask_indicesrX   P   s   . #(JQABBLL]^i]j k##2"316	   !277?BJJ#GG+UXZXaXaXiXijnXoozz"2>ww/: ww'EGWXzz"23 HHj:"((KM 77J;?(KLML 5\T (:B?!3aK5HI$6EUEc8dehh{#BJJ

A$=>Gggg,<a@AGjj/?/M"NOG+5 5
'(*<bhh}>UM r%   c                    [        U 5      S   nUb  UOUn[        R                  " S5      n[        R                  " XR                  S9n [        R
                  " U SS2SSSS24   SSUS45      nX4-
  [        -  $ )zW
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
r   Ng      ?r=   )r   r   constantrB   r>   rJ   LARGE_NEGATIVE)masktgt_lensrc_lenone_cstexpanded_masks        r#   _expand_maskra      st     q!G ,g'Gkk#G774}}-DGGDD$!12Q7A4FGM#~55r%   c                     ^  \ rS rSrSr           S                     SU 4S jjjrU 4S jrS rU 4S jrS r	S r
S	 rS
 rS rS rS rS rS rS rS rS rSrU =r$ )TFHubertGroupNorm   zh
From tensorflow-addons https://www.tensorflow.org/addons/api_docs/python/tfa/layers/GroupNormalization
c                @  > [         TU ]  " S0 UD6  SU l        Xl        X l        X0l        X@l        XPl        [        R                  R                  U5      U l        [        R                  R                  U5      U l        [        R                  R                  U5      U l        [        R                  R                  U	5      U l        [        R                   R                  U
5      U l        [        R                   R                  U5      U l        U R'                  5         g )NT )super__init__supports_maskinggroupsr)   epsiloncenterscaler   initializersgetbeta_initializergamma_initializerregularizersbeta_regularizergamma_regularizerconstraintsbeta_constraintgamma_constraint_check_axis)selfrj   r)   rk   rl   rm   rp   rq   rs   rt   rv   rw   kwargs	__class__s                r#   rh   TFHubertGroupNorm.__init__   s     	"6" $	
 % 2 2 6 67G H!&!3!3!7!78I!J % 2 2 6 67G H!&!3!3!7!78I!J$0044_E % 1 1 5 56F Gr%   c                   > U R                  U5        U R                  U5        U R                  U5        U R                  U5        U R	                  U5        U R                  U5        SU l        [        TU ]!  U5        g NT)	_check_if_input_shape_is_none'_set_number_of_groups_for_instance_norm_check_size_of_dimensions_create_input_spec_add_gamma_weight_add_beta_weightbuiltrg   buildry   input_shaper{   s     r#   r   TFHubertGroupNorm.build   sj    **;744[A&&{3,{+k*
k"r%   c                8   [         R                  R                  U5      n[        R                  " U5      nU R                  XU5      u  pEU R                  XB5      nX R                     U R                  -  S:H  nU(       d  [        R                  " Xc5      nU$ UnU$ Nr   )
r   backend	int_shaper   rM   _reshape_into_groups_apply_normalizationr)   rj   r*   )	ry   inputsr   tensor_input_shapereshaped_inputsgroup_shapenormalized_inputsis_instance_normoutputss	            r#   callTFHubertGroupNorm.call   s    mm--f5XXf-'+'@'@Vh'i$ 55oS'		2dkkAaGjj!2GG  (Gr%   c                  > U R                   U R                  U R                  U R                  U R                  [
        R                  R                  U R                  5      [
        R                  R                  U R                  5      [
        R                  R                  U R                  5      [
        R                  R                  U R                  5      [
        R                  R                  U R                  5      [
        R                  R                  U R                  5      S.n[         TU ]E  5       n0 UEUE$ )N)rj   r)   rk   rl   rm   rp   rq   rs   rt   rv   rw   )rj   r)   rk   rl   rm   r   rn   	serializerp   rq   rr   rs   rt   ru   rv   rw   rg   
get_config)ry   configbase_configr{   s      r#   r   TFHubertGroupNorm.get_config   s    kkII||kkZZ % 2 2 < <T=R=R S!&!3!3!=!=d>T>T!U % 2 2 < <T=R=R S!&!3!3!=!=d>T>T!U$00::4;O;OP % 1 1 ; ;D<Q<Q R
 g(*(+(((r%   c                    U$ Nrf   ry   r   s     r#   compute_output_shape&TFHubertGroupNorm.compute_output_shape   s    r%   c                   [        [        U5      5       Vs/ s H  oCU   PM	     nnX R                     U R                  -  S:H  nU(       d|  X R                     U R                  -  XPR                  '   UR	                  U R                  U R                  5        [
        R                  " U5      n[
        R                  " X5      nXu4$ X4$ s  snf r   )r-   lenr)   rj   insertr   stackr*   )ry   r   r   r   ir   r   r   s           r#   r   &TFHubertGroupNorm._reshape_into_groups   s    6;C<L6MN6M!,6MN'		2dkkAaG%0%;t{{%JK		"tyy$++6((;/K jj=O"//&& Os   Cc           	     0   [         R                  R                  U5      n[        [	        S[        U5      5      5      nX R                     U R                  -  S:H  nU(       d"  U R                  S:X  a  SOU R                  S-
  nO!U R                  S:X  a  SOU R                  S-
  nUR                  U5        [        R                  R                  XSS9u  pxU R                  U5      u  p[        R                  R                  UUUU	U
U R                  S9nU$ )Nr   r'   T)keepdims)meanvariancerm   offsetvariance_epsilon)r   r   r   listr-   r   r)   rj   popr   r   moments_get_reshaped_weightsbatch_normalizationrk   )ry   r   r   r   group_reduction_axesr   r)   r   r   gammabetar   s               r#   r   &TFHubertGroupNorm._apply_normalization  s    mm--o>#E!S-=$>?'		2dkkAaGb2dii!mDb2dii!mD  &W[\00=EE55!\\ 6 
 ! r%   c                    U R                  U5      nS nS nU R                  (       a!  [        R                  " U R                  U5      nU R
                  (       a!  [        R                  " U R                  U5      nX44$ r   )_create_broadcast_shaperm   r   r*   r   rl   r   )ry   r   broadcast_shaper   r   s        r#   r   'TFHubertGroupNorm._get_reshaped_weights  sZ    66{C::JJtzz?;E;;::dii9D{r%   c                    XR                      nUc3  [        S[        U R                   5      -   S-   [        U5      -   S-   5      eg )NzAxis z\ of input tensor should have a defined dimension but the layer received an input with shape .)r)   r?   strry   r   dims      r#   r   /TFHubertGroupNorm._check_if_input_shape_is_none)  sZ    ))$;dii.!pq k"# 	  r%   c                N    XR                      nU R                  S:X  a  X l        g g Nr'   )r)   rj   r   s      r#   r   9TFHubertGroupNorm._set_number_of_groups_for_instance_norm4  s$    ))$;;"K r%   c                .   XR                      nX R                  :  a3  [        S[        U R                  5      -   S-   [        U5      -   S-   5      eX R                  -  S:w  a3  [        S[        U R                  5      -   S-   [        U5      -   S-   5      eg )NzNumber of groups (z.) cannot be more than the number of channels ().r   z0) must be a multiple of the number of channels ()r)   rj   r?   r   r   s      r#   r   +TFHubertGroupNorm._check_size_of_dimensions:  s    ))$$dkk"#BC c( 	  !$dkk"#DE c( 	  "r%   c                :    U R                   S:X  a  [        S5      eg )Nr   zdYou are trying to normalize your batch axis. Do you want to use tf.layer.batch_normalization instead)r)   r?   ry   s    r#   rx   TFHubertGroupNorm._check_axisN  s"    99>v  r%   c                    XR                      n[        R                  R                  [	        U5      U R                   U0S9U l        g )N)ndimaxes)r)   r   layers	InputSpecr   
input_specr   s      r#   r   $TFHubertGroupNorm._create_input_specT  s8    ))$,,00c+6FdiiY\M]0^r%   c                    XR                      nU4nU R                  (       a7  U R                  USU R                  U R                  U R
                  S9U l        g S U l        g )Nr   rM   nameinitializerregularizer
constraint)r)   rm   
add_weightrq   rt   rw   r   ry   r   r   rM   s       r#   r   #TFHubertGroupNorm._add_gamma_weightX  s[    ))$:: 22 2200 ) DJ DJr%   c                    XR                      nU4nU R                  (       a7  U R                  USU R                  U R                  U R
                  S9U l        g S U l        g )Nr   r   )r)   rl   r   rp   rs   rv   r   r   s       r#   r   "TFHubertGroupNorm._add_beta_weightg  s[    ))$;; 11 11// ( DI DIr%   c                <   S/[        U5      -  nXR                     U R                  -  S:H  nU(       dO  XR                     U R                  -  X R                  '   UR                  U R                  U R                  5        U$ U R                  X R                  '   U$ r   )r   r)   rj   r   )ry   r   r   r   s       r#   r   )TFHubertGroupNorm._create_broadcast_shapev  s    #K 00'		2dkkAaG)4YY)?4;;)NOII&""499dkk:  *.OII&r%   )r)   r   rv   rp   rs   r   rl   rk   r   rw   rq   rt   rj   r   rm   ri   )    r'   gMbP?TTrH   rI   NNNN)rj   intr)   r   rk   floatrl   boolrm   r   rp   keras.initializers.Initializerrq   r   rs   keras.regularizers.Regularizerrt   r   rv   keras.constraints.Constraintrw   r   )__name__
__module____qualname____firstlineno____doc__rh   r   r   r   r   r   r   r   r   r   r   rx   r   r   r   r   __static_attributes____classcell__r{   s   @r#   rc   rc      s     ;B<B;?<@8<9=  	
   9 : 9 : 6 7 <	# )"
'!.		(_ r%   rc   c                  P   ^  \ rS rSrSrU 4S jrS rS rU 4S jrU 4S jr	Sr
U =r$ )	TFHubertWeightNormConv1Di  zeAdapted from https://www.tensorflow.org/probability/api_docs/python/tfp/layers/weight_norm/WeightNormc           
        > [         TU ]  " SUUUSSSS.UD6  X@l        SU l        [        R
                  " SS/5      U l        g )	NvalidT	he_normal)filterskernel_sizerj   paddinguse_biasbias_initializer   r   r   rf   )rg   rh   explicit_paddingfilter_axisr   rZ   kernel_norm_axes)ry   r   r   rj   r   rz   r{   s         r#   rh   !TFHubertWeightNormConv1D.__init__  sV     	
#(	
 	
 !1 "QF 3r%   c                $   [         R                  " [         R                  " [         R                  " U R                  5      U R
                  S95      nU R                  R                  USS2[         R                  [         R                  4   5        g)z"Set the norm of the weight vector.r(   N)	r   sqrt
reduce_sumsquareweight_vr   weight_gassignrK   )ry   kernel_norms     r#   
_init_norm#TFHubertWeightNormConv1D._init_norm  sT    ggbmmBIIdmm,D4K`K`ab[BJJ

)BCDr%   c                    [         R                  R                  U R                  U R                  S9[         R
                  " U R                  5      -  n[         R
                  " U5      U l        g)zGenerate normalized weights.r(   N)r   r   l2_normalizer  r   r.   r  kernel)ry   r  s     r#   _normalize_kernel*TFHubertWeightNormConv1D._normalize_kernel  sM    ##DMM8M8M#NQSQ]Q]^b^k^kQllll6*r%   c                  > U R                   (       d  [        TU ]	  U5        [        R                  " [        R
                  " U R                  5      SSS9U l        U R                  U l        U R                  S[        U R                  R                  U R                     5      SS4SU R                  R                  SS9U l        U R                  5         U R                  SU R                  4S	SS
9U l        g g )Nr  T)r   	trainabler  r   rI   )r   rM   r   r>   r  biasrH   )r   rM   r   r  )r   rg   r   r   Variabler.   r  r  r   r   rM   r   r>   r  r  r   r  r   s     r#   r   TFHubertWeightNormConv1D.build  s    zzGM+&++bll4;;&?j\`aDK KKDM OO4==..t/?/?@A1aH"mm)) , DM OOVDLL?X_kopDI r%   c                   > U R                  5         [        R                  " USU R                  U R                  4S45      n[        TU ]  U5      nU$ )N)r   r   )r  r   padr   rg   r   )ry   r   padded_inputsoutputr{   s       r#   r   TFHubertWeightNormConv1D.call  sM     	 v1F1FH]H]0^`f'ghm,r%   )r  r   r   r  r   r  r  )r   r   r   r   r   rh   r  r  r   r   r   r   r   s   @r#   r   r     s&    o4E
+
q"	 	r%   r   c                  D   ^  \ rS rSrSSU 4S jjjrSS jrS	S jrSrU =r$ )
TFHubertNoLayerNormConvLayeri  c                b  > [         TU ]  " S0 UD6  US:  a  UR                  U   OSU l        UR                  U   U l        [
        R                  R                  U R                  UR                  U   UR                  U   UR                  SS9U l        [        UR                  5      U l        g )Nr   r   convr   r   stridesr   r   rf   )rg   rh   conv_dimin_conv_dimout_conv_dimr   r   Conv1Dconv_kernelconv_stride	conv_biasr  r   feat_extract_activation
activationry   r   layer_idrz   r{   s       r#   rh   %TFHubertNoLayerNormConvLayer.__init__  s    "6"8@16??84!"OOH5LL''%%**84&&x0%% ( 
	 ,F,J,JKr%   c                J    U R                  U5      nU R                  U5      nU$ r   )r  r&  ry   hidden_statess     r#   r   !TFHubertNoLayerNormConvLayer.call  s$    		-06r%   c                ,   U R                   (       a  g SU l         [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       g = fNTr  )r   getattrr   
name_scoper  r   r   r  r   s     r#   r   "TFHubertNoLayerNormConvLayer.build  sg    ::
4&2tyy~~.		tT-=-= >? /. 3..s   )B
B)r&  r   r  r  r   r   r   r   r(  r   rz   r   returnNoner,  	tf.Tensorr5  r8  r   	r   r   r   r   rh   r   r   r   r   r   s   @r#   r  r    s     L L
@ @r%   r  c                  D   ^  \ rS rSrSSU 4S jjjrSS jrS	S jrSrU =r$ )
TFHubertLayerNormConvLayeri  c                  > [         TU ]  " S0 UD6  US:  a  UR                  U   OSU l        UR                  U   U l        [
        R                  R                  U R                  UR                  U   UR                  U   UR                  SS9U l        [
        R                  R                  SUR                  S9U l        [        UR                   5      U l        g )Nr   r   r  r  
layer_norm)r   rk   rf   )rg   rh   r  r  r   r   r   r!  r"  r#  r$  r  LayerNormalizationlayer_norm_epsr=  r   r%  r&  r'  s       r#   rh   #TFHubertLayerNormConvLayer.__init__  s    "6"8@16??84!"OOH5LL''%%**84&&x0%% ( 
	  ,,99|U[UjUj9k+F,J,JKr%   c                l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   r  r=  r&  r+  s     r#   r   TFHubertLayerNormConvLayer.call  2    		-066r%   c                    U R                   (       a  g SU l         [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       Ny= f! , (       d  f       g = fNTr  r=  
r   r0  r   r1  r  r   r   r  r=  r   r   s     r#   r    TFHubertLayerNormConvLayer.build      ::
4&2tyy~~.		tT-=-= >? /4t,8t334%%tT43D3D&EF 54 9 /. 54   )C.;)C?.
C<?
Dr&  r   r  r  r=  r   r3  r4  r7  r   r9  r   s   @r#   r;  r;    s     L L	G 	Gr%   r;  c                  D   ^  \ rS rSrSSU 4S jjjrSS jrS	S jrSrU =r$ )
TFHubertGroupNormConvLayeri  c                  > [         TU ]  " S0 UD6  US:  a  UR                  U   OSU l        UR                  U   U l        [
        R                  R                  U R                  UR                  U   UR                  U   UR                  SS9U l        [        UR                  5      U l        [        U R                  UR                   SS9U l        g )Nr   r   r  r  r=  )rj   rk   r   rf   )rg   rh   r  r  r   r   r   r!  r"  r#  r$  r  r   r%  r&  rc   r?  r=  r'  s       r#   rh   #TFHubertGroupNormConvLayer.__init__  s    "6"8@16??84!"OOH5LL''%%**84&&x0%% ( 
	 ,F,J,JK+43D3DfNcNcjvwr%   c                l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   rB  r+  s     r#   r   TFHubertGroupNormConvLayer.call  rD  r%   c                    U R                   (       a  g SU l         [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       Ny= f! , (       d  f       g = frF  rG  r   s     r#   r    TFHubertGroupNormConvLayer.build  rI  rJ  rK  r3  r4  r7  r   r9  r   s   @r#   rM  rM    s     x x	G 	Gr%   rM  c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )	TFHubertPositionalConvEmbeddingi  c                  > [         TU ]  " S0 UD6  [        UR                  UR                  UR
                  UR                  S-  SS9U l        [        UR                  5      U l        [        UR                  5      U l        Xl        g )Nr   r  )r   r   rj   r   r   rf   )rg   rh   r   hidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsr  TFHubertSamePadLayerr   r   r%  r&  r   ry   r   rz   r{   s      r#   rh   (TFHubertPositionalConvEmbedding.__init__   sv    "6",&&6677#;;q@
	 ,F,J,JK+F,J,JKr%   c                l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r  r   r&  r+  s     r#   r   $TFHubertPositionalConvEmbedding.call-  s2    		-0]36r%   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = fr/  )	r   r0  r   r1  r  r   r   r   rW  r   s     r#   r   %TFHubertPositionalConvEmbedding.build3  sk    ::
4&2tyy~~.		tT[[-D-D EF /. 3..s   3B
B)r&  r   r   r  r   r   r   rz   r   r5  r6  r7  r   r9  r   s   @r#   rU  rU    s    G Gr%   rU  c                  .   ^  \ rS rSrU 4S jrS rSrU =r$ )rZ  i=  c                V   > [         TU ]  " S0 UD6  US-  S:X  a  SU l        g SU l        g )Nr   r   r   rf   )rg   rh   num_pad_remove)ry   rX  rz   r{   s      r#   rh   TFHubertSamePadLayer.__init__>  s.    "6"#:Q#>!#Car%   c                X    U R                   S:  a  US S 2S U R                   * 2S S 24   nU$ )Nr   rd  r+  s     r#   r   TFHubertSamePadLayer.callB  s6    ")!-C0C0C/C-CQ*FGMr%   rg  )r   r   r   r   rh   r   r   r   r   s   @r#   rZ  rZ  =  s    K r%   rZ  c                  <   ^  \ rS rSrSU 4S jjrS rSS jrSrU =r$ )TFHubertFeatureEncoderiH  c                  > [         TU ]  " S	0 UD6  UR                  S:X  aK  [        USSS 3S9/[	        UR
                  S-
  5       Vs/ s H  n[        XS-   SUS-    3S9PM     sn-   nOZUR                  S:X  a1  [	        UR
                  5       Vs/ s H  n[        XSU 3S9PM     nnO[        SUR                   S35      eX@l	        g s  snf s  snf )
Ngroupr   zconv_layers.)r(  r   r   layerz`config.feat_extract_norm` is z), but has to be one of ['group', 'layer']rf   )
rg   rh   feat_extract_normrM  r-   num_feat_extract_layersr  r;  r?   conv_layers)ry   r   rz   r   rp  r{   s        r#   rh   TFHubertFeatureEncoder.__init__I  s   "6"##w.5fqQ]^_]`Oabcv==ABgBA -V!eLYZ]^Y^X_J`aBg K %%0 v==>>A +6lSTRUDVW>  K
 01I1I0JJst  'g
s   
CCc                j    [         R                  " US5      nU R                   H  nU" U5      nM     U$ r   )r   r,   rp  )ry   input_valuesr,  
conv_layers       r#   r   TFHubertFeatureEncoder.call\  s2    |R8**J&}5M +r%   c                    U R                   (       a  g SU l         U R                   H=  n[        R                  " UR                  5         UR                  S 5        S S S 5        M?     g ! , (       d  f       MQ  = fr~   )r   rp  r   r1  r   r   )ry   r   rt  s      r#   r   TFHubertFeatureEncoder.buildb  sP    ::
**Jz/  & 0/ +//s   	A((
A7	)r   rp  ra  r   r9  r   s   @r#   rj  rj  H  s    '&' 'r%   rj  c                  (   ^  \ rS rSrU 4S jrSrU =r$ )TFHubertFeatureExtractorik  c                   > [         TU ]  " U40 UD6  [        R                  " SU R                  R
                   SU R                  R                  S   R
                   S3[        5        g )NzThe class `zD` has been depreciated and will be removed in Transformers v5. Use `r   z
` instead.)rg   rh   warningswarnr{   r   	__bases__FutureWarningr[  s      r#   rh   !TFHubertFeatureExtractor.__init__l  s`    *6*$..112 3NN,,Q/889E 		
r%   rf   )r   r   r   r   rh   r   r   r   s   @r#   ry  ry  k  s    
 
r%   ry  c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFHubertFeatureProjectioniv  c                h  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  SS9U l        [        R                  R                  UR                  [        UR                  5      SSS9U l        [        R                  R                  UR                  S9U l        Xl        g )Nr=  rk   r   rH   
projectionunitskernel_initializerr   r   )raterf   )rg   rh   r   r   r>  r?  r=  DenserW  r
   initializer_ranger  Dropoutfeat_proj_dropoutdropoutr   r[  s      r#   rh   "TFHubertFeatureProjection.__init__w  s    "6",,99&BWBW^j9k,,,,$$.v/G/GH$	 - 
 ||++1I1I+Jr%   c                h    U R                  U5      nU R                  U5      nU R                  XS9nU$ Ntraining)r=  r  r  ry   r,  r  s      r#   r   TFHubertFeatureProjection.call  s4    66]Fr%   c                T   U R                   (       a  g SU l         [        U SS 5      bh  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  S   /5        S S S 5        [        U SS 5      bi  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  S   /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = f)NTr=  r'   r  )
r   r0  r   r1  r=  r   r   r   r  r  r   s     r#   r   TFHubertFeatureProjection.build  s    ::
4t,8t334%%tT4;;3G3G3K&LM 54t,8t334%%tT4;;3G3G3K&LM 54 9 54 54s   6D6D
D
D')r   r   r  r=  r  r   r   Fr,  r8  r  r   r5  r8  r   r9  r   s   @r#   r  r  v  s    	N 	Nr%   r  c                     ^  \ rS rSrSr   S         S	U 4S jjjrS
S jr     S             SS jjrSS jrSr	U =r
$ )TFHubertAttentioni  z6Multi-headed attention from "Attention Is All You Needc                V  > [         TU ]  " S
0 UD6  Xl        X l        [        R
                  R                  U5      U l        X-  U l        U R                  U-  U R                  :w  a  [        SU R                   SU S35      eU R                  S-  U l
        X@l        [        R
                  R                  XSS9U l        [        R
                  R                  XSS9U l        [        R
                  R                  XSS9U l        [        R
                  R                  XS	S9U l        g )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: r   g      k_proj)r   r   q_projv_projout_projrf   )rg   rh   	embed_dim	num_headsr   r   r  r  head_dimr?   scaling
is_decoderr  r  r  r  r  )ry   r  r  r  r  r  rz   r{   s          r#   rh   TFHubertAttention.__init__  s     	"6"""||++G4!.MMI%$..8MdnnM]$YKr3  }}d*$ll(((Qll(((Qll(((Q**9**Ur%   c           	         [         R                  " [         R                  " XX R                  U R                  45      S5      $ )Nr   r   r   r   )r   r.   r*   r  r  )ry   tensorseq_lenbszs       r#   _shapeTFHubertAttention._shape  s,    ||BJJvWnndmm/\]_kllr%   c           
     	   USLn[        U5      u  pn
U R                  U5      U R                  -  nU(       a  Ub  US   nUS   nGOU(       aE  U R                  U R	                  U5      SU5      nU R                  U R                  U5      SU5      nOUby  U R                  U R	                  U5      SU5      nU R                  U R                  U5      SU5      n[        R                  " US   U/SS9n[        R                  " US   U/SS9nODU R                  U R	                  U5      SU5      nU R                  U R                  U5      SU5      nU R                  (       a  X4nXR                  -  SU R                  4n[        R                  " U R                  XU5      U5      n[        R                  " X5      n[        R                  " X5      n[        U5      S   n[        R                  " XSS9n[        R                  R                  [        U5      XR                  -  X/S	XR                  -  X4 S
[        U5       3S9  Ub  [        R                  R                  [        U5      USX/SUSX4 S
[        U5       3S9  [        R                  " UUR                   S9n[        R                  " UXR                  X45      U-   n[        R                  " UXR                  -  X45      n[#        USS9nUb  [        R                  R                  [        U5      U R                  /SU R                   S
[        U5       3S9  [        R                  " US5      [        R                  " UXR                  X45      -  n[        R                  " UXR                  -  X45      nU R%                  UUS9n[        R                  " UU5      n[        R                  R                  [        U5      XR                  -  XR                  /SXR                  XR                  4 S
[        U5       3S9  [        R&                  " [        R                  " UXR                  XR                  45      S5      n[        R                  " UXU
45      nU R)                  U5      n[        R                  " UXR                  X45      nUUU4$ )z#Input shape: Batch x Time x ChannelNr   r   r'   r   r(   T)transpose_bz$Attention weights should be of size z	, but is r:   z!Attention mask should be of size r=   z/Head mask for a single layer should be of size )r   r'   r   r   r  z `attn_output` should be of size r  )r   r  r  r  r  r  r   r/   r  r  r  r*   matmulr@   assert_equalrB   r>   r   r  r.   r  )ry   r,  key_value_statespast_key_valueattention_masklayer_head_maskr  is_cross_attentionr  r]   r  query_states
key_statesvalue_states
proj_shaper^   attn_weights
attn_probsattn_outputs                      r#   r   TFHubertAttention.call  s\    .T9",]";i {{=1DLL@."<'*J)!,LT[[1A%BBLJ;;t{{3C'Db#NL'T[[%?SIJ;;t{{='A2sKLN1$5z#BKJ99nQ&7%FQOL T[[%?SIJ;;t{{='A2sKL?? )7NNN*B>
zz$++lS"I:VZZ
7
zz,;Z(+yytL
!!|$>>!746nn8Lg7_6` a|,-/	 	" 	
 %LL%%>*a*7a8R7S T">235	 &   WW^<;M;MNN::lS..'4[\_mmL::lS>>5I74\]L%l<&LL%%?+ Et~~EW X"?346	 &  ::o}E

sNNGEI L ::lS>>5I74\]L\\,\B
ii
L9
!!{#>>!7MM:2CR_R_3`2a b{+,.	 	" 	
 llJJ{S..'==$QRT`
 jjsY.GHmmK0"$**\CQX;b"cL.88r%   c                
   U R                   (       a  g SU l         [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       GNL= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTr  r  r  r  )r   r0  r   r1  r  r   r   r  r  r  r  r   s     r#   r   TFHubertAttention.build/  s[   ::
44(4t{{//0!!4t~~">? 144(4t{{//0!!4t~~">? 144(4t{{//0!!4t~~">? 14T*6t}}112##T4$@A 32 7 10 10 10 32s0   )G ;)G$)G#)G4 
G
G #
G14
H)r   r  r  r  r  r  r  r  r  r  r  )g        FT)
r  r   r  r   r  r   r  r   r  r   )r  r8  r  r   r  r   )NNNNF)r,  r8  r  tf.Tensor | Noner  ztuple[tuple[tf.Tensor]] | Noner  r  r  r  r  bool | Noner5  z"tuple[tf.Tensor, tf.Tensor | None]r   )r   r   r   r   r   rh   r  r   r   r   r   r   s   @r#   r  r    s    @  VV V 	V
 V V V8m .29=+/,0 %t9 t9 +t9 7	t9
 )t9 *t9 t9 
,t9lB Br%   r  c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFHubertFeedForwardiB  c                &  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  5      U l        [        R                  R                  UR                  [        UR                  5      SSS9U l        [        UR                  5      U l        [        R                  R                  UR                  [        UR                  5      SSS9U l        [        R                  R	                  UR"                  5      U l        Xl        g )NrH   intermediate_denser  output_denserf   )rg   rh   r   r   r  activation_dropoutintermediate_dropoutr  intermediate_sizer
   r  r  r   
hidden_actintermediate_act_fnrW  r  hidden_dropoutoutput_dropoutr   r[  s      r#   rh   TFHubertFeedForward.__init__C  s    "6"$)LL$8$89R9R$S!"',,"4"4**.v/G/GH$%	 #5 #
 $5V5F5F#G !LL..$$.v/G/GH$	 / 
 $ll2263H3HIr%   c                    U R                  U5      nU R                  U5      nU R                  XS9nU R                  U5      nU R	                  XS9nU$ r  )r  r  r  r  r  r  s      r#   r   TFHubertFeedForward.callY  s^    //>00?11-1S))-8++M+Mr%   c                H   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = f)NTr  r  )r   r0  r   r1  r  r   r   r   rW  r  r  r   s     r#   r   TFHubertFeedForward.buildb  s    ::
4-t4@t66;;<''--tT4;;;R;R.ST =4.:t00556!!''tT[[5R5R(ST 76 ; =< 76s   3D3D
D
D!)r   r   r  r  r  r  r  r  r  r  r   r9  r   s   @r#   r  r  B  s    ,	U 	Ur%   r  c                  \   ^  \ rS rSrSU 4S jjr   S         SS jjrS	S jrSrU =r$ )
TFHubertEncoderLayerio  c                  > [         TU ]  " S	0 UD6  [        UR                  UR                  UR
                  SSS9U l        [        R                  R                  UR                  5      U l        [        R                  R                  UR                  SS9U l        [        USS9U l        [        R                  R                  UR                  SS9U l        Xl        g 
NF	attention)r  r  r  r  r   r=  r  feed_forwardr   final_layer_normrf   rg   rh   r  rW  num_attention_headsattention_dropoutr  r   r   r  r  r  r>  r?  r=  r  r  r  r   r[  s      r#   rh   TFHubertEncoderLayer.__init__p      "6"*((00,,
 ||++F,A,AB,,99&BWBW^j9k/^L % ? ?H]H]dv ? wr%   c                    UnU R                  XUS9u  pnU R                  XS9nXQ-   nU R                  U5      nXR                  U5      -   nU R	                  U5      nU4nU(       a  X4-  nU$ N)r  r  r  )r  r  r=  r  r  	ry   r,  r  output_attentionsr  attn_residualr  r!   r   s	            r#   r   TFHubertEncoderLayer.call  s     &)-8 *8 *
&Q ]F%56%(9(9-(HH--m< "&Gr%   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       GNS= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = fNTr  r=  r  r  r   r0  r   r1  r  r   r   r=  r   rW  r  r  r   s     r#   r   TFHubertEncoderLayer.build  Z   ::
4d+7t~~223$$T* 44t,8t334%%tT4;;3J3J&KL 54.:t00556!!''- 74+T2>t4499:%%++T49P9P,QR ;: ? 43 54 76 ;:0   F:.3G!G=3G.:
G	
G
G+.
G<r  r   r   r  r  r  r=  r  NFF
r,  r8  r  r  r  r  r  r   r5  ztuple[tf.Tensor]r   r9  r   s   @r#   r  r  o  sT    $ ,0).  ) '	
  
2S Sr%   r  c                  \   ^  \ rS rSrSU 4S jjr   S         SS jjrS	S jrSrU =r$ )
#TFHubertEncoderLayerStableLayerNormi  c                  > [         TU ]  " S	0 UD6  [        UR                  UR                  UR
                  SSS9U l        [        R                  R                  UR                  5      U l        [        R                  R                  UR                  SS9U l        [        USS9U l        [        R                  R                  UR                  SS9U l        Xl        g r  r  r[  s      r#   rh   ,TFHubertEncoderLayerStableLayerNorm.__init__  r  r%   c                    UnU R                  U5      nU R                  XUS9u  pnU R                  XS9nXQ-   nXR                  U R	                  U5      5      -   nU4nU(       a  X4-  nU$ r  )r=  r  r  r  r  r  s	            r#   r   (TFHubertEncoderLayerStableLayerNorm.call  s     &6)-8 *8 *
&Q ]F%5%(9(9$:O:OP]:^(__ "&Gr%   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       GNS= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = fr  r  r   s     r#   r   )TFHubertEncoderLayerStableLayerNorm.build  r  r  r  r  r  r  r   r9  r   s   @r#   r  r    sT    $ ,0).  ) '	
  
.S Sr%   r  c                  h   ^  \ rS rSrSU 4S jjr     S             SS jjrS	S jrSrU =r$ )
TFHubertEncoderi  c                |  > [         TU ]  " S0 UD6  Xl        [        USS9U l        [
        R                  R                  UR                  SS9U l	        [
        R                  R                  UR                  5      U l        [        UR                  5       Vs/ s H  n[        USU 3S9PM     snU l        g s  snf Npos_conv_embedr  r=  r  zlayers.rf   )rg   rh   r   rU  r  r   r   r>  r?  r=  r  r  r  r-   num_hidden_layersr  rm  ry   r   rz   r   r{   s       r#   rh   TFHubertEncoder.__init__  s    "6"=fK[\,,99&BWBW^j9k||++F,A,ABPUV\VnVnPopPo1*6'!FPop
p   B9c                h   U(       a  SOS nU(       a  SOS nUb&  U[         R                  " US5      -  n[        U5      nOS nU R                  U5      n	X-   nU R	                  U5      nU R                  XS9n[        U R                  5       Hr  u  pU(       a  Xq4-   n[        R                  R                  SS5      nU(       a  XR                  R                  :  a  MS  U" UUUUS9nUS   nU(       d  Mj  XS   4-   nMt     U(       a  Xq4-   nU(       d  [        S XU4 5       5      $ [        UUUS9$ )	Nrf   r'   r  r   r   r,  r  r  r  c              3  .   #    U  H  oc  M  Uv   M     g 7fr   rf   .0vs     r#   	<genexpr>'TFHubertEncoder.call.<locals>.<genexpr>       m$[q$[   	last_hidden_stater,  
attentions)r   r,   ra   r  r=  r  	enumeraterm  npr   r   r   	layerdroptupler   ry   r,  r  r  output_hidden_statesreturn_dictr  all_hidden_statesall_self_attentionsposition_embeddingsr   layer_moduledropout_probabilitylayer_outputss                 r#   r   TFHubertEncoder.call  sF    #7BD$5b4%)BNN>2,NNM).9N!N"11-@%;6]F(4OA#$58H$H! #%))"3"3Aq"90;;3H3HH(+-"3!	M *!,M  &91=M<O&O#%  5*   14D Dm]GZ$[mmm ++*
 	
r%   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       N= f! , (       d  f       N|= f! , (       d  f       Mt  = fNTr  r=  rm  r   r0  r   r1  r  r   r   r=  r   rW  rm  ry   r   rm  s      r#   r   TFHubertEncoder.build%     ::
4)40<t22778##))$/ 94t,8t334%%tT4;;3J3J&KL 54$'3]]5::.KK% /. $ 4 98 54 /.$   E.3E&E(
E
E%(
E7	r   r   r  rm  r=  r  r  NFFTFr,  r8  r  r  r  r  r  r  r  r  r  r  r5  $TFBaseModelOutput | tuple[tf.Tensor]r   r9  r   s   @r#   r  r    sn    q ,0).,1#' %5
 5
 )5
 '	5

 *5
 !5
 5
 
.5
n& &r%   r  c                  h   ^  \ rS rSrSU 4S jjr     S             SS jjrS	S jrSrU =r$ )
TFHubertEncoderStableLayerNormi6  c                |  > [         TU ]  " S0 UD6  Xl        [        USS9U l        [
        R                  R                  UR                  SS9U l	        [
        R                  R                  UR                  5      U l        [        UR                  5       Vs/ s H  n[        USU 3S9PM     snU l        g s  snf r  )rg   rh   r   rU  r  r   r   r>  r?  r=  r  r  r  r-   r  r  rm  r  s       r#   rh   'TFHubertEncoderStableLayerNorm.__init__7  s    "6"=fK[\,,99&BWBW^j9k||++F,A,ABUZ[a[s[sUt
UtPQ/wqc]KUt

 
r   c                h   U(       a  SOS nU(       a  SOS nUb&  U[         R                  " US5      -  n[        U5      nOS nU R                  U5      n	X-   nU R	                  XS9n[        U R                  5       Hr  u  pU(       a  Xq4-   n[        R                  R                  SS5      nU(       a  XR                  R                  :  a  MS  U" UUUUS9nUS   nU(       d  Mj  XS   4-   nMt     U R                  U5      nU(       a  Xq4-   nU(       d  [        S XU4 5       5      $ [        UUUS9$ )	Nrf   r'   r  r   r   r  c              3  .   #    U  H  oc  M  Uv   M     g 7fr   rf   r  s     r#   r  6TFHubertEncoderStableLayerNorm.call.<locals>.<genexpr>q  r	  r
  r  )r   r,   ra   r  r  r  rm  r  r   r   r   r  r=  r  r   r  s                 r#   r   #TFHubertEncoderStableLayerNorm.callA  sF    #7BD$5b4%)BNN>2,NNM).9N!N"11-@%;]F(4OA#$58H$H! #%))"3"3Aq"90;;3H3HH(+-"3!	M *!,M  &91=M<O&O#%  5( 6 14D Dm]GZ$[mmm ++*
 	
r%   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       N= f! , (       d  f       N|= f! , (       d  f       Mt  = fr  r  r  s      r#   r   $TFHubertEncoderStableLayerNorm.buildx  r!  r"  r#  r  r$  r%  r   r9  r   s   @r#   r(  r(  6  sm    
 ,0).,1#' %5
 5
 )5
 '	5

 *5
 !5
 5
 
.5
n& &r%   r(  c                     ^  \ rS rSr\rSU 4S jjrS	S jrS
S jrS	SS jjr	\
         S                     SS jj5       rSrU =r$ )TFHubertMainLayeri  c                   > [         TU ]  " S0 UD6  Xl        [        USS9U l        [        USS9U l        UR                  (       a  [        USS9U l	        g [        USS9U l	        g )Nfeature_extractorr  feature_projectionencoderrf   )rg   rh   r   rj  r4  r  r5  do_stable_layer_normr(  r6  r  r[  s      r#   rh   TFHubertMainLayer.__init__  s]    "6"!7EX!Y";FI]"^&&9&yQDL*6	BDLr%   c                   U R                  U R                  R                  4SSSS9U l        U R                  (       a  g SU l        [        U SS 5      bN  [        R                  " U R                  R                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N}= f! , (       d  f       g = f)Nr   Tmasked_spec_embed)rM   r   r  r   r4  r5  r6  )r   r   rW  r:  r   r0  r   r1  r4  r   r   r5  r6  r   s     r#   r   TFHubertMainLayer.build  s#   !%;;**,)tZm "1 "
 ::
4,d3?t55::;&&,,T2 <4-t4@t66;;<''--d3 =4D)5t||001""4( 21 6 <; =< 21s$   >EE-6E>
E*-
E;>
Fc                    S n[        U R                  R                  U R                  R                  5       H  u  p4U" XU5      nM     U$ )z8
Computes the output length of the convolutional layers
c                    X-
  U-  S-   $ r   rf   )input_lengthr   strides      r#   _conv_out_lengthLTFHubertMainLayer._get_feat_extract_output_lengths.<locals>._conv_out_length  s     !.69A==r%   )zipr   r"  r#  )ry   input_lengthsr@  r   r?  s        r#    _get_feat_extract_output_lengths2TFHubertMainLayer._get_feat_extract_output_lengths  sF    
	>
 $'t{{'>'>@W@W#XK,]PM $Y r%   c                   [        U5      u  p4n[        U R                  SS5      (       d  U$ Ub  [        R                  " [        R
                  " USS2SS2[        R                  4   [        R                  5      U R                  [        R                  [        R                  SS24   U5      nOU R                  R                  S:  a  [        X44U R                  R                  U R                  R                  SS9n[        R                  " [        R
                  " USS2SS2[        R                  4   [        R                  5      U R                  [        R                  [        R                  SS24   U5      nU R                  R                  S:  ad  [        X54U R                  R                  U R                  R                  S9n[        R                  " USS2[        R                  SS24   US5      nU$ )z
Masks extracted features along time axis and/or along feature axis according to
[SpecAugment](https://huggingface.co/papers/1904.08779).
apply_spec_augmentTNr   r   )rN   rO   rP   )rN   rO   )r   r0  r   r   whererB   rK   r   r:  mask_time_probrX   mask_time_lengthmask_feature_probmask_feature_length)ry   r,  mask_time_indicesrQ   rR   rW  mask_feature_indicess          r#   _mask_hidden_states%TFHubertMainLayer._mask_hidden_states  s   
 4>m3L0
[ t{{$8$??  (HH)!Q

*:;RWWE&&rzz2::q'@AM [[''!+ 5-++44 KK88	! HH)!Q

*:;RWWE&&rzz2::q'@AM ;;((1,#8)++77 KK;;$ 
 HH%9!RZZ:J%K]\]^Mr%   c           	        U R                  [        R                  " U[        R                  5      U
S9nUbR  U R	                  [        R
                  " US5      5      n[        R                  " U[        U5      S   UR                  S9nU R                  XS9nUR                  S5      nU
(       a  U R                  XS9nU R                  UUUUU	U
S9nUS   nU	(       d	  U4USS  -   $ [        UUR                  UR                  S	9$ )
Nr  r'   r   )maxlenr>   rM  )rM  )r  r  r  r  r  r   r  )r4  r   rB   rC   rD  r  sequence_maskr   r>   r5  ro   rO  r6  r   r,  r  )ry   rs  r  token_type_idsposition_ids	head_maskinputs_embedsr  r  r  r  rz   r,  output_lengthsrM  encoder_outputss                   r#   r   TFHubertMainLayer.call  s    ..rww|RZZ/P[c.d%!BB2==Q_acCdeN--z-'@'C=K^K^N ///Q"JJ':; 44]4hM,,)/!5# ' 
 (*!#oab&999 +)77&11
 	
r%   )r   r   r6  r4  r5  r:  r  r   )rC  r8  )r,  r8  rM  r  	NNNNNNNNF)rs  r8  r  r  rT  r  rU  r  rV  r  rW  r  r  r  r  r  r  r  r  r   rz   r   )r   r   r   r   r   config_classrh   r   rD  rO  r   r   r   r   r   s   @r#   r2  r2    s    L	C)$*X  ,0+/)-&**..215#'/
/
 )/
 )	/

 '/
 $/
 (/
 ,/
 //
 !/
 /
 /
 /
r%   r2  c                  H   ^  \ rS rSrSr\rSrSr\	S 5       r
U 4S jrSrU =r$ )TFHubertPreTrainedModeli  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
hubertrs  c                    [         R                  " S[         R                  SS9[         R                  " S[         R                  SS9[         R                  " S[         R                  SS9S.$ )N)Ni>  rs  r  )NNr  rT  )rs  r  rT  )r   
TensorSpecrC   rE   r   s    r#   input_signature'TFHubertPreTrainedModel.input_signature!  sL     MM-.Y mmL"((IYZ mmL"((IYZ
 	
r%   c                   > [         TU ]  " U/UQ70 UD6  [        R                  SU R                  R
                   S35        g )N
z has backpropagation operations that are NOT supported on CPU. If you wish to train/fine-tune this model, you need a GPU or a TPU)rg   rh   loggerwarningr{   r   ry   r   r   rz   r{   s       r#   rh    TFHubertPreTrainedModel.__init__)  sD    3&3F3(() *E E	
r%   rf   )r   r   r   r   r   r   r\  base_model_prefixmain_input_namepropertyrb  rh   r   r   r   s   @r#   r^  r^    s6    
  L $O
 

 
r%   r^  a	  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_values` only and nothing else: `model(input_values)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_values, attention_mask])` or `model([input_values, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_values": input_values, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Args:
        config ([`HubertConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a	  
    Args:
        input_values (`np.ndarray`, `tf.Tensor`, `list[tf.Tensor]` `dict[str, tf.Tensor]` or `dict[str, np.ndarray]` and each example must have the shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.__call__`] and
            [`PreTrainedTokenizer.encode`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`np.ndarray` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`np.ndarray` or `tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_values` you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `input_values` indices into associated vectors
            than the model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the
            config will be used instead.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
        training (`bool`, *optional*, defaults to `False``):
            Whether or not to use the model in training mode (some modules like dropout modules have different
            behaviors between training and evaluation).
zbThe bare TFHubert Model transformer outputting raw hidden-states without any specific head on top.c                     ^  \ rS rSrSU 4S jjr\" \5      \" \\	S9\
         S                     S	S jj5       5       5       rS
S jrSrU =r$ )TFHubertModeli  c                X   > [         TU ]  " U/UQ70 UD6  Xl        [        USS9U l        g )Nr_  r  )rg   rh   r   r2  r_  rh  s       r#   rh   TFHubertModel.__init__  s-    3&3F3'X>r%   output_typer\  c                    U(       a  UOU R                   R                  nU(       a  UOU R                   R                  nU	(       a  U	OU R                   R                  n	U R	                  UUUUUUUUU	U
S9
nU$ )a  

Returns:

Example:

```python
>>> from transformers import AutoProcessor, TFHubertModel
>>> from datasets import load_dataset

>>> processor = AutoProcessor.from_pretrained("facebook/hubert-large-ls960-ft")
>>> model = TFHubertModel.from_pretrained("facebook/hubert-large-ls960-ft")


>>> def map_to_array(example):
...     example["speech"] = example["audio"]["array"]
...     return example


>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)

>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values  # Batch size 1
>>> hidden_states = model(input_values).last_hidden_state
```
rs  r  rT  rU  rV  rW  r  r  r  r  )r   r  r  r  r_  )ry   rs  r  rT  rU  rV  rW  r  r  r  r  r   s               r#   r   TFHubertModel.call  s~    T 8L3QUQ\Q\QqQq1B-HeHe%0kdkk6M6M++%))%'/!5#  
 r%   c                   U R                   (       a  g SU l         [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       g = f)NTr_  )r   r0  r   r1  r_  r   r   r   s     r#   r   TFHubertModel.build  s^    ::
44(4t{{//0!!$' 10 500s   A88
B)r   r   r_  r  r[  )rs  r8  r  r  rT  r  rU  r  rV  r  rW  r  r  r  r  r  r  r  r  r   r5  r&  r   )r   r   r   r   rh   r   HUBERT_INPUTS_DOCSTRINGr   r   _CONFIG_FOR_DOCr   r   r   r   r   r   s   @r#   rn  rn    s    
?
 ++BC+<?[ ,0+/)-&**.)-,0#'88 )8 )	8
 '8 $8 (8 '8 *8 !8 8 
.8  \ D8t( (r%   rn  zfTFHubert Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).c                     ^  \ rS rSrS	U 4S jjrS rS r\" \5      \	" \
\S9\          S
                       SS jj5       5       5       rSS jrSrU =r$ )TFHubertForCTCi  c                  > [         TU ]  " U/UQ70 UD6  [        USS9U l        [        R
                  R                  UR                  5      U l        [        R
                  R                  UR                  SS9U l        [        US5      (       a#  UR                  (       a  UR                  U l        g UR                  U l        g )Nr_  r  lm_headadd_adapter)rg   rh   r2  r_  r   r   r  final_dropoutr  r  
vocab_sizer}  hasattrr~  output_hidden_sizerW  rh  s       r#   rh   TFHubertForCTC.__init__  s    3&3F3'X>||++F,@,@A||))&*;*;))L)0)G)GFL^L^F%% 	djdvdv 	r%   c                Z    [         R                  " S[        5        U R                  5         g)z
Calling this function will disable the gradient computation for the feature encoder so that its parameters will
not be updated during training.
zThe method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. Please use the equivalent `freeze_feature_encoder` method instead.N)r{  r|  r~  freeze_feature_encoderr   s    r#   freeze_feature_extractor'TFHubertForCTC.freeze_feature_extractor  s'    
 	Q	

 	##%r%   c                :    SU R                   R                  l        g)z
Calling this function will disable the gradient computation for the feature encoder so that its parameter will
not be updated during training.
FN)r_  r4  r  r   s    r#   r  %TFHubertForCTC.freeze_feature_encoder  s    
 38%%/r%   rq  c                T   UbP  [         R                  " U5      U R                  R                  :  a"  [	        SU R                  R                   35      eU R                  UUUUUUUU	U
US9
nUS   nU R                  XS9nU R                  U5      nUGbX  Ub  UO"[         R                  " U[         R                  S9nU R
                  R                  [         R                  " USS95      n[         R                  " US:  [         R                  5      n[         R                  " USS9n[         R                  R                  UUUUU R                  R                   S	S
9nU R                  R"                  S:X  a-  [         R                  " U5      n[         R$                  " US5      nU R                  R"                  S:X  a-  [         R&                  " U5      n[         R$                  " US5      nOSnU
(       d  U4USS -   nUb  U4U-   $ U$ [)        UUUR*                  UR,                  S9$ )a  
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_values` docstring) Tokens with indices set to `-100` are ignored (masked),
    the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

Returns:

Example:

```python
>>> import tensorflow as tf
>>> from transformers import AutoProcessor, TFHubertForCTC
>>> from datasets import load_dataset

>>> processor = AutoProcessor.from_pretrained("facebook/hubert-large-ls960-ft")
>>> model = TFHubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")


>>> def map_to_array(example):
...     example["speech"] = example["audio"]["array"]
...     return example


>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)

>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values  # Batch size 1
>>> logits = model(input_values).logits
>>> predicted_ids = tf.argmax(logits, axis=-1)

>>> transcription = processor.decode(predicted_ids[0])

>>> # compute loss
>>> target_transcription = "A MAN SAID TO THE UNIVERSE SIR I EXIST"

>>> # Pass the transcription as text to encode labels
>>> labels = processor(text=transcription, return_tensors="tf").input_values

>>> loss = model(input_values, labels=labels).loss
```Nz$Label values must be <= vocab_size: rt  r   r  r=   r'   r(   F)logitslabelslogit_lengthlabel_lengthblank_indexlogits_time_majorsumr<   r   r   )lossr  r,  r  )r   
reduce_maxr   r  r?   r_  r  r}  rL   rC   rD  r  rB   rE   r   ctc_losspad_token_idctc_loss_reductionr*   reduce_meanr   r,  r  )ry   rs  r  rT  rU  rV  rW  r  r  r  r  r  r   r,  r  rC  labels_masktarget_lengthsr  r  s                       r#   r   TFHubertForCTC.call  s   t "--"74;;;Q;Q"QCDKKDZDZC[\]]++%))%'/!5#  
  
]Fm,"0"<",,|cecmcmBn  !KKHHWelnIopM ''&A+rxx8K]];R@N55>>*+ KK44"' " D {{--6}}T*zz$-{{--7~~d+zz$-DY,F)-)9TGf$EvE!//))	
 	
r%   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       Ny= f! , (       d  f       g = f)NTr_  r}  )	r   r0  r   r1  r_  r   r   r}  r  r   s     r#   r   TFHubertForCTC.build{  s    ::
44(4t{{//0!!$' 14D)5t||001""D$0G0G#HI 21 6 10 21s   C!.)C2!
C/2
D )r   r  r_  r}  r  r  )
NNNNNNNNNF)rs  r8  r  r  rT  r  rU  r  rV  r  rW  r  r  r  r  r  r  r  r  r  r  r  r5  z#TFCausalLMOutput | tuple[tf.Tensor]r   )r   r   r   r   rh   r  r  r   rx  r   r   ry  r   r   r   r   r   r   s   @r#   r{  r{    s    


&8 ++BC+;/Z ,0+/)-&**.)-#',0#' %q
q
 )q
 )	q

 'q
 $q
 (q
 'q
 !q
 *q
 !q
 q
 
-q
  [ Dq
f	J 	Jr%   r{  )r{  rn  r^  r3  )
rM   ztuple[int, int]rN   r   rO   r   rP   r   r5  r8  r   )r\   r8  r]   z
int | None)Br   
__future__r   r{  typingr   numpyr  
tensorflowr   activations_tfr   modeling_tf_outputsr   r   modeling_tf_utilsr	   r
   r   r   r   tf_utilsr   r   utilsr   r   r   r   configuration_hubertr   
get_loggerr   rf  ry  r[   r$   r7   rX   ra   r   Layerrc   r!  r   r  r;  rM  rU  rZ  rj  ry  r  r  r  r  r  r  r(  r2  r^  HUBERT_START_DOCSTRINGrx  rn  r{  __all__rf   r%   r#   <module>r     s    "     / F  3  / 
		H	%  O( 	GGG G 	G
 GV
6U** Ur5u||22 5r@5<<#5#5 @<G!3!3 GFG!3!3 GFGell&8&8 G<5<<--  'U\\//  'F
5 
N 2 2 NBgB** gBV)U%,,,, )UZ8S5<<-- 8Sx6S%,,*<*< 6StM&ell(( M&bO&U\\%7%7 O&d K
** K
 K
\
/ 
4( T5 p hI(+ I(	I(X p]J, ]J	]J@ Ir%   