
    cCi2                      S r SSKJr  SSKrSSKJr  SSKJr  SSKr	SSK
rSSKJr  SSKJrJrJr  SS	KJrJrJrJrJr  SS
KJrJr  SSKJrJrJrJrJ r   SSK!J"r"  \RF                  " \$5      r%Sr&Sr'Sr(Sr)\ " S S\5      5       r*S r+S r, SG         SHS jjr-SISJS jjr. " S S\R^                  R`                  5      r1 " S S\R^                  Rd                  5      r3 " S S\R^                  R`                  5      r4 " S S\R^                  R`                  5      r5 " S  S!\R^                  R`                  5      r6 " S" S#\R^                  R`                  5      r7 " S$ S%\R^                  R`                  5      r8 " S& S'\R^                  R`                  5      r9 " S( S)\95      r: " S* S+\R^                  R`                  5      r; " S, S-\R^                  R`                  5      r< " S. S/\R^                  R`                  5      r= " S0 S1\R^                  R`                  5      r> " S2 S3\R^                  R`                  5      r? " S4 S5\R^                  R`                  5      r@ " S6 S7\R^                  R`                  5      rA\ " S8 S9\R^                  R`                  5      5       rB " S: S;\5      rCS<rDS=rE\" S>\D5       " S? S@\C5      5       rF\" SA\D5       " SB SC\C5      5       rG " SD SE\C5      rH/ SFQrIg)KzTensorFlow Wav2Vec2 model.    )annotationsN)	dataclass)Any   )get_tf_activation)TFBaseModelOutputTFCausalLMOutputTFSequenceClassifierOutput)TFPreTrainedModelget_initializerkeraskeras_serializableunpack_inputs)
shape_liststable_softmax)ModelOutputadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )Wav2Vec2Config   zfacebook/wav2vec2-base-960hr   g    חc                  R    \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S	'   S
r
g)TFWav2Vec2BaseModelOutput:   a  
Output type of [`TFWav2Vec2BaseModelOutput`], with potential hidden states and attentions.

Args:
    last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    extract_features (`tf.Tensor` of shape `(batch_size, sequence_length, conv_dim[-1])`):
        Sequence of extracted feature vectors of the last convolutional layer of the model.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
        `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
Ntf.Tensor | Nonelast_hidden_stateextract_featuresztuple[tf.Tensor] | Nonehidden_states
attentions )__name__
__module____qualname____firstlineno____doc__r   __annotations__r   r    r!   __static_attributes__r"       k/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/wav2vec2/modeling_tf_wav2vec2.pyr   r   :   s7    * +/'.)-&--1M*1*.J'.r*   r   c                    [         R                  R                  [         R                  R	                  [        U 5      SS5      5      * n[         R                  R                  X-   U5      u  p4U$ )z
Categorical sampling without replacement is currently not implemented. The gumbel-max trick will do for now - see
https://github.com/tensorflow/tensorflow/issues/9260 for more info
r   r   )tfmathlograndomuniformr   nntop_k)distributionnum_samplesz_indicess        r+   _sample_without_replacementr9   W   sO    
 
RYY&&z,'?AF	GGA\-{;JANr*   c           
        [        U5      n[        R                  " [        R                  " [        R                  " [        R
                  " US   5      SS9U5      SS/5      n[        R                  " [        R                  " U[        R                  " USS/5      /S5      5      n[        R                  " U[        R                  " U S/5      U5      $ )zL
Scatter function as in PyTorch with indices in format (batch_dim, indices)
r   axisr   )	r   r-   reshapebroadcast_toexpand_dimsrange	transposeconcat
scatter_nd)valuesbatch_indicesoutput_shapeindices_shapebroad_casted_batch_dimspair_indicess         r+    _scatter_values_on_batch_indicesrK   a   s     }-M jj
rxxa0@'AK][^_ac]d <<		+BBJJ}_`bd^eDf*gij klL==rzz&2$'?NNr*   c           	     r   U u  pEUS:  a  [        S5      e[        R                  R                  UUSU SU S3S9  U[        R                  " U[        R
                  5      -  U-  [        R                  R                  S5      -   n[        R                  " Xc5      n[        R                  " U[        R                  5      n[        R                  R                  XR-  U5      n[        R                  " U5      n[        R                  " XE4[        R                  S9n[        R                  " XEUS-
  -
  45      n[        X5      n	[        R                   " U	S	5      n	[        R"                  " U	SSU45      n	[        R$                  " XXb-  45      n	[        R&                  " U5      [        R(                  [        R(                  S
S
24   n
[        R"                  " XUS45      n
[        R$                  " XXb-  45      n
X-   n	[+        [        R,                  " U	5      U	[        R.                  " U5      5      nU$ )ab  
Computes random mask spans for a given shape

Args:
    shape: the shape for which to compute masks.
        should be of size 2 where first element is batch size and 2nd is timesteps
    attention_mask: optional padding mask of the same size as shape, which will prevent masking padded elements
    mask_prob:
        probability for each token to be chosen as start of the span to be masked. this will be multiplied by
        number of timesteps divided by length of mask span to mask approximately this percentage of all elements.
        however due to overlaps, the actual number will be smaller (unless no_overlap is True)
    mask_length: size of the mask
    min_masks: minimum number of masked spans

Adapted from [fairseq's
data_utils.py](https://github.com/pytorch/fairseq/blob/e0788f7007a8473a76db573985031f3c94201e79/fairseq/data/data_utils.py#L376).
r   z&`mask_length` has to be bigger than 0.zO`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: z and `sequence_length`: `messager   dtyper;   N)
ValueErrorr-   	debuggingassert_lesscastfloat32r0   r1   maximumint32r.   minimumsqueezezerosonesr9   r@   tiler>   rA   newaxisrK   	ones_likeshape)ra   	mask_probmask_length	min_masks
batch_sizesequence_lengthnum_masked_spansspec_aug_maskuniform_distspec_aug_mask_idxsoffsetss              r+   _compute_mask_indicesrl   p   s   . #(JQABBLL]^i]j k##2"316	   !277?BJJ#GG+UXZXaXaXiXijnXoozz"2>ww/: ww'EGWXzz"23 HHj:"((KM 77J;?(KLML 5\T (:B?!3aK5HI$6EUEc8dehh{#BJJ

A$=>Gggg,<a@AGjj/?/M"NOG+5 5
'(*<bhh}>UM r*   c                    [        U 5      S   nUb  UOUn[        R                  " S5      n[        R                  " XR                  S9n [        R
                  " U SS2SSSS24   SSUS45      nX4-
  [        -  $ )zW
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
r   Ng      ?rQ   )r   r-   constantrV   rR   r^   LARGE_NEGATIVE)masktgt_lensrc_lenone_cstexpanded_masks        r+   _expand_maskru      st     q!G ,g'Gkk#G774}}-DGGDD$!12Q7A4FGM#~55r*   c                     ^  \ rS rSrSr           S                     SU 4S jjjrU 4S jrS rU 4S jrS r	S r
S	 rS
 rS rS rS rS rS rS rS rS rSrU =r$ )TFWav2Vec2GroupNorm   zh
From tensorflow-addons https://www.tensorflow.org/addons/api_docs/python/tfa/layers/GroupNormalization
c                @  > [         TU ]  " S0 UD6  SU l        Xl        X l        X0l        X@l        XPl        [        R                  R                  U5      U l        [        R                  R                  U5      U l        [        R                  R                  U5      U l        [        R                  R                  U	5      U l        [        R                   R                  U
5      U l        [        R                   R                  U5      U l        U R'                  5         g )NTr"   )super__init__supports_maskinggroupsr=   epsiloncenterscaler   initializersgetbeta_initializergamma_initializerregularizersbeta_regularizergamma_regularizerconstraintsbeta_constraintgamma_constraint_check_axis)selfr}   r=   r~   r   r   r   r   r   r   r   r   kwargs	__class__s                r+   r{   TFWav2Vec2GroupNorm.__init__   s     	"6" $	
 % 2 2 6 67G H!&!3!3!7!78I!J % 2 2 6 67G H!&!3!3!7!78I!J$0044_E % 1 1 5 56F Gr*   c                   > U R                  U5        U R                  U5        U R                  U5        U R                  U5        U R	                  U5        U R                  U5        SU l        [        TU ]!  U5        g )NT)	_check_if_input_shape_is_none'_set_number_of_groups_for_instance_norm_check_size_of_dimensions_create_input_spec_add_gamma_weight_add_beta_weightbuiltrz   buildr   input_shaper   s     r+   r   TFWav2Vec2GroupNorm.build   sj    **;744[A&&{3,{+k*
k"r*   c                8   [         R                  R                  U5      n[        R                  " U5      nU R                  XU5      u  pEU R                  XB5      nX R                     U R                  -  S:H  nU(       d  [        R                  " Xc5      nU$ UnU$ Nr   )
r   backend	int_shaper-   ra   _reshape_into_groups_apply_normalizationr=   r}   r>   )	r   inputsr   tensor_input_shapereshaped_inputsgroup_shapenormalized_inputsis_instance_normoutputss	            r+   callTFWav2Vec2GroupNorm.call   s    mm--f5XXf-'+'@'@Vh'i$ 55oS'		2dkkAaGjj!2GG  (Gr*   c                  > U R                   U R                  U R                  U R                  U R                  [
        R                  R                  U R                  5      [
        R                  R                  U R                  5      [
        R                  R                  U R                  5      [
        R                  R                  U R                  5      [
        R                  R                  U R                  5      [
        R                  R                  U R                  5      S.n[         TU ]E  5       n0 UEUE$ )N)r}   r=   r~   r   r   r   r   r   r   r   r   )r}   r=   r~   r   r   r   r   	serializer   r   r   r   r   r   r   r   rz   
get_config)r   configbase_configr   s      r+   r   TFWav2Vec2GroupNorm.get_config  s    kkII||kkZZ % 2 2 < <T=R=R S!&!3!3!=!=d>T>T!U % 2 2 < <T=R=R S!&!3!3!=!=d>T>T!U$00::4;O;OP % 1 1 ; ;D<Q<Q R
 g(*(+(((r*   c                    U$ Nr"   r   r   s     r+   compute_output_shape(TFWav2Vec2GroupNorm.compute_output_shape  s    r*   c                   [        [        U5      5       Vs/ s H  oCU   PM	     nnX R                     U R                  -  S:H  nU(       d|  X R                     U R                  -  XPR                  '   UR	                  U R                  U R                  5        [
        R                  " U5      n[
        R                  " X5      nXu4$ X4$ s  snf r   )rA   lenr=   r}   insertr-   stackr>   )r   r   r   r   ir   r   r   s           r+   r   (TFWav2Vec2GroupNorm._reshape_into_groups  s    6;C<L6MN6M!,6MN'		2dkkAaG%0%;t{{%JK		"tyy$++6((;/K jj=O"//&& Os   Cc           	     0   [         R                  R                  U5      n[        [	        S[        U5      5      5      nX R                     U R                  -  S:H  nU(       d"  U R                  S:X  a  SOU R                  S-
  nO!U R                  S:X  a  SOU R                  S-
  nUR                  U5        [        R                  R                  XSS9u  pxU R                  U5      u  p[        R                  R                  UUUU	U
U R                  S9nU$ )Nr   r;   T)keepdims)meanvariancer   offsetvariance_epsilon)r   r   r   listrA   r   r=   r}   popr-   r2   moments_get_reshaped_weightsbatch_normalizationr~   )r   r   r   r   group_reduction_axesr   r=   r   r   gammabetar   s               r+   r   (TFWav2Vec2GroupNorm._apply_normalization&  s    mm--o>#E!S-=$>?'		2dkkAaGb2dii!mDb2dii!mD  &W[\00=EE55!\\ 6 
 ! r*   c                    U R                  U5      nS nS nU R                  (       a!  [        R                  " U R                  U5      nU R
                  (       a!  [        R                  " U R                  U5      nX44$ r   )_create_broadcast_shaper   r-   r>   r   r   r   )r   r   broadcast_shaper   r   s        r+   r   )TFWav2Vec2GroupNorm._get_reshaped_weights=  sZ    66{C::JJtzz?;E;;::dii9D{r*   c                    XR                      nUc3  [        S[        U R                   5      -   S-   [        U5      -   S-   5      eg )NzAxis z\ of input tensor should have a defined dimension but the layer received an input with shape .)r=   rS   strr   r   dims      r+   r   1TFWav2Vec2GroupNorm._check_if_input_shape_is_noneH  sZ    ))$;dii.!pq k"# 	  r*   c                N    XR                      nU R                  S:X  a  X l        g g Nr;   )r=   r}   r   s      r+   r   ;TFWav2Vec2GroupNorm._set_number_of_groups_for_instance_normS  s$    ))$;;"K r*   c                .   XR                      nX R                  :  a3  [        S[        U R                  5      -   S-   [        U5      -   S-   5      eX R                  -  S:w  a3  [        S[        U R                  5      -   S-   [        U5      -   S-   5      eg )NzNumber of groups (z.) cannot be more than the number of channels ().r   z0) must be a multiple of the number of channels ()r=   r}   rS   r   r   s      r+   r   -TFWav2Vec2GroupNorm._check_size_of_dimensionsY  s    ))$$dkk"#BC c( 	  !$dkk"#DE c( 	  "r*   c                :    U R                   S:X  a  [        S5      eg )Nr   zdYou are trying to normalize your batch axis. Do you want to use tf.layer.batch_normalization instead)r=   rS   r   s    r+   r   TFWav2Vec2GroupNorm._check_axism  s"    99>v  r*   c                    XR                      n[        R                  R                  [	        U5      U R                   U0S9U l        g )N)ndimaxes)r=   r   layers	InputSpecr   
input_specr   s      r+   r   &TFWav2Vec2GroupNorm._create_input_specs  s8    ))$,,00c+6FdiiY\M]0^r*   c                    XR                      nU4nU R                  (       a7  U R                  USU R                  U R                  U R
                  S9U l        g S U l        g )Nr   ra   nameinitializerregularizer
constraint)r=   r   
add_weightr   r   r   r   r   r   r   ra   s       r+   r   %TFWav2Vec2GroupNorm._add_gamma_weightw  s[    ))$:: 22 2200 ) DJ DJr*   c                    XR                      nU4nU R                  (       a7  U R                  USU R                  U R                  U R
                  S9U l        g S U l        g )Nr   r   )r=   r   r   r   r   r   r   r   s       r+   r   $TFWav2Vec2GroupNorm._add_beta_weight  s[    ))$;; 11 11// ( DI DIr*   c                <   S/[        U5      -  nXR                     U R                  -  S:H  nU(       dO  XR                     U R                  -  X R                  '   UR                  U R                  U R                  5        U$ U R                  X R                  '   U$ r   )r   r=   r}   r   )r   r   r   r   s       r+   r   +TFWav2Vec2GroupNorm._create_broadcast_shape  s    #K 00'		2dkkAaG)4YY)?4;;)NOII&""499dkk:  *.OII&r*   )r=   r   r   r   r   r   r   r~   r   r   r   r   r}   r   r   r|   )    r;   gMbP?TTr\   r]   NNNN)r}   intr=   r   r~   floatr   boolr   r   r   keras.initializers.Initializerr   r   r   keras.regularizers.Regularizerr   r   r   keras.constraints.Constraintr   r   )r#   r$   r%   r&   r'   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r)   __classcell__r   s   @r+   rw   rw      s     ;B<B;?<@8<9=  	
   9 : 9 : 6 7 <	# )"
'!.		(_ r*   rw   c                  P   ^  \ rS rSrSrU 4S jrS rS rU 4S jrU 4S jr	Sr
U =r$ )	TFWav2Vec2WeightNormConv1Di  zeAdapted from https://www.tensorflow.org/probability/api_docs/python/tfp/layers/weight_norm/WeightNormc           
        > [         TU ]  " SUUUSSSS.UD6  X@l        SU l        [        R
                  " SS/5      U l        g )	NvalidT	he_normal)filterskernel_sizer}   paddinguse_biasbias_initializerr   r   r   r"   )rz   r{   explicit_paddingfilter_axisr-   rn   kernel_norm_axes)r   r  r  r}   r  r   r   s         r+   r{   #TFWav2Vec2WeightNormConv1D.__init__  sV     	
#(	
 	
 !1 "QF 3r*   c                $   [         R                  " [         R                  " [         R                  " U R                  5      U R
                  S95      nU R                  R                  USS2[         R                  [         R                  4   5        g)z"Set the norm of the weight vector.r<   N)	r-   sqrt
reduce_sumsquareweight_vr  weight_gassignr_   )r   kernel_norms     r+   
_init_norm%TFWav2Vec2WeightNormConv1D._init_norm  sT    ggbmmBIIdmm,D4K`K`ab[BJJ

)BCDr*   c                    [         R                  R                  U R                  U R                  S9[         R
                  " U R                  5      -  n[         R
                  " U5      U l        g)zGenerate normalized weights.r<   N)r-   r2   l2_normalizer  r  rB   r  kernel)r   r  s     r+   _normalize_kernel,TFWav2Vec2WeightNormConv1D._normalize_kernel  sM    ##DMM8M8M#NQSQ]Q]^b^k^kQllll6*r*   c                  > U R                   (       d  [        TU ]	  U5        [        R                  " [        R
                  " U R                  5      SSS9U l        U R                  U l        U R                  S[        U R                  R                  U R                     5      SS4SU R                  R                  SS9U l        U R                  5         U R                  SU R                  4S	SS
9U l        g g )Nr  T)r   	trainabler  r   r]   )r   ra   r   rR   r  biasr\   )r   ra   r   r  )r   rz   r   r-   VariablerB   r  r  r   r   ra   r  rR   r  r  r  r  r   s     r+   r    TFWav2Vec2WeightNormConv1D.build  s    zzGM+&++bll4;;&?j\`aDK KKDM OO4==..t/?/?@A1aH"mm)) , DM OOVDLL?X_kopDI r*   c                   > U R                  5         [        R                  " USU R                  U R                  4S45      n[        TU ]  U5      nU$ )N)r   r   )r  r-   padr  rz   r   )r   r   padded_inputsoutputr   s       r+   r   TFWav2Vec2WeightNormConv1D.call  sM     	 v1F1FH]H]0^`f'ghm,r*   )r  r  r  r  r  r  r  )r#   r$   r%   r&   r'   r{   r  r  r   r   r)   r   r   s   @r+   r   r     s&    o4E
+
q"	 	r*   r   c                  D   ^  \ rS rSrSSU 4S jjjrSS jrS	S jrSrU =r$ )
TFWav2Vec2NoLayerNormConvLayeri  c                b  > [         TU ]  " S0 UD6  US:  a  UR                  U   OSU l        UR                  U   U l        [
        R                  R                  U R                  UR                  U   UR                  U   UR                  SS9U l        [        UR                  5      U l        g )Nr   r   convr  r  stridesr  r   r"   )rz   r{   conv_dimin_conv_dimout_conv_dimr   r   Conv1Dconv_kernelconv_stride	conv_biasr&  r   feat_extract_activation
activationr   r   layer_idr   r   s       r+   r{   'TFWav2Vec2NoLayerNormConvLayer.__init__  s    "6"8@16??84!"OOH5LL''%%**84&&x0%% ( 
	 ,F,J,JKr*   c                J    U R                  U5      nU R                  U5      nU$ r   )r&  r1  r   r    s     r+   r   #TFWav2Vec2NoLayerNormConvLayer.call  s$    		-06r*   c                ,   U R                   (       a  g SU l         [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       g = fNTr&  )r   getattrr-   
name_scoper&  r   r   r*  r   s     r+   r   $TFWav2Vec2NoLayerNormConvLayer.build  sg    ::
4&2tyy~~.		tT-=-= >? /. 3..s   )B
B)r1  r   r&  r*  r+  r   r   r   r3  r   r   r   returnNoner    	tf.Tensorr?  rB  r   	r#   r$   r%   r&   r{   r   r   r)   r   r   s   @r+   r$  r$    s     L L
@ @r*   r$  c                  D   ^  \ rS rSrSSU 4S jjjrSS jrS	S jrSrU =r$ )
TFWav2Vec2LayerNormConvLayeri  c                  > [         TU ]  " S0 UD6  US:  a  UR                  U   OSU l        UR                  U   U l        [
        R                  R                  U R                  UR                  U   UR                  U   UR                  SS9U l        [
        R                  R                  SUR                  S9U l        [        UR                   5      U l        g )Nr   r   r&  r'  
layer_norm)r   r~   r"   )rz   r{   r)  r*  r+  r   r   r,  r-  r.  r/  r&  LayerNormalizationlayer_norm_epsrG  r   r0  r1  r2  s       r+   r{   %TFWav2Vec2LayerNormConvLayer.__init__  s    "6"8@16??84!"OOH5LL''%%**84&&x0%% ( 
	  ,,99|U[UjUj9k+F,J,JKr*   c                l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   r&  rG  r1  r6  s     r+   r   !TFWav2Vec2LayerNormConvLayer.call  2    		-066r*   c                    U R                   (       a  g SU l         [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       Ny= f! , (       d  f       g = fNTr&  rG  
r   r:  r-   r;  r&  r   r   r*  rG  r+  r   s     r+   r   "TFWav2Vec2LayerNormConvLayer.build      ::
4&2tyy~~.		tT-=-= >? /4t,8t334%%tT43D3D&EF 54 9 /. 54   )C.;)C?.
C<?
Dr1  r   r&  r*  rG  r+  r=  r>  rA  r   rC  r   s   @r+   rE  rE    s     L L	G 	Gr*   rE  c                  D   ^  \ rS rSrSSU 4S jjjrSS jrS	S jrSrU =r$ )
TFWav2Vec2GroupNormConvLayeri  c                  > [         TU ]  " S0 UD6  US:  a  UR                  U   OSU l        UR                  U   U l        [
        R                  R                  U R                  UR                  U   UR                  U   UR                  SS9U l        [        UR                  5      U l        [        U R                  UR                   SS9U l        g )Nr   r   r&  r'  rG  )r}   r~   r   r"   )rz   r{   r)  r*  r+  r   r   r,  r-  r.  r/  r&  r   r0  r1  rw   rI  rG  r2  s       r+   r{   %TFWav2Vec2GroupNormConvLayer.__init__  s    "6"8@16??84!"OOH5LL''%%**84&&x0%% ( 
	 ,F,J,JK-$$f.C.C,
r*   c                l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   rL  r6  s     r+   r   !TFWav2Vec2GroupNormConvLayer.call)  rN  r*   c                    U R                   (       a  g SU l         [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       Ny= f! , (       d  f       g = frP  rQ  r   s     r+   r   "TFWav2Vec2GroupNormConvLayer.build/  rS  rT  rU  r=  r>  rA  r   rC  r   s   @r+   rW  rW    s    
 
"	G 	Gr*   rW  c                  @   ^  \ rS rSrSU 4S jjrSS jrSS jrSrU =r$ )	!TFWav2Vec2PositionalConvEmbeddingi;  c                  > [         TU ]  " S0 UD6  [        UR                  UR                  UR
                  UR                  S-  SS9U l        [        UR                  5      U l        [        UR                  5      U l        Xl        g )Nr   r&  )r  r  r}   r  r   r"   )rz   r{   r   hidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsr&  TFWav2Vec2SamePadLayerr  r   r0  r1  r   r   r   r   r   s      r+   r{   *TFWav2Vec2PositionalConvEmbedding.__init__<  sv    "6".&&6677#;;q@
	 .f.L.LM+F,J,JKr*   c                l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r&  r  r1  r6  s     r+   r   &TFWav2Vec2PositionalConvEmbedding.callI  s2    		-0]36r*   c                @   U R                   (       a  g SU l         [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       g = fr9  )	r   r:  r-   r;  r&  r   r   r   ra  r   s     r+   r   'TFWav2Vec2PositionalConvEmbedding.buildO  sk    ::
4&2tyy~~.		tT[[-D-D EF /. 3..s   3B
B)r1  r   r   r&  r  r   r   r   r   r?  r@  rA  r   rC  r   s   @r+   r_  r_  ;  s    G Gr*   r_  c                  .   ^  \ rS rSrU 4S jrS rSrU =r$ )rd  iX  c                V   > [         TU ]  " S0 UD6  US-  S:X  a  SU l        g SU l        g )Nr   r   r   r"   )rz   r{   num_pad_remove)r   rb  r   r   s      r+   r{   TFWav2Vec2SamePadLayer.__init__Y  s.    "6"#:Q#>!#Car*   c                X    U R                   S:  a  US S 2S U R                   * 2S S 24   nU$ )Nr   rn  r6  s     r+   r   TFWav2Vec2SamePadLayer.call]  s6    ")!-C0C0C/C-CQ*FGMr*   rq  )r#   r$   r%   r&   r{   r   r)   r   r   s   @r+   rd  rd  X  s    K r*   rd  c                  <   ^  \ rS rSrSU 4S jjrS rSS jrSrU =r$ )TFWav2Vec2FeatureEncoderic  c                  > [         TU ]  " S	0 UD6  UR                  S:X  aK  [        USSS 3S9/[	        UR
                  S-
  5       Vs/ s H  n[        XS-   SUS-    3S9PM     sn-   nOZUR                  S:X  a1  [	        UR
                  5       Vs/ s H  n[        XSU 3S9PM     nnO[        SUR                   S35      eX@l	        g s  snf s  snf )
Ngroupr   zconv_layers.)r3  r   r   layerz`config.feat_extract_norm` is z), but has to be one of ['group', 'layer']r"   )
rz   r{   feat_extract_normrW  rA   num_feat_extract_layersr$  rE  rS   conv_layers)r   r   r   r   rz  r   s        r+   r{   !TFWav2Vec2FeatureEncoder.__init__d  s   "6"##w.7S_`a_bQcdev==ABiBA /vAl[\_`[`ZaLbcBi K %%0 v==>>A -VUVTWFXY>  K
 01I1I0JJst  'i
s   
CCc                j    [         R                  " US5      nU R                   H  nU" U5      nM     U$ r   )r-   r@   rz  )r   input_valuesr    
conv_layers       r+   r   TFWav2Vec2FeatureEncoder.callw  s2    |R8**J&}5M +r*   c                   U R                   (       a  g SU l         [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       MR  = f)NTrz  )r   r:  rz  r-   r;  r   r   )r   r   r~  s      r+   r   TFWav2Vec2FeatureEncoder.build}  sd    ::
4-9"..
]]:??3$$T* 43 / :33s   A77
B	)r   rz  rk  r   rC  r   s   @r+   rt  rt  c  s    '&+ +r*   rt  c                  (   ^  \ rS rSrU 4S jrSrU =r$ )TFWav2Vec2FeatureExtractori  c                   > [         TU ]  " U40 UD6  [        R                  " SU R                  R
                   SU R                  R                  S   R
                   S3[        5        g )NzThe class `zD` has been depreciated and will be removed in Transformers v5. Use `r   z
` instead.)rz   r{   warningswarnr   r#   	__bases__FutureWarningre  s      r+   r{   #TFWav2Vec2FeatureExtractor.__init__  s`    *6*$..112 3NN,,Q/889E 		
r*   r"   )r#   r$   r%   r&   r{   r)   r   r   s   @r+   r  r    s    
 
r*   r  c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFWav2Vec2FeatureProjectioni  c                h  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  SS9U l        [        R                  R                  UR                  [        UR                  5      SSS9U l        [        R                  R                  UR                  S9U l        Xl        g )NrG  r~   r   r\   
projectionunitskernel_initializerr  r   )rater"   )rz   r{   r   r   rH  rI  rG  Densera  r   initializer_ranger  Dropoutfeat_proj_dropoutdropoutr   re  s      r+   r{   $TFWav2Vec2FeatureProjection.__init__  s    "6",,99&BWBW^j9k,,,,$$.v/G/GH$	 - 
 ||++1I1I+Jr*   c                j    U R                  U5      nU R                  U5      nU R                  XS9nX4$ Ntraining)rG  r  r  )r   r    r  norm_hidden_statess       r+   r    TFWav2Vec2FeatureProjection.call  s9    !__];(:;]F00r*   c                T   U R                   (       a  g SU l         [        U SS 5      bh  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  S   /5        S S S 5        [        U SS 5      bi  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  S   /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = f)NTrG  r;   r  )
r   r:  r-   r;  rG  r   r   r   r)  r  r   s     r+   r   !TFWav2Vec2FeatureProjection.build  s    ::
4t,8t334%%tT4;;3G3G3K&LM 54t,8t334%%tT4;;3G3G3K&LM 54 9 54 54s   6D6D
D
D')r   r   r  rG  r  r   r   Fr    rB  r  r   r?  rB  r   rC  r   s   @r+   r  r    s    1	N 	Nr*   r  c                     ^  \ rS rSrSr   S         S	U 4S jjjrS
S jr     S             SS jjrSS jrSr	U =r
$ )TFWav2Vec2Attentioni  z6Multi-headed attention from "Attention Is All You Needc                V  > [         TU ]  " S
0 UD6  Xl        X l        [        R
                  R                  U5      U l        X-  U l        U R                  U-  U R                  :w  a  [        SU R                   SU S35      eU R                  S-  U l
        X@l        [        R
                  R                  XSS9U l        [        R
                  R                  XSS9U l        [        R
                  R                  XSS9U l        [        R
                  R                  XS	S9U l        g )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: r   g      k_proj)r  r   q_projv_projout_projr"   )rz   r{   	embed_dim	num_headsr   r   r  r  head_dimrS   scaling
is_decoderr  r  r  r  r  )r   r  r  r  r  r  r   r   s          r+   r{   TFWav2Vec2Attention.__init__  s     	"6"""||++G4!.MMI%$..8MdnnM]$YKr3  }}d*$ll(((Qll(((Qll(((Q**9**Ur*   c           	         [         R                  " [         R                  " XX R                  U R                  45      S5      $ )Nr   r   r   r   )r-   rB   r>   r  r  )r   tensorseq_lenbszs       r+   _shapeTFWav2Vec2Attention._shape  s,    ||BJJvWnndmm/\]_kllr*   c           
     	   USLn[        U5      u  pn
U R                  U5      U R                  -  nU(       a  Ub  US   nUS   nGOU(       aE  U R                  U R	                  U5      SU5      nU R                  U R                  U5      SU5      nOUby  U R                  U R	                  U5      SU5      nU R                  U R                  U5      SU5      n[        R                  " US   U/SS9n[        R                  " US   U/SS9nODU R                  U R	                  U5      SU5      nU R                  U R                  U5      SU5      nU R                  (       a  X4nXR                  -  SU R                  4n[        R                  " U R                  XU5      U5      n[        R                  " X5      n[        R                  " X5      n[        U5      S   n[        R                  " XSS9n[        R                  R                  [        U5      XR                  -  X/S	XR                  -  X4 S
[        U5       3S9  Ub  [        R                  R                  [        U5      USX/SUSX4 S
[        U5       3S9  [        R                  " UUR                   S9n[        R                  " UXR                  X45      U-   n[        R                  " UXR                  -  X45      n[#        USS9nUb  [        R                  R                  [        U5      U R                  /SU R                   S
[        U5       3S9  [        R                  " US5      [        R                  " UXR                  X45      -  n[        R                  " UXR                  -  X45      nU R%                  UUS9n[        R                  " UU5      n[        R                  R                  [        U5      XR                  -  XR                  /SXR                  XR                  4 S
[        U5       3S9  [        R&                  " [        R                  " UXR                  XR                  45      S5      n[        R                  " UXU
45      nU R)                  U5      n[        R                  " UXR                  X45      nUUU4$ )z#Input shape: Batch x Time x ChannelNr   r   r;   r   r<   T)transpose_bz$Attention weights should be of size z	, but is rN   z!Attention mask should be of size rQ   z/Head mask for a single layer should be of size )r   r;   r   r   r  z `attn_output` should be of size r  )r   r  r  r  r  r  r-   rC   r  r  r  r>   matmulrT   assert_equalrV   rR   r   r  rB   r  )r   r    key_value_statespast_key_valueattention_masklayer_head_maskr  is_cross_attentionr  rq   r  query_states
key_statesvalue_states
proj_shaperr   attn_weights
attn_probsattn_outputs                      r+   r   TFWav2Vec2Attention.call  s\    .T9",]";i {{=1DLL@."<'*J)!,LT[[1A%BBLJ;;t{{3C'Db#NL'T[[%?SIJ;;t{{='A2sKLN1$5z#BKJ99nQ&7%FQOL T[[%?SIJ;;t{{='A2sKL?? )7NNN*B>
zz$++lS"I:VZZ
7
zz,;Z(+yytL
!!|$>>!746nn8Lg7_6` a|,-/	 	" 	
 %LL%%>*a*7a8R7S T">235	 &   WW^<;M;MNN::lS..'4[\_mmL::lS>>5I74\]L%l<&LL%%?+ Et~~EW X"?346	 &  ::o}E

sNNGEI L ::lS>>5I74\]L\\,\B
ii
L9
!!{#>>!7MM:2CR_R_3`2a b{+,.	 	" 	
 llJJ{S..'==$QRT`
 jjsY.GHmmK0"$**\CQX;b"cL.88r*   c                
   U R                   (       a  g SU l         [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       GNL= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTr  r  r  r  )r   r:  r-   r;  r  r   r   r  r  r  r  r   s     r+   r   TFWav2Vec2Attention.buildK  s[   ::
44(4t{{//0!!4t~~">? 144(4t{{//0!!4t~~">? 144(4t{{//0!!4t~~">? 14T*6t}}112##T4$@A 32 7 10 10 10 32s0   )G ;)G$)G#)G4 
G
G #
G14
H)r   r  r  r  r  r  r  r  r  r  r  )        FT)
r  r   r  r   r  r   r  r   r  r   )r  rB  r  r   r  r   )NNNNF)r    rB  r  r   r  ztuple[tuple[tf.Tensor]] | Noner  r   r  r   r  bool | Noner?  z"tuple[tf.Tensor, tf.Tensor | None]r   )r#   r$   r%   r&   r'   r{   r  r   r   r)   r   r   s   @r+   r  r    s    @  VV V 	V
 V V V8m .29=+/,0 %t9 t9 +t9 7	t9
 )t9 *t9 t9 
,t9lB Br*   r  c                  D   ^  \ rS rSrSU 4S jjrSSS jjrS	S jrSrU =r$ )
TFWav2Vec2FeedForwardi]  c                &  > [         TU ]  " S0 UD6  [        R                  R	                  UR
                  5      U l        [        R                  R                  UR                  [        UR                  5      SSS9U l        [        UR                  5      U l        [        R                  R                  UR                  [        UR                  5      SSS9U l        [        R                  R	                  UR"                  5      U l        Xl        g )Nr\   intermediate_denser  output_denser"   )rz   r{   r   r   r  activation_dropoutintermediate_dropoutr  intermediate_sizer   r  r  r   
hidden_actintermediate_act_fnra  r  hidden_dropoutoutput_dropoutr   re  s      r+   r{   TFWav2Vec2FeedForward.__init__^  s    "6"$)LL$8$89R9R$S!"',,"4"4**.v/G/GH$%	 #5 #
 $5V5F5F#G !LL..$$.v/G/GH$	 / 
 $ll2263H3HIr*   c                    U R                  U5      nU R                  U5      nU R                  XS9nU R                  U5      nU R	                  XS9nU$ r  )r  r  r  r  r  )r   r    r  s      r+   r   TFWav2Vec2FeedForward.callt  s^    //>00?11-1S))-8++M+Mr*   c                H   U R                   (       a  g SU l         [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = f)NTr  r  )r   r:  r-   r;  r  r   r   r   ra  r  r  r   s     r+   r   TFWav2Vec2FeedForward.build}  s    ::
4-t4@t66;;<''--tT4;;;R;R.ST =4.:t00556!!''tT[[5R5R(ST 76 ; =< 76s   3D3D
D
D!)r   r   r  r  r  r  r  r  r  r  r   rC  r   s   @r+   r  r  ]  s    ,	U 	Ur*   r  c                  \   ^  \ rS rSrSU 4S jjr   S         SS jjrS	S jrSrU =r$ )
TFWav2Vec2EncoderLayeri  c                  > [         TU ]  " S	0 UD6  [        UR                  UR                  UR
                  SSS9U l        [        R                  R                  UR                  5      U l        [        R                  R                  UR                  SS9U l        [        USS9U l        [        R                  R                  UR                  SS9U l        Xl        g 
NF	attention)r  r  r  r  r   rG  r  feed_forwardr   final_layer_normr"   rz   r{   r  ra  num_attention_headsattention_dropoutr  r   r   r  r  r  rH  rI  rG  r  r  r  r   re  s      r+   r{   TFWav2Vec2EncoderLayer.__init__      "6",((00,,
 ||++F,A,AB,,99&BWBW^j9k1&~N % ? ?H]H]dv ? wr*   c                    UnU R                  XUS9u  pnU R                  XS9nXQ-   nU R                  U5      nXR                  U5      -   nU R	                  U5      nU4nU(       a  X4-  nU$ N)r  r  r  )r  r  rG  r  r  	r   r    r  output_attentionsr  attn_residualr  r7   r   s	            r+   r   TFWav2Vec2EncoderLayer.call  s     &)-8 *8 *
&Q ]F%56%(9(9-(HH--m< "&Gr*   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       GNS= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = fNTr  rG  r  r  r   r:  r-   r;  r  r   r   rG  r   ra  r  r  r   s     r+   r   TFWav2Vec2EncoderLayer.build  Z   ::
4d+7t~~223$$T* 44t,8t334%%tT4;;3J3J&KL 54.:t00556!!''- 74+T2>t4499:%%++T49P9P,QR ;: ? 43 54 76 ;:0   F:.3G!G=3G.:
G	
G
G+.
G<r  r   r   r  r  r  rG  r  NFF
r    rB  r  r   r  r  r  r   r?  ztuple[tf.Tensor]r   rC  r   s   @r+   r  r    sT    $ ,0).  ) '	
  
2S Sr*   r  c                  \   ^  \ rS rSrSU 4S jjr   S         SS jjrS	S jrSrU =r$ )
%TFWav2Vec2EncoderLayerStableLayerNormi  c                  > [         TU ]  " S	0 UD6  [        UR                  UR                  UR
                  SSS9U l        [        R                  R                  UR                  5      U l        [        R                  R                  UR                  SS9U l        [        USS9U l        [        R                  R                  UR                  SS9U l        Xl        g r  r  re  s      r+   r{   .TFWav2Vec2EncoderLayerStableLayerNorm.__init__  r  r*   c                    UnU R                  U5      nU R                  XUS9u  pnU R                  XS9nXQ-   nXR                  U R	                  U5      5      -   nU4nU(       a  X4-  nU$ r  )rG  r  r  r  r  r  s	            r+   r   *TFWav2Vec2EncoderLayerStableLayerNorm.call  s     &6)-8 *8 *
&Q ]F%5%(9(9$:O:OP]:^(__ "&Gr*   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       GNS= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = fr  r  r   s     r+   r   +TFWav2Vec2EncoderLayerStableLayerNorm.build  r  r  r  r  r  r  r   rC  r   s   @r+   r  r    sT    $ ,0).  ) '	
  
.S Sr*   r  c                  h   ^  \ rS rSrSU 4S jjr     S             SS jjrS	S jrSrU =r$ )
TFWav2Vec2Encoderi  c                |  > [         TU ]  " S0 UD6  Xl        [        USS9U l        [
        R                  R                  UR                  SS9U l	        [
        R                  R                  UR                  5      U l        [        UR                  5       Vs/ s H  n[        USU 3S9PM     snU l        g s  snf Npos_conv_embedr  rG  r  zlayers.r"   )rz   r{   r   r_  r  r   r   rH  rI  rG  r  r  r  rA   num_hidden_layersr  rw  r   r   r   r   r   s       r+   r{   TFWav2Vec2Encoder.__init__  s    "6"?M]^,,99&BWBW^j9k||++F,A,ABRWX^XpXpRqrRqQ,VGA3-HRqr
r   B9c                h   U(       a  SOS nU(       a  SOS nUb&  U[         R                  " US5      -  n[        U5      nOS nU R                  U5      n	X-   nU R	                  U5      nU R                  XS9n[        U R                  5       Hr  u  pU(       a  Xq4-   n[        R                  R                  SS5      nU(       a  XR                  R                  :  a  MS  U" UUUUS9nUS   nU(       d  Mj  XS   4-   nMt     U(       a  Xq4-   nU(       d  [        S XU4 5       5      $ [        UUUS9$ )	Nr"   r;   r  r   r   r    r  r  r  c              3  .   #    U  H  oc  M  Uv   M     g 7fr   r"   .0vs     r+   	<genexpr>)TFWav2Vec2Encoder.call.<locals>.<genexpr>6       m$[q$[   	r   r    r!   )r-   r@   ru   r  rG  r  	enumeraterw  npr0   r1   r   	layerdroptupler   r   r    r  r  output_hidden_statesreturn_dictr  all_hidden_statesall_self_attentionsposition_embeddingsr   layer_moduledropout_probabilitylayer_outputss                 r+   r   TFWav2Vec2Encoder.call  sF    #7BD$5b4%)BNN>2,NNM).9N!N"11-@%;6]F(4OA#$58H$H! #%))"3"3Aq"90;;3H3HH(+-"3!	M *!,M  &91=M<O&O#%  5*   14D Dm]GZ$[mmm ++*
 	
r*   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       N= f! , (       d  f       N|= f! , (       d  f       Mt  = fNTr  rG  rw  r   r:  r-   r;  r  r   r   rG  r   ra  rw  r   r   rw  s      r+   r   TFWav2Vec2Encoder.build=     ::
4)40<t22778##))$/ 94t,8t334%%tT4;;3J3J&KL 54$'3]]5::.KK% /. $ 4 98 54 /.$   E.3E&E(
E
E%(
E7	r   r   r  rw  rG  r  r  NFFTFr    rB  r  r   r  r  r  r  r  r  r  r  r?  $TFBaseModelOutput | tuple[tf.Tensor]r   rC  r   s   @r+   r  r    sn    s ,0).,1#' %5
 5
 )5
 '	5

 *5
 !5
 5
 
.5
n& &r*   r  c                  h   ^  \ rS rSrSU 4S jjr     S             SS jjrS	S jrSrU =r$ )
 TFWav2Vec2EncoderStableLayerNormiM  c                |  > [         TU ]  " S0 UD6  Xl        [        USS9U l        [
        R                  R                  UR                  SS9U l	        [
        R                  R                  UR                  5      U l        [        UR                  5       Vs/ s H  n[        USU 3S9PM     snU l        g s  snf r  )rz   r{   r   r_  r  r   r   rH  rI  rG  r  r  r  rA   r  r  rw  r  s       r+   r{   )TFWav2Vec2EncoderStableLayerNorm.__init__N  s    "6"?M]^,,99&BWBW^j9k||++F,A,ABW\]c]u]uWv
WvRS1&}MWv

 
r
  c                h   U(       a  SOS nU(       a  SOS nUb&  U[         R                  " US5      -  n[        U5      nOS nU R                  U5      n	X-   nU R	                  XS9n[        U R                  5       Hr  u  pU(       a  Xq4-   n[        R                  R                  SS5      nU(       a  XR                  R                  :  a  MS  U" UUUUS9nUS   nU(       d  Mj  XS   4-   nMt     U R                  U5      nU(       a  Xq4-   nU(       d  [        S XU4 5       5      $ [        UUUS9$ )	Nr"   r;   r  r   r   r  c              3  .   #    U  H  oc  M  Uv   M     g 7fr   r"   r  s     r+   r  8TFWav2Vec2EncoderStableLayerNorm.call.<locals>.<genexpr>  r  r  r  )r-   r@   ru   r  r  r  rw  r  r0   r1   r   r  rG  r  r   r  s                 r+   r   %TFWav2Vec2EncoderStableLayerNorm.callX  sF    #7BD$5b4%)BNN>2,NNM).9N!N"11-@%;]F(4OA#$58H$H! #%))"3"3Aq"90;;3H3HH(+-"3!	M *!,M  &91=M<O&O#%  5( 6 14D Dm]GZ$[mmm ++*
 	
r*   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       N= f! , (       d  f       N|= f! , (       d  f       Mt  = fr%  r&  r'  s      r+   r   &TFWav2Vec2EncoderStableLayerNorm.build  r)  r*  r+  r  r,  r-  r   rC  r   s   @r+   r0  r0  M  sm    
 ,0).,1#' %5
 5
 )5
 '	5

 *5
 !5
 5
 
.5
n& &r*   r0  c                     ^  \ rS rSr\rSU 4S jjrS	S jrS
S jrS	SS jjr	\
         S                     SS jj5       rSrU =r$ )TFWav2Vec2MainLayeri  c                   > [         TU ]  " S0 UD6  Xl        [        USS9U l        [        USS9U l        UR                  (       a  [        USS9U l	        g [        USS9U l	        g )Nfeature_extractorr  feature_projectionencoderr"   )rz   r{   r   rt  r<  r  r=  do_stable_layer_normr0  r>  r  re  s      r+   r{   TFWav2Vec2MainLayer.__init__  s]    "6"!9&GZ!["=fK_"`&&;FSDL,V)DDLr*   c                   U R                   (       a  g SU l         U R                  R                  S:  d  U R                  R                  S:  a,  U R	                  U R                  R
                  4SSSS9U l        [        U SS 5      bN  [        R                  " U R                  R                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R                  5         U R                  R                  S 5        S S S 5        [        U SS 5      bO  [        R                  " U R                  R                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N}= f! , (       d  f       g = f)	NTr  r1   masked_spec_embedra   r   r  r   r<  r=  r>  )r   r   mask_time_probmask_feature_probr   ra  rB  r:  r-   r;  r<  r   r   r=  r>  r   s     r+   r   TFWav2Vec2MainLayer.build  sG   ::
;;%%+t{{/L/Ls/R%)__{{..0iSW^q &5 &D" 4,d3?t55::;&&,,T2 <4-t4@t66;;<''--d3 =4D)5t||001""4( 21 6 <; =< 21s$   2FF!*F2
F!
F/2
G c                    S n[        U R                  R                  U R                  R                  5       H  u  p4U" XU5      nM     U$ )8
Computes the output length of the convolutional layers
c                    X-
  U-  S-   $ r   r"   input_lengthr  strides      r+   _conv_out_lengthNTFWav2Vec2MainLayer._get_feat_extract_output_lengths.<locals>._conv_out_length  s     !.69A==r*   )zipr   r-  r.  )r   input_lengthsrM  r  rL  s        r+    _get_feat_extract_output_lengths4TFWav2Vec2MainLayer._get_feat_extract_output_lengths  sF    
	>
 $'t{{'>'>@W@W#XK,]PM $Y r*   c                   [        U5      u  p4n[        U R                  SS5      (       d  U$ Ub  [        R                  " [        R
                  " USS2SS2[        R                  4   [        R                  5      U R                  [        R                  [        R                  SS24   U5      nOU R                  R                  S:  a  [        X44U R                  R                  U R                  R                  SS9n[        R                  " [        R
                  " USS2SS2[        R                  4   [        R                  5      U R                  [        R                  [        R                  SS24   U5      nU R                  R                  S:  ad  [        X54U R                  R                  U R                  R                  S9n[        R                  " USS2[        R                  SS24   US5      nU$ )z
Masks extracted features along time axis and/or along feature axis according to
[SpecAugment](https://huggingface.co/papers/1904.08779).
apply_spec_augmentTNr   r   )rb   rc   rd   )rb   rc   )r   r:  r   r-   whererV   r_   r   rB  rD  rl   mask_time_lengthrE  mask_feature_length)r   r    mask_time_indicesre   rf   ra  mask_feature_indicess          r+   _mask_hidden_states'TFWav2Vec2MainLayer._mask_hidden_states  s   
 4>m3L0
[ t{{$8$??  (HH)!Q

*:;RWWE&&rzz2::q'@AM [[''!+ 5-++44 KK88	! HH)!Q

*:;RWWE&&rzz2::q'@AM ;;((1,#8)++77 KK;;$ 
 HH%9!RZZ:J%K]\]^Mr*   c           	        U R                  [        R                  " U[        R                  5      U
S9nUbR  U R	                  [        R
                  " US5      5      n[        R                  " U[        U5      S   UR                  S9nU R                  XS9u  pUR                  S5      nU
(       a  U R                  XS9nU R                  UUUUU	U
S9nUS   nU	(       d	  X4USS  -   $ [        UUUR                  UR                  S	9$ )
Nr  r;   r   )maxlenrR   rX  )rX  r  r  r  r  r  r   )r   r   r    r!   )r<  r-   rV   rW   rQ  r  sequence_maskr   rR   r=  r   rZ  r>  r   r    r!   )r   r}  r  token_type_idsposition_ids	head_maskinputs_embedsr  r  r  r  r   r   output_lengthsr    rX  encoder_outputss                    r+   r   TFWav2Vec2MainLayer.call  s%     11"'',

2S^f1g %!BB2==Q_acCdeN--z2B'CA'FN^NdNdN +/*A*ABR*A*f'"JJ':; 44]4hM,,)/!5# ' 
 (*!4qr7JJJ(+-)77&11	
 	
r*   )r   r   r>  r<  r=  rB  r  r   )rP  rB  )r    rB  rX  r   	NNNNNNNNF)r}  rB  r  r   r`  r   ra  r   rb  r   rc  r   r  r  r  r  r  r  r  r   r   r   )r#   r$   r%   r&   r   config_classr{   r   rQ  rZ  r   r   r)   r   r   s   @r+   r:  r:    s    !L	E)$*X  ,0+/)-&**.)-,0#'1
1
 )1
 )	1

 '1
 $1
 (1
 '1
 *1
 !1
 1
 1
 1
r*   r:  c                  x   ^  \ rS rSrSr\rSrSr\	S 5       r
\	S 5       rU 4S jrSS jr S   SS	 jjrS
rU =r$ )TFWav2Vec2PreTrainedModeli0  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
wav2vec2r}  c                    [         R                  " S[         R                  SS9[         R                  " S[         R                  SS9S.$ )N)NNr}  r  r  r}  r  )r-   
TensorSpecrW   r   s    r+   input_signature)TFWav2Vec2PreTrainedModel.input_signature:  s7     MM,

X mmL"**K[\
 	
r*   c                    [         R                  R                  S[         R                  S9[         R                  " S[         R                  S9S.$ )N)r   i  )ra   rR   rm  )r-   r0   r1   rW   r]   r   s    r+   dummy_inputs&TFWav2Vec2PreTrainedModel.dummy_inputsA  s;     II--HBJJ-O ggHBJJG
 	
r*   c                   > [         TU ]  " U/UQ70 UD6  [        R                  SU R                  R
                   S35        g )N
z has backpropagation operations that are NOT supported on CPU. If you wish to train/fine-tune this model, you need a GPU or a TPU)rz   r{   loggerwarningr   r#   r   r   r   r   r   s       r+   r{   "TFWav2Vec2PreTrainedModel.__init__H  sD    3&3F3(() *E E	
r*   c                d   Uc  U R                   R                  OUnS n[        U R                   R                  U R                   R                  5       H  u  pEU" XU5      nM     U(       aD  [        U R                   R                  5       H!  nU" USU R                   R                  5      nM#     U$ )rH  c                L    [         R                  R                  X-
  U5      S-   $ r   )r-   r.   floordivrJ  s      r+   rM  TTFWav2Vec2PreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_lengthU  s!    77##L$>G!KKr*   r   )r   add_adapterrO  r-  r.  rA   num_adapter_layersadapter_stride)r   rP  r~  rM  r  rL  r7   s          r+   rQ  :TFWav2Vec2PreTrainedModel._get_feat_extract_output_lengthsO  s     2=1Ddkk--+	L $'t{{'>'>@W@W#XK,]PM $Y 4;;99: 04;;C]C] ^ ;r*   c           	        [         R                  R                  USS9S S 2S4   nU R                  XCS9n[         R                  " U[         R
                  5      n[         R                  " U5      S   n[         R                  " Xa4UR                  SS9n[         R                  " U[         R                  " [         R                  " U5      US-
  /SS9[         R                  " U/UR                  S9S	9n[         R                  " US/S9n[         R                  " USS9n[         R                  " US/S9n[         R                  " U[         R                  5      nU$ )
Nr;   r<   )r~  r   r  )rR   r   r   rQ   )r8   updates)r-   r.   cumsumrQ  rV   rY   ra   r\   rR   tensor_scatter_nd_updater   rA   r]   reverser   )r   feature_vector_lengthr  r~  non_padded_lengthsrd  re   s          r+   "_get_feature_vector_attention_mask<TFWav2Vec2PreTrainedModel._get_feature_vector_attention_mask`  s     WW^^N^DQUK>>?Q>k:XXn-a0
/~7K7KRb
 44HHbhhz2NQ4FGaPGGZL0D0DE

 N">>;N">9r*   r"   r   )r  r   r  rB  )r#   r$   r%   r&   r'   r   rh  base_model_prefixmain_input_namepropertyro  rr  r{   rQ  r  r)   r   r   s   @r+   rj  rj  0  sk    
 "L"$O
 
 
 

$ RV%(:C r*   rj  a	  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_values` only and nothing else: `model(input_values)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_values, attention_mask])` or `model([input_values, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_values": input_values, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Args:
        config ([`Wav2Vec2Config`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a	  
    Args:
        input_values (`np.ndarray`, `tf.Tensor`, `list[tf.Tensor]` `dict[str, tf.Tensor]` or `dict[str, np.ndarray]` and each example must have the shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.__call__`] and
            [`PreTrainedTokenizer.encode`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`np.ndarray` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`np.ndarray` or `tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_values` you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert `input_values` indices into associated vectors
            than the model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the
            config will be used instead.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
        training (`bool`, *optional*, defaults to `False``):
            Whether or not to use the model in training mode (some modules like dropout modules have different
            behaviors between training and evaluation).
zdThe bare TFWav2Vec2 Model transformer outputting raw hidden-states without any specific head on top.c                     ^  \ rS rSrSU 4S jjr\" \5      \" \\	S9\
         S                     S	S jj5       5       5       rS
S jrSrU =r$ )TFWav2Vec2Modeli  c                X   > [         TU ]  " U/UQ70 UD6  Xl        [        USS9U l        g )Nrk  r  )rz   r{   r   r:  rk  rx  s       r+   r{   TFWav2Vec2Model.__init__  s-    3&3F3+FDr*   output_typerh  c                    U(       a  UOU R                   R                  nU(       a  UOU R                   R                  nU	(       a  U	OU R                   R                  n	U R	                  UUUUUUUUU	U
S9
nU$ )a  

Returns:

Example:

```python
>>> from transformers import AutoProcessor, TFWav2Vec2Model
>>> from datasets import load_dataset

>>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
>>> model = TFWav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")


>>> def map_to_array(example):
...     example["speech"] = example["audio"]["array"]
...     return example


>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)

>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values  # Batch size 1
>>> hidden_states = model(input_values).last_hidden_state
```
r}  r  r`  ra  rb  rc  r  r  r  r  )r   r  r  r  rk  )r   r}  r  r`  ra  rb  rc  r  r  r  r  r   s               r+   r   TFWav2Vec2Model.call  s~    T 8L3QUQ\Q\QqQq1B-HeHe%0kdkk6M6M--%))%'/!5#   
 r*   c                   U R                   (       a  g SU l         [        U SS 5      bO  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        g g ! , (       d  f       g = f)NTrk  )r   r:  r-   r;  rk  r   r   r   s     r+   r   TFWav2Vec2Model.build!  s^    ::
4T*6t}}112##D) 32 722s   A88
B)r   r   rk  r  rg  )r}  rB  r  r   r`  r   ra  r   rb  r   rc  r   r  r  r  r  r  r  r  r   r?  r.  r   )r#   r$   r%   r&   r{   r   WAV2VEC2_INPUTS_DOCSTRINGr   r   _CONFIG_FOR_DOCr   r   r   r)   r   r   s   @r+   r  r    s    
E
 ++DE+<?[ ,0+/)-&**.)-,0#'88 )8 )	8
 '8 $8 (8 '8 *8 !8 8 
.8  \ F8t* *r*   r  zhTFWav2Vec2 Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).c                     ^  \ rS rSrS	U 4S jjrS rS r\\" \	5      \
" \\S9          S
                       SS jj5       5       5       rSS jrSrU =r$ )TFWav2Vec2ForCTCi*  c                  > [         TU ]  " U/UQ70 UD6  [        USS9U l        [        R
                  R                  UR                  5      U l        [        R
                  R                  UR                  SS9U l        [        US5      (       a#  UR                  (       a  UR                  U l        g UR                  U l        g )Nrk  r  lm_headr~  )rz   r{   r:  rk  r   r   r  final_dropoutr  r  
vocab_sizer  hasattrr~  output_hidden_sizera  rx  s       r+   r{   TFWav2Vec2ForCTC.__init__/  s    3&3F3+FD||++F,@,@A||))&*;*;))L)0)G)GFL^L^F%% 	djdvdv 	r*   c                Z    [         R                  " S[        5        U R                  5         gz
Calling this function will disable the gradient computation for the feature encoder so that its parameters will
not be updated during training.
zThe method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. Please use the equivalent `freeze_feature_encoder` method instead.Nr  r  r  freeze_feature_encoderr   s    r+   freeze_feature_extractor)TFWav2Vec2ForCTC.freeze_feature_extractor9  '    
 	Q	

 	##%r*   c                :    SU R                   R                  l        gz
Calling this function will disable the gradient computation for the feature encoder so that its parameter will
not be updated during training.
FNrk  r<  r  r   s    r+   r  'TFWav2Vec2ForCTC.freeze_feature_encoderE      
 5:''1r*   r  c                .   UbP  [         R                  " U5      U R                  R                  :  a"  [	        SU R                  R                   35      eU R                  UUUUUUUU	U
US9
nUS   nU R                  XS9nU R                  U5      nUGbA  Ub  UO"[         R                  " U[         R                  S9nU R
                  R                  [         R                  " USS95      n[         R                  " US:  [         R                  5      n[         R                  " USS9n[         R                  R                  UUUUU R                  R                   S	S
9nU R                  R"                  S:X  a  [         R                  " U5      nU R                  R"                  S:X  a  [         R$                  " U5      n[         R&                  " US5      nOSnU
(       d  U4U[(        S -   nUb  U4U-   $ U$ [+        UUUR,                  UR.                  S9$ )a  
labels (`tf.Tensor` or `np.ndarray` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_values` docstring) Tokens with indices set to `-100` are ignored (masked),
    the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

Returns:

Example:

```python
>>> import tensorflow as tf
>>> from transformers import AutoProcessor, TFWav2Vec2ForCTC
>>> from datasets import load_dataset
>>> from torchcodec.decoders import AudioDecoder

>>> processor = AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
>>> model = TFWav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")


>>> def map_to_array(example):
...     example["speech"] = example["audio"]["array"]
...     return example


>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)

>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values  # Batch size 1
>>> logits = model(input_values).logits
>>> predicted_ids = tf.argmax(logits, axis=-1)

>>> transcription = processor.decode(predicted_ids[0])

>>> # compute loss
>>> target_transcription = "A MAN SAID TO THE UNIVERSE SIR I EXIST"

>>> # Pass transcription as `text` to encode labels
>>> labels = processor(text=transcription, return_tensors="tf").input_ids

>>> loss = model(input_values, labels=labels).loss
```Nz$Label values must be <= vocab_size: r  r   r  rQ   r;   r<   F)logitslabelslogit_lengthlabel_lengthblank_indexlogits_time_majorsumr   rP   lossr  r    r!   )r-   
reduce_maxr   r  rS   rk  r  r  r`   rW   rQ  r  rV   rY   r2   ctc_losspad_token_idctc_loss_reductionreduce_meanr>   _HIDDEN_STATES_START_POSITIONr	   r    r!   )r   r}  r  r`  ra  rb  rc  r  r  r  r  r  r   r    r  rP  labels_masktarget_lengthsr  r!  s                       r+   r   TFWav2Vec2ForCTC.callL  s   v "--"74;;;Q;Q"QCDKKDZDZC[\]]--%))%'/!5#   
  
]Fm,"0"<",,|cecmcmBn  !MMJJ2==YgnpKqrM ''&A+rxx8K]];R@N55>>*+ KK44"' " D {{--6}}T*{{--7~~d+::dD)DDY)F)G!HHF)-)9TGf$EvE!//))	
 	
r*   c                   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       Ny= f! , (       d  f       g = f)NTrk  r  )	r   r:  r-   r;  rk  r   r   r  r  r   s     r+   r   TFWav2Vec2ForCTC.build  s    ::
4T*6t}}112##D) 34D)5t||001""D$0G0G#HI 21 6 32 21s   C!.)C2!
C/2
D )r   r  r  r  rk  r  )
NNNNNNNNNF)r}  rB  r  r   r`  r   ra  r   rb  r   rc  r   r  r  r  r   r  r  r  r  r  r  r?  z#TFCausalLMOutput | tuple[tf.Tensor]r   )r#   r$   r%   r&   r{   r  r  r   r   r  r   r	   r  r   r   r)   r   r   s   @r+   r  r  *  s    


&: *+DE+;/Z ,0+/)-&**.)-#',0#' %r
r
 )r
 )	r

 'r
 $r
 (r
 'r
 !r
 *r
 !r
 r
 
-r
 [ F r
h	J 	Jr*   r  c                     ^  \ rS rSrU 4S jrS rS rS r\      S	               S
S jj5       r	SS jr
SrU =r$ )#TFWav2Vec2ForSequenceClassificationi  c                  > [         TU ]  U5        [        USS9U l        UR                  S-   U l        [        R                  " U R                  5       5         UR                  (       a"  U R                  U R
                  4SSSS9U l        S S S 5        Xl        [        R                  R                  UR                   SS	9U l        [        R                  R                  UR$                  S S
S9U l        g ! , (       d  f       Np= f)Nrk  r  r   r]   Tlayer_weightsrC  	projector)r  r   
classifier)r  r1  r   )rz   r{   r:  rk  r  
num_layersr-   r;  _name_scopeuse_weighted_layer_sumr   r  r   r   r   r  classifier_proj_sizer  
num_labelsr  )r   r   r   s     r+   r{   ,TFWav2Vec2ForSequenceClassification.__init__  s     +FD 22Q6]]4++-.,,%)__??,&DWf &5 &" /
 ++&2M2MT_+`,,,,63D3DQU\h,i /.s   4C66
Dc                Z    [         R                  " S[        5        U R                  5         gr  r  r   s    r+   r  <TFWav2Vec2ForSequenceClassification.freeze_feature_extractor  r  r*   c                :    SU R                   R                  l        gr  r  r   s    r+   r  :TFWav2Vec2ForSequenceClassification.freeze_feature_encoder  r  r*   c                L    U R                   R                   H
  nSUl        M     g)z
Calling this function will disable the gradient computation for the base model so that its parameters will not
be updated during training. Only the classification head will be updated.
FN)rk  r   r  )r   rw  s     r+   freeze_base_model5TFWav2Vec2ForSequenceClassification.freeze_base_model  s    
 ]]))E#EO *r*   c           
        Ub  UOU R                   R                  nU R                   R                  (       a  SOUnU R                  UUUUUUS9nU R                   R                  (       av  U[           n	[
        R                  " U	SS9n	[
        R                  R                  U R                  SS9n
[
        R                  " U	[
        R                  " U
/ SQ5      -  SS9n	OUS   n	U R                  U	5      n	Uc  [
        R                  " U	SS9nOU R                  [        U	5      S   U5      n[
        R                   " XR"                  5      n[
        R$                  " U	[
        R&                  " USS95      n	[
        R(                  " [
        R                  " U	SS9[
        R&                  " [
        R                  " USS9SS95      nU R+                  U5      nS nUbg  [,        R.                  R1                  SS9nU" [
        R                  " US/5      [
        R                  " USU R                   R2                  /5      5      nU(       d  U4U[        S  -   nUb  U4U-   $ U$ [5        UUUR6                  UR8                  S	9$ )
NTr^  r   r<   r;   )r;   r   r   r   )from_logitsr  )r   use_return_dictr  rk  r  r-   r   r2   softmaxr  r  r>   r  r  r  r   rV   rR   multiplyr@   divider  r   lossesSparseCategoricalCrossentropyr  r
   r    r!   )r   r}  r  r  r  r  r  r  r   r    norm_weightspooled_outputpadding_maskpadding_mask_floatr  r  loss_fnr!  s                     r+   r   (TFWav2Vec2ForSequenceClassification.call  s)    &1%<k$++B]B]'+{{'I'ItOc--)/!5#   
 ;;--#$ABMHH];M55==););"=ELMM-"**\S]:^*^efgM#AJM}5!NN=qAMBB:mC\]^C_aopL!#7J7J!KKKr~~FX_a7bcMIIm!4bnnR]]SelmEnuv6wM /ll@@T@RG2::frd3RZZT[[McMcHd5efDY)F)G!HHF)-)9TGf$EvE)!//))	
 	
r*   c                "   U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        [        U SS 5      bf  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  R                  /5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTrk  r  r  )r   r:  r-   r;  rk  r   r   r  r   ra  r  r  r   s     r+   r   )TFWav2Vec2ForSequenceClassification.build0  s   ::
4T*6t}}112##D) 34d+7t~~223$$dD$++2I2I%JK 44t,8t334%%tT4;;3S3S&TU 54 9 32 43 54s$   E.3E/!3F 
E,/
E= 
F)r   r  r   r  r  r  rk  )NNNNNF)r}  rB  r  r   r  r  r  r  r  r  r  r   r  r   r?  z-TFSequenceClassifierOutput | tuple[tf.Tensor]r   )r#   r$   r%   r&   r{   r  r  r  r   r   r   r)   r   r   s   @r+   r  r    s    j
&:$  ,0)-,0#'#'5
5
 )5
 '	5

 *5
 !5
 !5
 5
 
75
 5
nV Vr*   r  )r  r  rj  r  r=  )
ra   ztuple[int, int]rb   r   rc   r   rd   r   r?  rB  r   )rp   rB  rq   z
int | None)Jr'   
__future__r   r  dataclassesr   typingr   numpyr  
tensorflowr-   activations_tfr   modeling_tf_outputsr   r	   r
   modeling_tf_utilsr   r   r   r   r   tf_utilsr   r   utilsr   r   r   r   r   configuration_wav2vec2r   
get_loggerr#   rv  r  _CHECKPOINT_FOR_DOCr  ro   r   r9   rK   rl   ru   r   Layerrw   r,  r   r$  rE  rW  r_  rd  rt  r  r  r  r  r  r  r  r0  r:  rj  WAV2VEC2_START_DOCSTRINGr  r  r  r  __all__r"   r*   r+   <module>r     s   ! "  !    / b b  3  3 
		H	% !" 3 "  / / /8O& 	GGG G 	G
 GV
6U%,,,, Up5!4!4 5p@U\\%7%7 @:G5<<#5#5 GD!G5<<#5#5 !GHG(:(: G:U\\// !+u||11 !+H
!9 
N%,,"4"4 NBgB%,,,, gBT)UELL.. )UX8SU\\// 8Sv6SELL,>,> 6SrM&** M&`O&u||'9'9 O&d M
%,,,, M
 M
`E 1 EP( T5 p jI*/ I*	I*X r^J0 ^J	^JBmV*C mV` vr*   