
    cCi                     
   S r SSKrSSKJrJr  SSKrSSKJr  SSKJr  SSK	J
r
JrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJrJr  SSKJr  SSKJrJrJr  SSKJr  SSK J!r!  \RD                  " \#5      r$Sr%Sr&Sr'/ SQr(Sr)Sr* " S S\RV                  5      r, " S S\RV                  5      r- " S S\RV                  5      r. " S S\RV                  5      r/ " S S \RV                  5      r0 " S! S"\RV                  5      r1 " S# S$\RV                  5      r2 " S% S&\RV                  5      r3 " S' S(\5      r4 " S) S*\5      r5S+r6S,r7 " S- S.\55      r8\" S/\65       " S0 S1\55      5       r9\" S2\65       " S3 S4\55      5       r:/ S5Qr;g)6zPyTorch M-CTC-T model.    N)OptionalUnion)nn   )ACT2FN)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forward)is_deepspeed_zero3_enabled)is_fsdp_managed_module)_prepare_4d_attention_mask)GradientCheckpointingLayer)BaseModelOutputCausalLMOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)logging   )MCTCTConfigr   zspeechbrain/m-ctc-t-large)r      i   zY"Mr. Quilter is the apostle of the middle classes, and we're glad to welcome his gospel."gv@c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )MCTCTConv1dSubsampler3   z
Convolutional subsampler: a stack of 1D convolution (along temporal dimension) followed by non-linear activation
via gated linear units (https://huggingface.co/papers/1911.08460)
c                 Z  >^  [         TT ]  5         UT l        UR                  T l        [
        R                  " UR                  5      T l        UR                  T l
        UR                  UR                  -  T l        T R                  S:  a*  UR                  c  [        S5      eUR                  T l        OS T l        UR"                  S-  T l        UR&                  T l        UR*                  T l        [
        R.                  " U 4S j[1        T R(                  5       5       5      T l        g )Nr   zbNeed to specify `conv_channels` configuration in `MCTCTConfig` to use multiple convolution layers.   c           	   3     >#    U  Hw  u  p[         R                  " US :X  a  TR                  OTR                  U   UTR                  S-
  :  a  TR                  U   OTR
                  UTR                  U   SS9v   My     g7f)r   r   valid)kernel_sizestridepaddingN)r   Conv1din_channelsmid_channels
num_layersout_channelsr!   ).0ikselfs      m/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/deprecated/mctct/modeling_mctct.py	<genexpr>1MCTCTConv1dSubsampler.__init__.<locals>.<genexpr>U   sz      	)
 4 II$%F  0A0A!0D()DOOa,?(?!!!$TEVEV{{1~ 4s   A?B)super__init__configconv_glu_dimglu_dimr   Dropoutconv_dropoutdropoutnum_conv_layersr&   input_feat_per_channelinput_channelsr$   conv_channels
ValueErrorr%   hidden_sizer'   conv_kernelr    conv_strider!   
ModuleList	enumerateconv_layersr+   r1   	__class__s   ` r,   r0   MCTCTConv1dSubsampler.__init__9   s    **zz&"5"56 00!886;P;PP??Q##+  
 !' 4 4D $D"..2!--((
 == 	)
 "$"2"23	)
 	
    c                    [        S U R                   5       5      n[        R                  R                  R                  USSX"4SS5      nUR                  SS5      R                  5       nU R                   HC  nU" U5      n[        R                  R                  X0R                  S9nU R                  U5      nME     UR                  SS5      R                  5       nU$ )Nc              3   *   #    U  H	  oS -  v   M     g7f)r   N )r(   sizes     r,   r-   0MCTCTConv1dSubsampler.forward.<locals>.<genexpr>c   s     =,<Dai,<s   r   constantr   r   dim)sumr    torchr   
functionalpad	transpose
contiguousrA   glur3   r6   )r+   input_featuresr"   hidden_statesconvs        r,   forwardMCTCTConv1dSubsampler.forward`   s     =D,<,<==,,00!QAY[eghi&00A6AAC$$D /MMM--m-NM LL7M %
 &//15@@BrE   )
r1   rA   r6   r3   r$   r    r%   r&   r'   r!   	__name__
__module____qualname____firstlineno____doc__r0   rX   __static_attributes____classcell__rC   s   @r,   r   r   3   s    
%
N rE   r   c                   8   ^  \ rS rSrSrU 4S jr SS jrSrU =r$ )MCTCTEmbeddingsp   zGConstruct the embeddings from word, position and token_type embeddings.c           	        > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l	        [        R                  " UR                  UR
                  5      U l        [        5       U l        [        R                  " UR                  5      U l        U R#                  S[$        R&                  " UR                  5      R)                  S5      SS9  U R#                  S[$        R*                  " U R,                  R/                  5       [$        R0                  U R,                  R2                  S9SS9  g )N)padding_idxposition_ids)r   F)
persistenttoken_type_idsdtypedevice)r/   r0   r   	Embedding
vocab_sizer<   pad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddingsMCTCTLayerNorm	LayerNormr4   hidden_dropout_probr6   register_bufferrO   arangeexpandzerosrh   rI   longrn   rB   s     r,   r0   MCTCTEmbeddings.__init__s   s   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]"
 ()zz&"<"<= 	ELL)G)GHOOPWXej 	 	
 	KK))..0

4K\K\KcKcd 	 	
rE   c                    Ub  UR                  5       OUR                  5       S S nUS   nUc  U R                  S S 2XWU-   24   nUcv  [        U S5      (       a-  U R                  S S 2S U24   nUR	                  US   U5      n	U	nO8[
        R                  " U[
        R                  U R                  R                  S9nUc  U R                  U5      nU R                  U5      n
XJ-   nU R                  U5      nU R                  U5      nU$ )Nri   r   rk   r   rl   )rI   rh   hasattrrk   r|   rO   r}   r~   rn   rr   rv   rx   r6   )r+   rU   rk   rh   inputs_embedspast_key_values_lengthinput_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedrv   
embeddingss               r,   rX   MCTCTEmbeddings.forward   s    0>/In))+}OaOaOcdgegOh ^
,,Q0FVlIl0l-lmL
 !t-..*.*=*=a*n*M'3J3Q3QR]^_R`bl3m0!A!&[

SWSdSdSkSk!l  00@M $ : :> J":
^^J/
\\*-
rE   )rx   r6   rt   rv   rr   )NNNNr   rZ   rb   s   @r,   rd   rd   p   s    Q
. wx rE   rd   c                   J   ^  \ rS rSrU 4S jrS rS rS r   SS jrSr	U =r
$ )	MCTCTSelfAttention   c                 ^  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eUR                  U l        UR                  U l        U R                  U R                  -  U l        [        R                  " UR                  U R                  SS9U l        [        R                  " UR                  U R                  SS9U l        [        R                  " UR                  U R                  SS9U l        [        R                  " UR                  5      U l        UR"                  U l        [        R$                  " SUR"                  -  S	-
  U R                  5      U l        UR(                  U l        g )
Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()Fbiasr   r   )r/   r0   r<   num_attention_headsr   r;   attention_head_dimattention_head_sizeall_head_sizer   Linearquerykeyvaluer4   attention_probs_dropout_probr6   rs   ro   distance_embedding
is_decoderrB   s     r,   r0   MCTCTSelfAttention.__init__   s[    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 $*#=#= #)#<#< !558P8PPYYv1143E3EER
99V//1C1C%PYYv1143E3EER
zz&"E"EF'-'E'E$"$,,q63Q3Q/QTU/UW[WoWo"p ++rE   c                     UR                  5       S S U R                  U R                  4-   nUR                  " U6 nUR	                  SSSS5      $ )Nri   r   r   r      )rI   r   r   viewpermute)r+   xnew_x_shapes      r,   transpose_for_scores'MCTCTSelfAttention.transpose_for_scores   sL    ffhsmt'?'?AYAY&ZZFFK yyAq!$$rE   c           	         [        UR                  5      S:  a4  UR                  " [        [	        [        UR                  5      5      5      6 nUR
                  " [        U5      6 R                  " [        [	        [        U5      5      5      6 $ )Nr   )lenshaper   reversedrangereshape)r+   r   r   s      r,   reshape_fortran"MCTCTSelfAttention.reshape_fortran   s^    qww<!		8E#agg,$789Ayy(5/*22HU3u:=N4OPPrE   c           	         UR                  SSSS5      nUR                  u  p#pE[        R                  " U[        R                  " X$XE4UR
                  S94SS9nU R                  XX4-   U-  SU/5      nUS S 2S XC-   S-
  U-  24   nU R                  XX4-   S-
  XE/5      nUS-  nUS S 2XfU-   24   R                  SS5      nUR                  SSSS5      $ )Nr   r   r   r   rn   rL   )r   r   rO   catr}   rn   r   rR   )r+   scoresbatchhidden_stateseq_lenheads	halfpoints          r,   "relative_position_embedding_rotate5MCTCTSelfAttention.relative_position_embedding_rotate   s    1a+.4ll+W FEKK0PY_YfYf$ghnop %%f|7MQX6XZ[]b.cd Cg4q8GCCCD %%fl6Lq6PRY.ab A%	97':::;EEaK~~aAq))rE   c                    U R                  U5      nU[        R                  " U R                  5      -  nU R	                  U R                  U5      5      nU R	                  U R                  U5      5      nU R	                  U5      n[        R                  " XR                  SS5      5      n	U R                  R                  n
[        R                  " SXR                  SS5      5      nU R                  U5      nX-   n	Ub  X-   n	[        R                  R!                  U	SS9nU R#                  U5      nUb  X-  n[        R                  " X5      nUR%                  SSSS5      R'                  SS	9nU(       a  X4nU$ U4nU$ )
Nri   zlh, bche -> bcler   r   rL   r   r   )	start_dim)r   mathsqrtr   r   r   r   rO   matmulrR   r   weighteinsumr   r   rP   softmaxr6   r   flatten)r+   rV   attention_mask	head_maskoutput_attentionsmixed_query_layer	key_layervalue_layerquery_layerattention_scorespositional_embeddingrelative_position_scoresattention_probscontext_layeroutputss                  r,   rX   MCTCTSelfAttention.forward   s|    !JJ}5-		$:R:R0SS--dhh}.EF	//

=0IJ//0AB !<<5H5HR5PQ  $66==#(<<0BDXZoZopqstZu#v #'#J#JKc#d +F%/@ --//0@b/I ,,7  -9O_B%--aAq9AABAO6G=2 O\M]rE   )
r   r   r   r6   r   r   rs   r   r   r   NNF)r[   r\   r]   r^   r0   r   r   r   rX   r`   ra   rb   s   @r,   r   r      s-    ,.%
Q
*8 . .rE   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )rw   i  c                    > [         TU ]  5         [        R                  " [        R
                  " S5      5      U l        [        R                  " [        R                  " S5      5      U l        g Nr   )	r/   r0   r   	ParameterrO   onessingleton_weightr}   singleton_bias)r+   rC   s    r,   r0   MCTCTLayerNorm.__init__  s@     "UZZ] ; ll5;;q>:rE   c                 8    XR                   -  U R                  -   $ N)r   r   r+   rV   s     r,   rX   MCTCTLayerNorm.forward  s     5 559L9LLLrE   )r   r   r[   r\   r]   r^   r0   rX   r`   ra   rb   s   @r,   rw   rw     s    ;
M MrE   rw   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )MCTCTSelfOutputi   c                 2  > [         TU ]  5         Xl        [        R                  " UR
                  UR
                  SS9U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g NFr   )eps)r/   r0   r1   r   r   r<   denserx   layer_norm_epsr4   ry   r6   rB   s     r,   r0   MCTCTSelfOutput.__init__!  sg    YYv1163E3EER
f&8&8f>S>STzz&"<"<=rE   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r6   rx   r+   rV   input_tensors      r,   rX   MCTCTSelfOutput.forward(  5    

=1]3}'CDrE   )rx   r1   r   r6   r   rb   s   @r,   r   r      s    > rE   r   c                   >   ^  \ rS rSrU 4S jrS r   SS jrSrU =r$ )MCTCTAttentioni/  c                    > [         TU ]  5         [        U5      U l        [	        U5      U l        [        5       U l        g r   )r/   r0   r   r+   r   outputsetpruned_headsrB   s     r,   r0   MCTCTAttention.__init__0  s0    &v.	%f-ErE   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   rL   )r   r   r+   r   r   r   r   r   r   r   r   r   r   union)r+   r   indexs      r,   prune_headsMCTCTAttention.prune_heads6  s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:rE   c                 j    U R                  UUUU5      nU R                  US   U5      nU4USS  -   nU$ )Nr   r   )r+   r   )r+   rV   r   r   r   self_outputsattention_outputr   s           r,   rX   MCTCTAttention.forwardH  sN     yy	
  ;;|AF#%QR(88rE   )r   r   r+   r   )	r[   r\   r]   r^   r0   r   rX   r`   ra   rb   s   @r,   r   r   /  s"    ";*  rE   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )MCTCTIntermediatei[  c                   > [         TU ]  5         [        R                  " UR                  UR
                  SS9U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g )NFr   )r/   r0   r   r   r<   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnrB   s     r,   r0   MCTCTIntermediate.__init__\  sc    YYv1163K3KRWX
f''--'-f.?.?'@D$'-'8'8D$rE   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r  r   s     r,   rX   MCTCTIntermediate.forwardd  s&    

=100?rE   r  r   rb   s   @r,   r   r   [  s    9 rE   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )MCTCTOutputij  c                 &  > [         TU ]  5         [        R                  " UR                  UR
                  SS9U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r/   r0   r   r   r   r<   r   rx   r   r4   ry   r6   rB   s     r,   r0   MCTCTOutput.__init__k  sc    YYv779K9KRWX
f&8&8f>S>STzz&"<"<=rE   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      r,   rX   MCTCTOutput.forwardq  r   rE   )rx   r   r6   r   rb   s   @r,   r
  r
  j  s    > rE   r
  c                   F   ^  \ rS rSrS\4U 4S jjr   SS jrS rSrU =r	$ )
MCTCTLayerix  r1   c                    > [         TU ]  5         SU l        UR                  U l        [	        U5      U l        [        U5      U l        UR                  U l        [        U5      U l
        g r   )r/   r0   seq_len_dimchunk_size_feed_forwardr   intermediater   	attentionr   r
  r   rB   s     r,   r0   MCTCTLayer.__init__y  sV    '-'E'E$-f5'/ ++!&)rE   c                     U R                  XX4S9nUS   nUSS  n[        U R                  U R                  U R                  U5      nU4U-   nU$ )N)r   r   r   )r  r   feed_forward_chunkr  r  )	r+   rV   r   r   r   self_attention_outputsr   r   layer_outputs	            r,   rX   MCTCTLayer.forward  so     "&9 "0 "
 2!4(,0##T%A%A4CSCSUe
  /G+rE   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r  r   )r+   r   intermediate_outputr  s       r,   r  MCTCTLayer.feed_forward_chunk  s)    "//0@A{{#6IrE   )r  r  r  r   r   r  r   )
r[   r\   r]   r^   r   r0   rX   r  r`   ra   rb   s   @r,   r  r  x  s)    	*{ 	* * rE   r  c                   ^    \ rS rSr% Sr\\S'   SrSrSr	S r
S\R                  4S	 jrS
 rSrg)MCTCTPreTrainedModeli  zz
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
r1   mctctrU   Tc                    U R                   R                  n[        U[        R                  5      (       aW  UR
                  R                  R                  SUS9  UR                  b$  UR                  R                  R                  5         GOI[        U[        R                  5      (       ac  UR
                  R                  R                  SUS9  UR                  b1  UR
                  R                  UR                     R                  5         O[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR
                  R                  R                  S5        O^[        U[        5      (       aI  UR                  R                  R                  S5        UR                   R                  R                  5         [        U[        R                  [        R"                  45      (       aW  UR
                  R                  R                  SUS9  UR                  b%  UR                  R                  R                  5         ggg)zInitialize the weightsg        )meanstdNg      ?)r1   initializer_ranger  r   r   r   datanormal_r   zero_ro   rg   rx   fill_rw   r   r   r#   )r+   moduler$  s      r,   _init_weights"MCTCTPreTrainedModel._init_weights  s   kk++fbii(( MM&&CS&9{{&  &&(--MM&&CS&9!!-""6#5#56<<>--KK""$MM$$S)//##((..s3!!&&,,.fryy"))455MM&&CS&9{{&  &&( ' 6rE   input_lengthsc                 "   Sn[        [        U R                  R                  5      U R                  R                  U R                  R
                  5       H6  u  p4nUS-  nUSU-  -   X$S-
  -  -
  S-
  n[        R                  " XSS9S-   nM8     U$ )z8
Computes the output length of the convolutional layers
r   r   trunc)rounding_mode)zipr   r1   r7   r=   r>   rO   div)r+   r-  dilation_	kernel_szr!   r"   s          r,    _get_feat_extract_output_lengths5MCTCTPreTrainedModel._get_feat_extract_output_lengths  s     $'$++--.0G0GI`I`%
 A&  1nG)AK7(RSm:TTWXXM!IIm7SVWWM%
 rE   c                    [        UR                  5      S:  a  US S 2S S 2S4   nU R                  UR                  S5      5      nUR	                  5       S   n[
        R                  " XA4UR                  UR                  S9nSU[
        R                  " XBR                  S9US-
  4'   UR                  S/5      R                  S5      R                  S/5      R                  5       nU$ )Nr   ri   r   rl   r   r   )r   r   r6  rN   rI   rO   r}   rm   rn   r{   flipcumsumr~   )r+   feature_vector_lengthr   subsampled_lengthsbszs        r,   "_get_feature_vector_attention_mask7MCTCTPreTrainedModel._get_feature_vector_attention_mask  s     ~##$q(+Aq"H5N "BB>CUCUVXCYZ!!#A&(0D0D^MbMb
 efS1F1FGI[^_I_`a',,bT299"=BBB4HMMOrE   rH   N)r[   r\   r]   r^   r_   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointingr+  rO   
LongTensorr6  r>  r`   rH   rE   r,   r   r     s<    
 &O&*#)0e>N>N rE   r   aH  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`MCTCTConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_features (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`Wav2Vec2CTCTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
c                      ^  \ rS rSrS\4U 4S jjr   SS\R                  S\R                  S\R                  S\S\S	\S
\	\
\4   4S jjrSrU =r$ )MCTCTEncoderi  r1   c                 ,  > [         TU ]  U5        UR                  U l        [        5       U l        [        U5      U l        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l        g s  snf )NF)r/   r0   ry   rw   
layer_normr   rW   r   r?   r   num_hidden_layersr  layersgradient_checkpointing)r+   r1   r4  rC   s      r,   r0   MCTCTEncoder.__init__  ss     #)#=#= (*)&1	mmvG_G_A`$aA`AZ%7A`$ab&+# %bs   (BrU   r   r   r   output_hidden_statesreturn_dictreturnc                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  U5      nU R                  U5      nUb  U R                  UR                  S   U5      n[        R                  R                  XpR                  U R                  S9nUb  [        X'R                  5      nU(       a  SOS n	U(       a  SOS n
Ub`  UR                  5       S   [!        U R"                  5      :w  a6  [%        S[!        U R"                  5       SUR                  5       S    S35      e['        5       =(       d    [)        U 5      n[+        U R"                  5       H  u  pU(       a  X4-   n	[,        R.                  " / 5      nU R                  =(       a    XR                   R0                  :  nU(       a  U(       a  U" UUUS9nUS   nU(       a  S	nU(       d  M  U
WS   4-   n
M     U(       a  X4-   n	U(       d  [3        S
 XU
4 5       5      $ [5        XU
S9$ )Nr   )ptrainingrH   r   z&The head_mask should be specified for z layers, but it is for .)rV   r   r   )NNc              3   .   #    U  H  oc  M  Uv   M     g 7fr   rH   )r(   vs     r,   r-   'MCTCTEncoder.forward.<locals>.<genexpr>`  s     e$Sq$Ss   	last_hidden_staterV   
attentions)r1   r   rM  use_return_dictrH  rW   r>  r   r   rP   r6   ry   rR  r   rm   rI   r   rJ  r;   r   r   r@   rO   rand	layerdroptupler   )r+   rU   r   r   r   rM  rN  r   rV   encoder_statesall_attentionssynced_gpusidxencoder_layerdropout_probabilityskip_the_layerlayer_outputss                    r,   rX   MCTCTEncoder.forward  s,    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]8		.1 %!DD]EXEXYZE[]klN--m?W?Wbfbobo-p %7H[H[\N30d  ~~"c$++&66 <S=M<N O%%.^^%5a%8$9< 
 12R6LT6R"+DKK"8C#!/2B!B #(**R.!]]Z/B[[EZEZ/ZN![ -"/#1&7! !.a 0 ,  !/=3C2E!E- #90  +.>>Ne]N$Seee+Vd
 	
rE   )rW   rK  ry   rH  rJ  )FFT)r[   r\   r]   r^   r   r0   rO   Tensorboolr   r]  r   rX   r`   ra   rb   s   @r,   rF  rF    s    ,{ , #(%* I
I
 I
 <<	I

  I
 #I
 I
 
uo%	&I
 I
rE   rF  zaThe bare M-CTC-T Model transformer outputting raw hidden-states without any specific head on top.c                     ^  \ rS rSrU 4S jr\" \R                  S5      5      \" \	\
\S\S9     SS\R                  S\\R                     S\\R                     S	\\   S
\\   S\\   S\\\
4   4S jj5       5       rSrU =r$ )
MCTCTModelif  c                 p   > [         TU ]  U5        Xl        [        U5      U l        U R                  5         g r   )r/   r0   r1   rF  encoder	post_initrB   s     r,   r0   MCTCTModel.__init__k  s-     #F+ 	rE   zbatch_size, sequence_lengthaudio)
checkpointoutput_typeconfig_classmodalityexpected_outputrU   r   r   r   rM  rN  rO  c           	      P   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [	        S5      eU R                  UUUUUUS9nUS   nU(       d	  U4USS  -   $ [        UUR                  UR                  S9$ )Nz#You have to specify input_features.r   r   r   rM  rN  r   r   rW  )	r1   r   rM  rZ  r;   rl  r   rV   rY  )	r+   rU   r   r   r   rM  rN  encoder_outputssequence_outputs	            r,   rX   MCTCTModel.forwardt  s    " 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]!BCC,,)/!5# ' 
 *!,#%(;;;-)77&11
 	
rE   )r1   rl  )NNNNN)r[   r\   r]   r^   r0   r
   MCTCT_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPErO   rg  r   rh  r   r]  rX   r`   ra   rb   s   @r,   rj  rj  f  s    
 ++A+H+HIf+gh&#$. 26,0,0/3&*#
#
 !.#
 ELL)	#

 $D>#
 'tn#
 d^#
 
uo%	&#
 i#
rE   rj  zcMCTCT Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).c                     ^  \ rS rSrU 4S jr\" \5      \" \\	\
\\S9      SS\R                  S\\R                     S\\R                     S\\   S\\   S	\\   S
\\R"                     S\\\	4   4S jj5       5       rSrU =r$ )MCTCTForCTCi  c                   > [         TU ]  U5        [        U5      U l        UR                  c  [        SU R                   S35      eUR                  n[        R                  " X!R                  5      U l
        U R                  5         g )NzYou are trying to instantiate z with a configuration that does not define the vocabulary size of the language model head. Please instantiate the model as follows: `MCTCTForCTC.from_pretrained(..., vocab_size=vocab_size)`. or define `vocab_size` of your model's configuration.)r/   r0   rj  r!  rp   r;   rC   r<   r   r   ctc_headrm  )r+   r1   output_hidden_sizerC   s      r,   r0   MCTCTForCTC.__init__  s     '
$00@ AH H  $//		"46G6GH 	rE   )rp  rq  rr  rt  expected_lossrU   r   r   r   rM  rN  labelsrO  c                    UbJ  UR                  5       U R                  R                  :  a"  [        SU R                  R                   35      eUb  UOU R                  R                  nU R                  UUUUUUS9nUS   n	U R                  U	5      n
SnUGbe  Ub  UO/[        R                  " UR                  SS [        R                  S9nU R                  UR                  S5      5      R                  [        R                  5      nUS:  nUR                  S5      nUR                  U5      n[        R                   R#                  U
S[        R$                  S9R'                  SS5      n[        R(                  R*                  R-                  S	S
9   [        R                   R/                  UUUUU R                  R0                  U R                  R2                  U R                  R4                  S9nSSS5        U(       d  U
4U[6        S -   nUb  U4U-   $ U$ [9        XUR:                  UR<                  S9$ ! , (       d  f       NL= f)a  
labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
    Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
    the sequence length of the output logits. Indices are selected in `[-100, 0, ..., config.vocab_size - 1]`.
    All labels set to `-100` are ignored (masked), the loss is only computed for labels in `[0, ...,
    config.vocab_size - 1]`.
Nz$Label values must be <= vocab_size: rv  r   ri   )rm   )rM   rm   r   F)enabled)blank	reductionzero_infinity)losslogitsrV   rY  )maxr1   rp   r;   rZ  r!  r  rO   r   r   r~   r6  rN   tomasked_selectr   rP   log_softmaxfloat32rR   backendscudnnflagsctc_lossrq   ctc_loss_reductionctc_zero_infinity_HIDDEN_STATES_START_POSITIONr   rV   rY  )r+   rU   r   r   r   rM  rN  r  r   rV   r  r  r-  labels_masktarget_lengthsflattened_targets	log_probsr   s                     r,   rX   MCTCTForCTC.forward  s    2 &**,$++2H2H"HCDKKDZDZC[\]]%0%<k$++B]B]**)/!5#  
  
}- "- ZZ 4 4Sb 9L 
 !AA.BTBTUWBXY\\]b]g]ghM !A+K(__R0N & 4 4[ A 11&b1V``abdefI%%++E+:}}--%!"++22"kk<<"&++"?"? .  ; Y)F)G!HHF)-)9TGf$EvEG4I4IV]VhVh
 	
 ;:s   A H<<
I
)r  r!  )NNNNNN)r[   r\   r]   r^   r0   r
   rz  r   r|  r   r}  _CTC_EXPECTED_OUTPUT_CTC_EXPECTED_LOSSrO   rg  r   rh  rD  r   r]  rX   r`   ra   rb   s   @r,   r  r    s    
& ++AB&"$,( 26,0,0/3&*-1E
E
 !.E
 ELL)	E

 $D>E
 'tnE
 d^E
 ))*E
 
un$	%E
 CE
rE   r  )r  rj  r   )<r_   r   typingr   r   rO   r   activationsr   
file_utilsr   r	   r
   integrations.deepspeedr   integrations.fsdpr   modeling_attn_mask_utilsr   modeling_layersr   modeling_outputsr   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   configuration_mctctr   
get_loggerr[   loggerr  r}  r|  r~  r  r  Moduler   rd   r   rw   r   r   r   r
  r  r   MCTCT_START_DOCSTRINGrz  rF  rj  r  __all__rH   rE   r,   <module>r     s     "   " r r A 8 C : @ . m m  , 
		H	% !  2 '  t  :BII :z7bii 7ti iXMRYY Mbii )RYY )X		 ")) $+ $NB? BJ	  @T
' T
n g5
% 5
	5
p ma
& a
	a
H @rE   