
    bivN                       S SK Jr  S SKrS SKJr  S SKJr  S SKJ	r	J
r
JrJrJrJr  S SKrS SKJrJrJrJrJrJrJrJr J!r"J#r$  S SK%J&r'J(r)J*r+  S SK,J-r-   S S	K.J/r0  / S
Qr2\" S/ SQ5      r3S r4S r5 " S S\5      r6 " S S\5      r7 " S S\5      r8 " S S5      r9SSSSSSSS \:" S5      S SSSS4                                 S"S jjr;S#S  jr<S#S! jr=g! \1 a     S S	KJ/r0   N! \1 a    Sr0  Nf = ff = f)$    )annotationsN)abstractmethod)
namedtuple)DictList
NamedTupleOptionalTupleUnion)
CriterionTypeLexiconDecoderLexiconDecoderOptionsLexiconFreeDecoderLexiconFreeDecoderOptionsLMLMStateSmearingModeTrieZeroLM)create_word_dict
Dictionary
load_words)_download_asset)KenLM)CTCHypothesis
CTCDecoderCTCDecoderLMCTCDecoderLMStatectc_decoderdownload_pretrained_filesPretrainedFileslexicontokenslmc                   U R                  5       n[        XT5      nUR                  S5      nUR                  5        Hd  u  pUR	                  U5      n
UR                  Xz5      u  pU	 H5  nU Vs/ s H  oR	                  U5      PM     nnUR                  XU5        M7     Mf     UR                  [        R                  5        U$ s  snf )NF)

index_size_Triestartitems	get_indexscoreinsertsmear_SmearingModeMAX)tokens_dict	word_dictr#   r%   silence
vocab_sizetriestart_stateword	spellingsword_idx_r,   spellingtokenspelling_idxs                   `/home/james-whalen/.local/lib/python3.13/site-packages/torchaudio/models/decoder/_ctc_decoder.py_construct_trier?   2   s    '')J%D((5/K"==?&&t,88K2!HFNOhU11%8hLOKK6 " + 	JJ}  !K Ps   /Cc                J   S nUb  [        U5      nU (       a  Uc  [        U 5      nU$ U (       do  Ucl  [        U5      [        L aZ  [	        UR                  5       5       Vs0 s H%  ocR                  U5      UR                  U5      //_M'     nnU//Xt'   [        U5      nU$ s  snf N)_Dictionary_create_word_dicttypestrranger'   	get_entry)r#   r%   lm_dictr1   unk_wordr2   ids           r>   _get_word_dictrL   A   s    I(	9$%g.	  *tBx3MRS^SiSiSkMlmMl""1%)>)>q)A(B'CCMlm zl%a(		 ns   ,B c                  H    \ rS rSr% SrS\S'    S\S'    S\S'    S	\S
'   Srg)r   P   zORepresents hypothesis generated by CTC beam search decoder :class:`CTCDecoder`.torch.LongTensorr$   z	List[str]wordsfloatr,   torch.IntTensor	timesteps N)__name__
__module____qualname____firstlineno____doc____annotations____static_attributes__rT       r>   r   r   P   s,    Zh L+nr\   r   c                  T   ^  \ rS rSrSr\SU 4S jj5       rSU 4S jjrS	S jrSr	U =r
$ )
r   e   zLanguage model state.c                   > [         TU ]  $ )zMap of indices to LM states)superchildren)self	__class__s    r>   ra   CTCDecoderLMState.childrenh   s     wr\   c                "   > [         TU ]  U5      $ )zReturns child corresponding to usr_index, or creates and returns a new state if input index
is not found.

Args:
    usr_index (int): index corresponding to child state

Returns:
    CTCDecoderLMState: child state corresponding to usr_index
)r`   child)rb   	usr_indexrc   s     r>   rf   CTCDecoderLMState.childm   s     w}Y''r\   c                    g)zCompare two language model states.

Args:
    state (CTCDecoderLMState): LM state to compare against

Returns:
    int: 0 if the states are the same, -1 if self is less, +1 if self is greater.
NrT   rb   states     r>   compareCTCDecoderLMState.comparey   s     	r\   rT   )returnzDict[int, CTCDecoderLMState])rg   intrn   r   )rk   r   rn   r   )rU   rV   rW   rX   rY   propertyra   rf   rl   r[   __classcell__)rc   s   @r>   r   r   e   s'       
(	 	r\   r   c                  T    \ rS rSrSr\SS j5       r\S	S j5       r\S
S j5       rSr	g)r      zVLanguage model base class for creating custom language models to use with the decoder.c                    [         e)zInitialize or reset the language model.

Args:
    start_with_nothing (bool): whether or not to start sentence with sil token.

Returns:
    CTCDecoderLMState: starting state
NotImplementedError)rb   start_with_nothings     r>   r)   CTCDecoderLM.start   s
     "!r\   c                    [         e)a(  Evaluate the language model based on the current LM state and new word.

Args:
    state (CTCDecoderLMState): current LM state
    usr_token_idx (int): index of the word

Returns:
    (CTCDecoderLMState, float)
        CTCDecoderLMState:
            new LM state
        float:
            score
ru   )rb   rk   usr_token_idxs      r>   r,   CTCDecoderLM.score   s
     "!r\   c                    [         e)zEvaluate end for language model based on current LM state.

Args:
    state (CTCDecoderLMState): current LM state

Returns:
    (CTCDecoderLMState, float)
        CTCDecoderLMState:
            new LM state
        float:
            score
ru   rj   s     r>   finishCTCDecoderLM.finish   s
     "!r\   rT   N)rw   boolrn   r   )rk   r   rz   ro   rn   Tuple[CTCDecoderLMState, float])rk   r   rn   r   )
rU   rV   rW   rX   rY   r   r)   r,   r}   r[   rT   r\   r>   r   r      s?    `	" 	" " "  " "r\   r   c                      \ rS rSrSr                    SS jrSS jrSS jrS rS r	SS jr
SS	 jrSS
 jr S     SS jjrSS jrSrg)r      zCTC beam search decoder from *Flashlight* :cite:`kahn2022flashlight`.

.. devices:: CPU

Note:
    To build the decoder, please use the factory function :func:`ctc_decoder`.
c
           
     x   Xl         X0l        X@l        U R                  R                  U5      U l        U R                  R                  U5      n
/ nU(       aB  [        XCX%U
5      nUR                  U	5      n	Sn[        UUUU
U R                  U	UU5      U l        O[        XeXR                  U5      U l        XPl	        g)a  
Args:
    nbest (int): number of best decodings to return
    lexicon (Dict or None): lexicon mapping of words to spellings, or None for lexicon-free decoder
    word_dict (_Dictionary): dictionary of words
    tokens_dict (_Dictionary): dictionary of tokens
    lm (CTCDecoderLM): language model. If using a lexicon, only word level LMs are currently supported
    decoder_options (_LexiconDecoderOptions or _LexiconFreeDecoderOptions):
        parameters used for beam search decoding
    blank_token (str): token corresopnding to blank
    sil_token (str): token corresponding to silence
    unk_word (str): word corresponding to unknown
FN)
nbestr2   r1   r+   blankr?   _LexiconDecoderdecoder_LexiconFreeDecoderr%   )rb   r   r#   r2   r1   r%   decoder_optionsblank_token	sil_tokenrI   r3   transitionsr5   token_lms                 r>   __init__CTCDecoder.__init__   s    4 
"&%%//<
"",,Y7";7PD **84HH*

	DL /GZZYdeDL r\   c                   ^  S [         R                  " U5       5       n[        U 4S jU5      n[        R                  " [        U5      5      $ )Nc              3  *   #    U  H	  oS    v   M     g7f)r   NrT   ).0gs     r>   	<genexpr>)CTCDecoder._get_tokens.<locals>.<genexpr>   s     /.!.s   c                "   > U TR                   :g  $ rA   )r   )xrb   s    r>   <lambda>(CTCDecoder._get_tokens.<locals>.<lambda>   s    TZZr\   )itgroupbyfiltertorch
LongTensorlist)rb   idxss   ` r>   _get_tokensCTCDecoder._get_tokens   s8    /bjj.//6T
++r\   c                    / n[        U5       H:  u  p4X@R                  :X  a  M  US:X  d  XAUS-
     :w  d  M)  UR                  U5        M<     [        R                  " U5      $ )z8Returns frame numbers corresponding to non-blank tokens.r      )	enumerater   appendr   	IntTensor)rb   r   rS   rJ   idxs        r>   _get_timestepsCTCDecoder._get_timesteps   sY     	oFAjj AvQU+  #	 &
 y))r\   c                8    U R                   R                  5         g)zInitialize the internal state of the decoder.

See :py:meth:`decode_step` for the usage.

.. note::

   This method is required only when performing online decoding.
   It is not necessary when performing batch decoding with :py:meth:`__call__`.
N)r   decode_beginrb   s    r>   r   CTCDecoder.decode_begin  s     	!!#r\   c                8    U R                   R                  5         g)zFinalize the internal state of the decoder.

See :py:meth:`decode_step` for the usage.

.. note::

   This method is required only when performing online decoding.
   It is not necessary when performing batch decoding with :py:meth:`__call__`.
N)r   
decode_endr   s    r>   r   CTCDecoder.decode_end  s     	!r\   c                   UR                   [        R                  :w  a  [        S5      eUR                  (       d  [        S5      eUR                  5       (       d  [        S5      eUR                  S:w  a  [        SUR                   35      eUR                  5       u  p#U R                  R                  UR                  5       X#5        g)a  Perform incremental decoding on top of the curent internal state.

.. note::

   This method is required only when performing online decoding.
   It is not necessary when performing batch decoding with :py:meth:`__call__`.

Args:
    emissions (torch.FloatTensor): CPU tensor of shape `(frame, num_tokens)` storing sequences of
        probability distribution over labels; output of acoustic model.

Example:
    >>> decoder = torchaudio.models.decoder.ctc_decoder(...)
    >>> decoder.decode_begin()
    >>> decoder.decode_step(emission1)
    >>> decoder.decode_step(emission2)
    >>> decoder.decode_end()
    >>> result = decoder.get_final_hypothesis()
emissions must be float32.emissions must be a CPU tensor.emissions must be contiguous.   zemissions must be 2D. Found N)dtyper   float32
ValueErroris_cpuRuntimeErroris_contiguousndimshapesizer   decode_stepdata_ptr)rb   	emissionsTNs       r>   r   CTCDecoder.decode_step   s    ( ??emm+9::@AA&&((>??>>Q!=ioo=NOPP~~  !3!3!5q<r\   c                D   U VVs/ s H  n[        U R                  UR                  5      UR                   Vs/ s H%  o3S:  d  M
  U R                  R                  U5      PM'     snUR                  U R                  UR                  5      S9PM     snn$ s  snf s  snnf )Nr   )r$   rP   r,   rS   )r   r   r$   rP   r2   rG   r,   r   )rb   resultsresultr   s       r>   _to_hypoCTCDecoder._to_hypoC  s     "
 " ''6<BLLSLqQRF2t~~//2LSll--fmm<	 "
 	
 T
s   3B	BB%.BBc                r    U R                   R                  5       nU R                  USU R                   5      $ )a  Get the final hypothesis

Returns:
    List[CTCHypothesis]:
        List of sorted best hypotheses.

.. note::

   This method is required only when performing online decoding.
   It is not necessary when performing batch decoding with :py:meth:`__call__`.
N)r   get_all_final_hypothesisr   r   )rb   r   s     r>   get_final_hypothesisCTCDecoder.get_final_hypothesisN  s0     ,,779}}W\tzz233r\   Nc                   UR                   [        R                  :w  a  [        S5      eUR                  (       d  [        S5      eUR                  5       (       d  [        S5      eUR                  S:w  a  [        SUR                   35      eUb  UR                  (       d  [        S5      eUR                  5       u  p4nUc  [        R                  " U4U5      nSn/ n[        U5       Hv  nUR                  5       Xh-  UR                  S	5      -  -   n	U R                  R                  XU   U5      n
UR!                  U R#                  U
SU R$                   5      5        Mx     U$ )
a{  
Performs batched offline decoding.

.. note::

   This method performs offline decoding in one go. To perform incremental decoding,
   please refer to :py:meth:`decode_step`.

Args:
    emissions (torch.FloatTensor): CPU tensor of shape `(batch, frame, num_tokens)` storing sequences of
        probability distribution over labels; output of acoustic model.
    lengths (Tensor or None, optional): CPU tensor of shape `(batch, )` storing the valid length of
        in time axis of the output Tensor in each batch.

Returns:
    List[List[CTCHypothesis]]:
        List of sorted best hypotheses for each audio sequence in the batch.
r   r   r      zemissions must be 3D. Found Nzlengths must be a CPU tensor.   r   )r   r   r   r   r   r   r   r   r   r   fullrF   r   strider   decoder   r   r   )rb   r   lengthsBr   r   float_byteshyposbemissions_ptrr   s              r>   __call__CTCDecoder.__call__]  s2   , ??emm+9::@AA&&((>??>>Q!=ioo=NOPPw~~>??.."a?jj!q)GqA%..0;?YEUEUVWEX3XXMll))-QGGLLw|'<=>  r\   c                ~    U Vs/ s H+  o R                   R                  UR                  5       5      PM-     sn$ s  snf )z
Map raw token IDs into corresponding tokens

Args:
    idxs (LongTensor): raw token IDs generated from decoder

Returns:
    List: tokens corresponding to the input IDs
)r1   rG   item)rb   r   r   s      r>   idxs_to_tokensCTCDecoder.idxs_to_tokens  s3     CGG$3  **388:6$GGGs   2:)r   r   r%   r   r1   r2   )r   ro   r#   zOptional[Dict]r2   rB   r1   rB   r%   r   r   z9Union[_LexiconDecoderOptions, _LexiconFreeDecoderOptions]r   rE   r   rE   rI   rE   rn   None)r   rR   rn   rO   )r   rR   rn   rR   )r   torch.FloatTensor)rn   zList[CTCHypothesis]rA   )r   r   r   zOptional[torch.Tensor]rn   zList[List[CTCHypothesis]])r   rO   rn   r   )rU   rV   rW   rX   rY   r   r   r   r   r   r   r   r   r   r   r[   rT   r\   r>   r   r      s    77  7 	7
 !7 7 S7 7 7 7 
7r,
	*
$
"!=F	
4  OS0*05K0	"0d
Hr\   r   r   2   r   z-infF-|z<unk>c                   Ub  [        U5      [        La  [        S5      e[        U5      nU (       aB  [	        U 5      n [        UU=(       d    UR                  5       UUU	U
UU[        R                  S9	nO4[        UU=(       d    UR                  5       UUUU[        R                  S9n[        XUUU5      n[        U5      [        L a  [        c  [        S5      e[        UU5      nOUc
  [        5       n[        UU UUUUUUUS9	$ )a  Builds an instance of :class:`CTCDecoder`.

Args:
    lexicon (str or None): lexicon file containing the possible words and corresponding spellings.
        Each line consists of a word and its space separated spelling. If `None`, uses lexicon-free
        decoding.
    tokens (str or List[str]): file or list containing valid tokens. If using a file, the expected
        format is for tokens mapping to the same index to be on the same line
    lm (str, CTCDecoderLM, or None, optional): either a path containing KenLM language model,
        custom language model of type `CTCDecoderLM`, or `None` if not using a language model
    lm_dict (str or None, optional): file consisting of the dictionary used for the LM, with a word
        per line sorted by LM index. If decoding with a lexicon, entries in lm_dict must also occur
        in the lexicon file. If `None`, dictionary for LM is constructed using the lexicon file.
        (Default: None)
    nbest (int, optional): number of best decodings to return (Default: 1)
    beam_size (int, optional): max number of hypos to hold after each decode step (Default: 50)
    beam_size_token (int, optional): max number of tokens to consider at each decode step.
        If `None`, it is set to the total number of tokens (Default: None)
    beam_threshold (float, optional): threshold for pruning hypothesis (Default: 50)
    lm_weight (float, optional): weight of language model (Default: 2)
    word_score (float, optional): word insertion score (Default: 0)
    unk_score (float, optional): unknown word insertion score (Default: -inf)
    sil_score (float, optional): silence insertion score (Default: 0)
    log_add (bool, optional): whether or not to use logadd when merging hypotheses (Default: False)
    blank_token (str, optional): token corresponding to blank (Default: "-")
    sil_token (str, optional): token corresponding to silence (Default: "|")
    unk_word (str, optional): word corresponding to unknown (Default: "<unk>")

Returns:
    CTCDecoder: decoder

Example
    >>> decoder = ctc_decoder(
    >>>     lexicon="lexicon.txt",
    >>>     tokens="tokens.txt",
    >>>     lm="kenlm.bin",
    >>> )
    >>> results = decoder(emissions) # List of shape (B, nbest) of Hypotheses
z!lm_dict must be None or str type.)		beam_sizebeam_size_tokenbeam_threshold	lm_weight
word_score	unk_score	sil_scorelog_addcriterion_type)r   r   r   r   r   r   r   zflashlight-text is installed, but KenLM is not installed. Please refer to https://github.com/kpu/kenlm#python-module for how to install it.)	r   r#   r2   r1   r%   r   r   r   rI   )rD   rE   r   rB   _load_words_LexiconDecoderOptionsr'   _CriterionTypeCTC_LexiconFreeDecoderOptionsrL   _KenLMr   _ZeroLMr   )r#   r$   r%   rH   r   r   r   r   r   r   r   r   r   r   r   rI   r1   r   r2   s                      r>   r   r     s    r tG}C7<==f%K g&0+G{/E/E/G)!)--

 5+G{/E/E/G))--
 wG[(KIBx3>d  B	"	Y'
 
r\   c                p    U S;  a  [        U  S35      eSU  3n[        U S3U S3U S:w  a  U S3S9$ S S9$ )	N)librispeechzlibrispeech-3-gramzlibrispeech-4-gramzZ not supported. Must be one of ['librispeech-3-gram', 'librispeech-4-gram', 'librispeech']zdecoder-assets/z/lexicon.txtz/tokens.txtr   z/lm.binr"   )r   _PretrainedFiles)modelprefixs     r>   _get_filenamesr     ss    OOgop
 	
 ug&F(,'%!&-!7fXW  >B r\   c                    [        U 5      n[        UR                  5      n[        UR                  5      nUR                  b  [        UR                  5      nOSn[        UUUS9$ )a!  
Retrieves pretrained data files used for :func:`ctc_decoder`.

Args:
    model (str): pretrained language model to download.
        Valid values are: ``"librispeech-3-gram"``, ``"librispeech-4-gram"`` and ``"librispeech"``.

Returns:
    Object with the following attributes

        * ``lm``: path corresponding to downloaded language model,
          or ``None`` if the model is not associated with an lm
        * ``lexicon``: path corresponding to downloaded lexicon file
        * ``tokens``: path corresponding to downloaded tokens file
Nr"   )r   r   r#   r$   r%   r   )r   fileslexicon_filetokens_filelm_files        r>   r    r      s]    " 5!E"5==1L!%,,/Kxx!%((+ r\   )"r#   Optional[str]r$   zUnion[str, List[str]]r%   zUnion[str, CTCDecoderLM]rH   r  r   ro   r   ro   r   zOptional[int]r   rQ   r   rQ   r   rQ   r   rQ   r   rQ   r   r   r   rE   r   rE   rI   rE   rn   r   )r   rE   rn   r   )>
__future__r   	itertoolsr   abcr   collectionsr   typingr   r   r   r	   r
   r   r   flashlight.lib.text.decoderr   r   r   r   r   r   r   r   r   r   r   _LMr   _LMStater   r/   r   r(   r   r   flashlight.lib.text.dictionaryr   rC   r   rB   r   r   torchaudio.utilsr   !flashlight.lib.text.decoder.kenlmr   r   	Exception__all__r   r?   rL   r   r   r   r   rQ   r   r   r    rT   r\   r>   <module>r     s   "   " A A    
 -A /1LM oJ o* @."3 ."bcH cHR $(!%)V}!nn!n 	!n 	n
 n n #n n n n n n n n n  !n" #nbw  ? s*   C! !D (C00C<8D ;C<<D 