
    C1igS              )       0   S SK r S SKrS SKrS SKrS SKrS SKJrJrJrJ	r	J
r
Jr  S SKJrJrJrJrJrJr  S r   S6S\S\\	\      S\
\\	\         S\S	\S
\\   4S jjr  S7S\S\\	\      S\\	\      S\S	\S
\\   4S jjr  S8S\S\\	\      S\S	\S
\\   4
S jjr  S7S\S\\	\      S\S	\S
\\   4
S jjr S9SSSSSSSS SSSSSS.S\S\	\   S\
\	\      S\S\S\S\S\S\S\S\S \S!\
\	\	\         S"\
\\\	\   \	\   4      S#\S$\S
\\   4"S% jjjr  S:S&S SSSSSS SSSSS'SS(.S\S)\\	\   \	\	\      4   S\S	\S*\S+\S\S\S\S\S\S\S \S!\
\	\	\         S"\
\\\	\   \	\   4      S,\
\	\      S-\S.\\/\4   S
\\   4&S/ jjjr  S:S&S SSSSSS SSSSS'SS(.S\S)\\	\   \	\	\      4   S\S	\S*\S+\S\S\S\S\S\S\S \S!\
\	\	\         S"\
\\\	\   \	\   4      S,\
\	\      S-\S.\\/\4   S
\\   4&S0 jjjr " S1 S25      rS3 r S4 r!S5 r"g);    N)AsyncIterableCallableIterableListOptionalUnion)GenerationResultGenerationStepResult	GeneratorScoringResultTranslationResult
Translatorc                  *   [        [        S[        5        [        [        S[        5        [        [        S[        5        [        [
        S[        5        [        [
        S[        5        [        [
        S[        5        [        [
        S[        5        g)z4Registers additional attributes to compiled modules.translate_iterablescore_iterablegenerate_tokensgenerate_iterableasync_generate_tokensN)
setattrr   translator_translate_iterabletranslator_score_iterabletranslator_generate_tokensr   generator_generate_iterablegenerator_score_iterablegenerator_generate_tokensgenerator_async_generate_tokens     P/home/james-whalen/.local/lib/python3.13/site-packages/ctranslate2/extensions.pyregister_extensionsr       sd    J,.KLJ(*CDJ)+EFI*,GHI')ABI(*CDI.0OPr   
translatorsourcetarget_prefixmax_batch_size
batch_typereturnc              +      #    U/nUb  UR                  U5        [        U R                  UUU40 UD6 Sh  vN   g N7f)a  Translates an iterable of tokenized examples.

This method is built on top of :meth:`ctranslate2.Translator.translate_batch`
to efficiently translate an arbitrarily large stream of data. It enables the
following optimizations:

* stream processing (the iterable is not fully materialized in memory)
* parallel translations (if the translator has multiple workers)
* asynchronous batch prefetching
* local sorting by length

Arguments:
  source: An iterable of tokenized source examples.
  target_prefix: An optional iterable of tokenized target prefixes.
  max_batch_size: The maximum batch size.
  batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
  **kwargs: Any translation options accepted by
    :meth:`ctranslate2.Translator.translate_batch`.

Returns:
  A generator iterator over :class:`ctranslate2.TranslationResult` instances.

Example:
  This method can be used to efficiently translate text files:

  .. code-block:: python

      # Replace by your own tokenization and detokenization functions.
      tokenize_fn = lambda line: line.strip().split()
      detokenize_fn = lambda tokens: " ".join(tokens)

      with open("input.txt") as input_file:
          source = map(tokenize_fn, input_file)
          results = translator.translate_iterable(source, max_batch_size=64)

          for result in results:
              tokens = result.hypotheses[0]
              target = detokenize_fn(tokens)
              print(target)
N)append_process_iterabletranslate_batch)r!   r"   r#   r$   r%   kwargs	iterabless          r   r   r      sQ     ` I ' ""	
   s   4><>targetc              +   T   #    [        U R                  X/UU40 UD6 Sh  vN   g N7f)a9  Scores an iterable of tokenized examples.

This method is built on top of :meth:`ctranslate2.Translator.score_batch`
to efficiently score an arbitrarily large stream of data. It enables the
following optimizations:

* stream processing (the iterable is not fully materialized in memory)
* parallel scoring (if the translator has multiple workers)
* asynchronous batch prefetching
* local sorting by length

Arguments:
  source: An iterable of tokenized source examples.
  target: An iterable of tokenized target examples.
  max_batch_size: The maximum batch size.
  batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
  **kwargs: Any scoring options accepted by
    :meth:`ctranslate2.Translator.score_batch`.

Returns:
  A generator iterator over :class:`ctranslate2.ScoringResult` instances.
Nr)   score_batch)r!   r"   r-   r$   r%   r+   s         r   r   r   [   s8     < !		
      (&(	generatorstart_tokensc              +   T   #    [        U R                  U/UU40 UD6 Sh  vN   g N7f)a#  Generates from an iterable of tokenized prompts.

This method is built on top of :meth:`ctranslate2.Generator.generate_batch`
to efficiently run generation on an arbitrarily large stream of data. It enables
the following optimizations:

* stream processing (the iterable is not fully materialized in memory)
* parallel generations (if the generator has multiple workers)
* asynchronous batch prefetching
* local sorting by length

Arguments:
  start_tokens: An iterable of tokenized prompts.
  max_batch_size: The maximum batch size.
  batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
  **kwargs: Any generation options accepted by
    :meth:`ctranslate2.Generator.generate_batch`.

Returns:
  A generator iterator over :class:`ctranslate2.GenerationResult` instances.
N)r)   generate_batch)r2   r3   r$   r%   r+   s        r   r   r      s7     8 !  		
   r1   tokensc              +   T   #    [        U R                  U/UU40 UD6 Sh  vN   g N7f)a  Scores an iterable of tokenized examples.

This method is built on top of :meth:`ctranslate2.Generator.score_batch`
to efficiently score an arbitrarily large stream of data. It enables
the following optimizations:

* stream processing (the iterable is not fully materialized in memory)
* parallel scoring (if the generator has multiple workers)
* asynchronous batch prefetching
* local sorting by length

Arguments:
  tokens: An iterable of tokenized examples.
  max_batch_size: The maximum batch size.
  batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
  **kwargs: Any score options accepted by
    :meth:`ctranslate2.Generator.score_batch`.

Returns:
  A generator iterator over :class:`ctranslate2.ScoringResult` instances.
Nr/   )r2   r6   r$   r%   r+   s        r   r   r      s7     8 !		
   r1         Fi   )max_decoding_lengthmin_decoding_lengthsampling_topksampling_toppsampling_temperaturereturn_log_probrepetition_penaltyno_repeat_ngram_sizedisable_unksuppress_sequences	end_tokenmax_input_lengthuse_vmapr:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   c             #   r   #    [        U R                  U/Ub  U/OSU	U
UUUUUUUUUUUS9 Sh  vN   g N7f)a  Yields tokens as they are generated by the model.

Arguments:
  source: Source tokens.
  target_prefix: Optional target prefix tokens.
  max_decoding_length: Maximum prediction length.
  min_decoding_length: Minimum prediction length.
  sampling_topk: Randomly sample predictions from the top K candidates.
  sampling_topp: Keep the most probable tokens whose cumulative probability exceeds this value.
  sampling_temperature: Sampling temperature to generate more random samples.
  return_log_prob: Include the token log probability in the result.
  repetition_penalty: Penalty applied to the score of previously generated tokens
    (set > 1 to penalize).
  no_repeat_ngram_size: Prevent repetitions of ngrams with this size
    (set 0 to disable).
  disable_unk: Disable the generation of the unknown token.
  suppress_sequences: Disable the generation of some sequences of tokens.
  end_token: Stop the decoding on one of these tokens (defaults to the model EOS token).
  max_input_length: Truncate inputs after this many tokens (set 0 to disable).
  use_vmap: Use the vocabulary mapping file saved in this model

Returns:
  A generator iterator over :class:`ctranslate2.GenerationStepResult` instances.

Note:
  This generation method is not compatible with beam search which requires a complete decoding.
N)r@   rA   rB   rC   rD   r:   r;   r<   r=   r>   return_scoresrE   rF   )_generate_tokensr*   )r!   r"   r#   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   s                   r   r   r      s[     \  ""	(4$-1-//##1%)!  s   -757i   T)
max_length
min_lengthr<   r=   r>   r?   r@   rA   rB   rC   rD   static_promptcache_static_promptcallbackpromptrJ   rK   rL   rM   rN   c             #     #    [        U5      S:  a  [        US   [        5      (       a  U/n[        U R                  U40 SU_SU_SU
_SU_SU_SU_SU_S	U_S
U_SU_SU_SU_SU	_SU_SU_SS_SU_6 Sh  vN   g N7f)a]  Yields tokens as they are generated by the model.

Arguments:
  prompt: Batch of start tokens. If the decoder starts from a
    special start token like <s>, this token should be added to this input.
  max_batch_size: The maximum batch size.
  batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
  max_length: Maximum generation length.
  min_length: Minimum generation length.
  sampling_topk: Randomly sample predictions from the top K candidates.
  sampling_topp: Keep the most probable tokens whose cumulative probability exceeds this value.
  sampling_temperature: Sampling temperature to generate more random samples.
  return_log_prob: Include the token log probability in the result.
  repetition_penalty: Penalty applied to the score of previously generated tokens
    (set > 1 to penalize).
  no_repeat_ngram_size: Prevent repetitions of ngrams with this size
    (set 0 to disable).
  disable_unk: Disable the generation of the unknown token.
  suppress_sequences: Disable the generation of some sequences of tokens.
  end_token: Stop the decoding on one these tokens (defaults to the model EOS token).
  static_prompt: If the model expects a static prompt (a.k.a. system prompt)
    it can be set here to simplify the inputs and optionally cache the model
    state for this prompt to accelerate future generations.
  cache_static_prompt: Cache the model state after the static prompt and
    reuse it for future generations using the same static prompt.
  callback: Optional function that is called for each generated token when
    obj:`beam_size` is 1. If the callback function returns ``True``, the
    decoding will stop for this batch index.

Returns:
  A generator iterator over :class:`ctranslate2.GenerationStepResult` instances.

Note:
  This generation method is not compatible with beam search which requires a complete decoding.
r   r$   r%   r@   rA   rB   rC   rD   rJ   rK   r<   r=   r>   rH   rL   rM   include_prompt_in_resultFrN   N)len
isinstancestrrI   r5   )r2   rO   r$   r%   rJ   rK   r<   r=   r>   r?   r@   rA   rB   rC   rD   rL   rM   rN   s                     r   r   r     s     p 6{Q:fQi55   & 	
 . 2   .    $ $ 2 &  $!" 0#$ "'%& '  s   A6B 8A>9B c                 #    [        U5      S:  a  [        US   [        5      (       a  U/n[        U R                  U40 SU_SU_SU
_SU_SU_SU_SU_S	U_S
U_SU_SU_SU_SU	_SU_SU_SS_SU_6  Sh  vN nU7v   M   N
 g7f)av  Yields tokens asynchronously as they are generated by the model.

Arguments:
  prompt: Batch of start tokens. If the decoder starts from a
    special start token like <s>, this token should be added to this input.
  max_batch_size: The maximum batch size.
  batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
  max_length: Maximum generation length.
  min_length: Minimum generation length.
  sampling_topk: Randomly sample predictions from the top K candidates.
  sampling_topp: Keep the most probable tokens whose cumulative probability exceeds this value.
  sampling_temperature: Sampling temperature to generate more random samples.
  return_log_prob: Include the token log probability in the result.
  repetition_penalty: Penalty applied to the score of previously generated tokens
    (set > 1 to penalize).
  no_repeat_ngram_size: Prevent repetitions of ngrams with this size
    (set 0 to disable).
  disable_unk: Disable the generation of the unknown token.
  suppress_sequences: Disable the generation of some sequences of tokens.
  end_token: Stop the decoding on one of these tokens (defaults to the model EOS token).
  static_prompt: If the model expects a static prompt (a.k.a. system prompt)
    it can be set here to simplify the inputs and optionally cache the model
    state for this prompt to accelerate future generations.
  cache_static_prompt: Cache the model state after the static prompt and
    reuse it for future generations using the same static prompt.
  callback: Optional function that is called for each generated token when
    obj:`beam_size` is 1. If the callback function returns ``True``, the
    decoding will stop for this batch index.

Returns:
  An async generator iterator over :class:`ctranslate2.GenerationStepResult` instances.

Note:
  This generation method is not compatible with beam search which requires a complete decoding.
r   r$   r%   r@   rA   rB   rC   rD   rJ   rK   r<   r=   r>   rH   rL   rM   rQ   FrN   N)rR   rS   rT   AsyncGeneratorr5   )r2   rO   r$   r%   rJ   rK   r<   r=   r>   r?   r@   rA   rB   rC   rD   rL   rM   rN   step_results                      r   r   r   _  s    p 6{Q:fQi55+   & 	
 . 2   .    $ $ 2 &  $!" 0#$ "'%& ' k* + s*   A3B	5B9B:B=B	BB	c                   ,    \ rS rSrS rS rS rS rSrg)rV   i  c                     [         R                  " 5       U l        [        R                  " 5       U l        S U l        Xl        X l        X0l	        g N)
asyncioQueuequeue	threadingEventshutdown_eventiterator_taskprocess_funcargsr+   )selfrb   rc   r+   s       r   __init__AsyncGenerator.__init__  s6    ]]_
'oo/!(	r   c                   #    [        U R                  /U R                  Q70 U R                  D6 He  nU R                  R                  U5      I S h  vN   [        R                  " S5      I S h  vN   U R                  R                  5       (       d  Me    O   U R                  R                  S 5      I S h  vN   g  Nk NO N	7f)Ng-C6?)
rI   rb   rc   r+   r]   putr[   sleepr`   is_set)rd   rW   s     r   producerAsyncGenerator.producer  s     +
 $		
-1[[
K **..-----'''""))++
 jjnnT""" .' 	#s<   ACB=C/B?0!C"C7C8C?CCc                 X    [         R                  " U R                  5       5      U l        U $ rZ   )r[   create_taskrk   ra   )rd   s    r   	__aiter__AsyncGenerator.__aiter__  s     $00Ar   c                 X  #    U R                   R                  5       (       a  [        e U R                  R	                  5       I S h  vN nUc   U R                   R                  5         [        eU$  N)! [        R                   a!    U R                   R                  5         [        ef = f7frZ   )r`   rj   StopAsyncIterationr]   getsetr[   CancelledError)rd   items     r   	__anext__AsyncGenerator.__anext__  s     %%''$$	%))D|##'')((K	 *
 %% 	%##%$$	%s.   &B*A2 A0(A2 /B*0A2 25B''B*)rc   ra   r+   rb   r]   r`   N)	__name__
__module____qualname____firstlineno__re   rk   ro   rw   __static_attributes__r   r   r   rV   rV     s    
#%r   rV   c              /     ^^^	^
#    [         R                  " 5       m	[        R                  " 5       mUR	                  SS 5      m
T
c  S m
UU	U
4S jnUR                  SSUS.5        U " U0 UD6mUU	4S jn[        R                  " USS9nUR                  5          T	R	                  5       nUc  O[        U[        5      (       a  Ue Uv   M2  UR                  5         g ! [         a    TR                  5          M.  f = f7f)	NrN   c                     g)NFr   )rW   s    r   <lambda>"_generate_tokens.<locals>.<lambda>  s    Er   c                 h   > T" U 5      nTR                  U 5        TR                  5       =(       d    U$ rZ   )rh   rj   )rW   user_callback_resultgenerator_closedstep_resultsuser_callbacks     r   	_callback#_generate_tokens.<locals>._callback  s1    ,[9%&&(@,@@r   Tr9   )asynchronous	beam_sizerN   c                     >  T H  n U R                  5         M     TR                  S 5        g ! [         a  nTR                  U5         S nAN2S nAff = frZ   )result	Exceptionrh   )r   easync_resultsr   s     r   _catch_exception*_generate_tokens.<locals>._catch_exception  sN    	 ' ( 	  	 Q	 s   . 
AAA)r-   daemon)r]   r\   r^   r_   rs   updateThreadstartrS   r   GeneratorExitrt   join)rb   rc   r+   r   r   threadrW   r   r   r   r   s          @@@@r   rI   rI     s     ;;=L (JJz40M1A MM !	
 !$1&1M %5dCF
LLN
"&&(k9--	   KKM  	  "	s*   C C?C C?C<8C?;C<<C?c              +   t  #    US:  a  [        S5      e[        U5      S:X  a  US   nO[        R                  " U6 nUR	                  UUSS.5        US:  a  US-  OUn[
        R                  " 5       n[        XVU5       H  nUR                  " U " U0 UD65        U(       d  M$  US   R                  5       (       d  M>  UR                  " 5       R                  5       v   U(       d  Mh  US   R                  5       (       a  MD  M     U(       a+  UR                  " 5       R                  5       v   U(       a  M*  g g 7f)Nr9   zmax_batch_size must be >= 1r   T)r$   r%   r      )
ValueErrorrR   	itertoolszip_longestr   collectionsdeque_batch_iteratorextenddonepopleftr   )	rb   r,   r$   r%   r+   iterableread_batch_sizer]   streamss	            r   r)   r)     s    677
9~Q<(()4
MM,$ 	
 .<a-?nr)^OE"8jI\75f56ea--/((** ea J mmo$$&& %s$   BD8#D8=&D8'D83D86D8c              #      #    S nSnU  H  n[        U[        5      (       d  U4nUS:X  a   U(       a  [        US   5      U:X  a  Uv   S nO`US:X  aL  [        U[        US   5      5      nU(       a,  [        US   5      S-   U-  U:  a  Uv   S n[        US   5      nO[	        SU-  5      eUc  [        S U 5       5      n[        X55       H3  u  pgUc  [        U5      S:  a  [	        S5      eUR                  U5        M5     M     Ub  Uv   g g 7f)Nr   examplesr6   r9   zInvalid batch type %sc              3   &   #    U  H  n/ v   M	     g 7frZ   r   ).0_s     r   	<genexpr>"_batch_iterator.<locals>.<genexpr>F  s     01Bs   z+Input iterables do not have the same length)rS   tuplerR   maxr   zipr(   )r   
batch_sizer%   r   rJ   examplebatchelements           r   r   r   -  s    GJ'5))jG#3wqz?j88#ZWQZ9JC
Oa/:=
J _
 4zABB?000G!'3NE3w<!#3 !NOOLL! 4- 6  s   DD)N    r   )@   r   )r   r   rZ   )r   r   )#r[   r   r   r]   r^   typingr   r   r   r   r   r   ctranslate2._extr	   r
   r   r   r   r   r    rT   intr   r   r   r   floatboolr   r   r   rV   rI   r)   r   r   r   r   <module>r      s        K K Q 48 ::T#Y: HT#Y/0: 	:
 :  :B  $$T#Y$ T#Y$ 	$
 $ m$T  	""49%" " 	" "P  	""T#Y" " 	" m"P *.?
  # "#! ! !48<@ #??I? DI&?
 ? ? ? ?  ? ? ? ? ? !d3i1? c49d3i789?  !?" #?$ "#%?J  	N "#! ! !48<@)- $7;'NN$s)T$s)_,-N N 	N N N N N  N N N N N !d3i1N  c49d3i789!N" DI&#N$ %N& ,-t34'N( "#)Nh  	O "#! ! !48<@)- $7;'OO$s)T$s)_,-O O 	O O O O O  O O O O O !d3i1O  c49d3i789!O" DI&#O$ %O& ,-t34'O( '()Od%% %%P3l'< r   