
import asyncio
import collections
import itertools
import queue
import threading

from typing import AsyncIterable, Callable, Iterable, List, Optional, Union

from ctranslate2._ext import (
    GenerationResult,
    GenerationStepResult,
    Generator,
    ScoringResult,
    TranslationResult,
    Translator,
)


def register_extensions():
    """Registers additional attributes to compiled modules."""
    setattr(Translator, "translate_iterable", translator_translate_iterable)
    setattr(Translator, "score_iterable", translator_score_iterable)
    setattr(Translator, "generate_tokens", translator_generate_tokens)
    setattr(Generator, "generate_iterable", generator_generate_iterable)
    setattr(Generator, "score_iterable", generator_score_iterable)
    setattr(Generator, "generate_tokens", generator_generate_tokens)
    setattr(Generator, "async_generate_tokens", generator_async_generate_tokens)


def translator_translate_iterable(
    translator: Translator,
    source: Iterable[List[str]],
    target_prefix: Optional[Iterable[List[str]]] = None,
    max_batch_size: int = 32,
    batch_type: str = "examples",
    **kwargs,
) -> Iterable[TranslationResult]:
    """Translates an iterable of tokenized examples.

    This method is built on top of :meth:`ctranslate2.Translator.translate_batch`
    to efficiently translate an arbitrarily large stream of data. It enables the
    following optimizations:

    * stream processing (the iterable is not fully materialized in memory)
    * parallel translations (if the translator has multiple workers)
    * asynchronous batch prefetching
    * local sorting by length

    Arguments:
      source: An iterable of tokenized source examples.
      target_prefix: An optional iterable of tokenized target prefixes.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      **kwargs: Any translation options accepted by
        :meth:`ctranslate2.Translator.translate_batch`.

    Returns:
      A generator iterator over :class:`ctranslate2.TranslationResult` instances.

    Example:
      This method can be used to efficiently translate text files:

      .. code-block:: python

          # Replace by your own tokenization and detokenization functions.
          tokenize_fn = lambda line: line.strip().split()
          detokenize_fn = lambda tokens: " ".join(tokens)

          with open("input.txt") as input_file:
              source = map(tokenize_fn, input_file)
              results = translator.translate_iterable(source, max_batch_size=64)

              for result in results:
                  tokens = result.hypotheses[0]
                  target = detokenize_fn(tokens)
                  print(target)
    """
    iterables = [source]
    if target_prefix is not None:
        iterables.append(target_prefix)

    yield from _process_iterable(
        translator.translate_batch,
        iterables,
        max_batch_size,
        batch_type,
        **kwargs,
    )


def translator_score_iterable(
    translator: Translator,
    source: Iterable[List[str]],
    target: Iterable[List[str]],
    max_batch_size: int = 64,
    batch_type: str = "examples",
    **kwargs,
) -> Iterable[ScoringResult]:
    """Scores an iterable of tokenized examples.

    This method is built on top of :meth:`ctranslate2.Translator.score_batch`
    to efficiently score an arbitrarily large stream of data. It enables the
    following optimizations:

    * stream processing (the iterable is not fully materialized in memory)
    * parallel scoring (if the translator has multiple workers)
    * asynchronous batch prefetching
    * local sorting by length

    Arguments:
      source: An iterable of tokenized source examples.
      target: An iterable of tokenized target examples.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      **kwargs: Any scoring options accepted by
        :meth:`ctranslate2.Translator.score_batch`.

    Returns:
      A generator iterator over :class:`ctranslate2.ScoringResult` instances.
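
    Example:
      A minimal sketch of scoring parallel text files, assuming the same
      whitespace tokenization as the :meth:`ctranslate2.Translator.translate_iterable`
      example:

      .. code-block:: python

          # Replace by your own tokenization function.
          tokenize_fn = lambda line: line.strip().split()

          with open("source.txt") as source_file, open("target.txt") as target_file:
              source = map(tokenize_fn, source_file)
              target = map(tokenize_fn, target_file)

              for result in translator.score_iterable(source, target):
                  print(sum(result.log_probs))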
    """
    yield from _process_iterable(
        translator.score_batch,
        [source, target],
        max_batch_size,
        batch_type,
        **kwargs,
    )


def generator_generate_iterable(
    generator: Generator,
    start_tokens: Iterable[List[str]],
    max_batch_size: int = 32,
    batch_type: str = "examples",
    **kwargs,
) -> Iterable[GenerationResult]:
    """Generates from an iterable of tokenized prompts.

    This method is built on top of :meth:`ctranslate2.Generator.generate_batch`
    to efficiently run generation on an arbitrarily large stream of data. It enables
    the following optimizations:

    * stream processing (the iterable is not fully materialized in memory)
    * parallel generations (if the generator has multiple workers)
    * asynchronous batch prefetching
    * local sorting by length

    Arguments:
      start_tokens: An iterable of tokenized prompts.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      **kwargs: Any generation options accepted by
        :meth:`ctranslate2.Generator.generate_batch`.

    Returns:
      A generator iterator over :class:`ctranslate2.GenerationResult` instances.
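
    Example:
      A minimal sketch, assuming a text file with one whitespace-tokenized
      prompt per line:

      .. code-block:: python

          # Replace by your own tokenization and detokenization functions.
          tokenize_fn = lambda line: line.strip().split()
          detokenize_fn = lambda tokens: " ".join(tokens)

          with open("prompts.txt") as prompt_file:
              start_tokens = map(tokenize_fn, prompt_file)

              for result in generator.generate_iterable(start_tokens, max_batch_size=32):
                  print(detokenize_fn(result.sequences[0]))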
    """
    yield from _process_iterable(
        generator.generate_batch,
        [start_tokens],
        max_batch_size,
        batch_type,
        **kwargs,
    )


def generator_score_iterable(
    generator: Generator,
    tokens: Iterable[List[str]],
    max_batch_size: int = 64,
    batch_type: str = "examples",
    **kwargs,
) -> Iterable[ScoringResult]:
    """Scores an iterable of tokenized examples.

    This method is built on top of :meth:`ctranslate2.Generator.score_batch`
    to efficiently score an arbitrarily large stream of data. It enables
    the following optimizations:

    * stream processing (the iterable is not fully materialized in memory)
    * parallel scoring (if the generator has multiple workers)
    * asynchronous batch prefetching
    * local sorting by length

    Arguments:
      tokens: An iterable of tokenized examples.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      **kwargs: Any score options accepted by
        :meth:`ctranslate2.Generator.score_batch`.

    Returns:
      A generator iterator over :class:`ctranslate2.ScoringResult` instances.
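
    Example:
      A minimal sketch, assuming a text file with one whitespace-tokenized
      example per line:

      .. code-block:: python

          # Replace by your own tokenization function.
          tokenize_fn = lambda line: line.strip().split()

          with open("examples.txt") as examples_file:
              tokens = map(tokenize_fn, examples_file)

              for result in generator.score_iterable(tokens):
                  print(sum(result.log_probs))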
    """
    yield from _process_iterable(
        generator.score_batch,
        [tokens],
        max_batch_size,
        batch_type,
        **kwargs,
    )


def translator_generate_tokens(
    translator: Translator,
    source: List[str],
    target_prefix: Optional[List[str]] = None,
    *,
    max_decoding_length: int = 256,
    min_decoding_length: int = 1,
    sampling_topk: int = 1,
    sampling_topp: float = 1,
    sampling_temperature: float = 1,
    return_log_prob: bool = False,
    repetition_penalty: float = 1,
    no_repeat_ngram_size: int = 0,
    disable_unk: bool = False,
    suppress_sequences: Optional[List[List[str]]] = None,
    end_token: Optional[Union[str, List[str], List[int]]] = None,
    max_input_length: int = 1024,
    use_vmap: bool = False,
) -> Iterable[GenerationStepResult]:
    """Yields tokens as they are generated by the model.

    Arguments:
      source: Source tokens.
      target_prefix: Optional target prefix tokens.
      max_decoding_length: Maximum prediction length.
      min_decoding_length: Minimum prediction length.
      sampling_topk: Randomly sample predictions from the top K candidates.
      sampling_topp: Keep the most probable tokens whose cumulative probability exceeds this value.
      sampling_temperature: Sampling temperature to generate more random samples.
      return_log_prob: Include the token log probability in the result.
      repetition_penalty: Penalty applied to the score of previously generated tokens
        (set > 1 to penalize).
      no_repeat_ngram_size: Prevent repetitions of ngrams with this size
        (set 0 to disable).
      disable_unk: Disable the generation of the unknown token.
      suppress_sequences: Disable the generation of some sequences of tokens.
      end_token: Stop the decoding on one of these tokens (defaults to the model EOS token).
      max_input_length: Truncate inputs after this many tokens (set 0 to disable).
      use_vmap: Use the vocabulary mapping file saved in this model.

    Returns:
      A generator iterator over :class:`ctranslate2.GenerationStepResult` instances.

    Note:
      This generation method is not compatible with beam search which requires a complete decoding.
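
    Example:
      A minimal streaming sketch; the source tokens below are placeholders
      for the output of your own tokenizer:

      .. code-block:: python

          source = ["▁Hello", "▁world", "!"]

          for step_result in translator.generate_tokens(source):
              print(step_result.token, flush=True)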
    """
    yield from _generate_tokens(
        translator.translate_batch,
        [source],
        [target_prefix] if target_prefix is not None else None,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        disable_unk=disable_unk,
        suppress_sequences=suppress_sequences,
        end_token=end_token,
        max_decoding_length=max_decoding_length,
        min_decoding_length=min_decoding_length,
        sampling_topk=sampling_topk,
        sampling_topp=sampling_topp,
        sampling_temperature=sampling_temperature,
        return_scores=return_log_prob,
        max_input_length=max_input_length,
        use_vmap=use_vmap,
    )


def generator_generate_tokens(
    generator: Generator,
    prompt: Union[List[str], List[List[str]]],
    max_batch_size: int = 0,
    batch_type: str = "examples",
    *,
    max_length: int = 512,
    min_length: int = 0,
    sampling_topk: int = 1,
    sampling_topp: float = 1,
    sampling_temperature: float = 1,
    return_log_prob: bool = False,
    repetition_penalty: float = 1,
    no_repeat_ngram_size: int = 0,
    disable_unk: bool = False,
    suppress_sequences: Optional[List[List[str]]] = None,
    end_token: Optional[Union[str, List[str], List[int]]] = None,
    static_prompt: Optional[List[str]] = None,
    cache_static_prompt: bool = True,
    callback: Callable[[GenerationStepResult], bool] = None,
) -> Iterable[GenerationStepResult]:
    """Yields tokens as they are generated by the model.

    Arguments:
      prompt: Batch of start tokens. If the decoder starts from a
        special start token like <s>, this token should be added to this input.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      max_length: Maximum generation length.
      min_length: Minimum generation length.
      sampling_topk: Randomly sample predictions from the top K candidates.
      sampling_topp: Keep the most probable tokens whose cumulative probability exceeds this value.
      sampling_temperature: Sampling temperature to generate more random samples.
      return_log_prob: Include the token log probability in the result.
      repetition_penalty: Penalty applied to the score of previously generated tokens
        (set > 1 to penalize).
      no_repeat_ngram_size: Prevent repetitions of ngrams with this size
        (set 0 to disable).
      disable_unk: Disable the generation of the unknown token.
      suppress_sequences: Disable the generation of some sequences of tokens.
      end_token: Stop the decoding on one of these tokens (defaults to the model EOS token).
      static_prompt: If the model expects a static prompt (a.k.a. system prompt)
        it can be set here to simplify the inputs and optionally cache the model
        state for this prompt to accelerate future generations.
      cache_static_prompt: Cache the model state after the static prompt and
        reuse it for future generations using the same static prompt.
      callback: Optional function that is called for each generated token when
        :obj:`beam_size` is 1. If the callback function returns ``True``, the
        decoding will stop for this batch index.

    Returns:
      A generator iterator over :class:`ctranslate2.GenerationStepResult` instances.

    Note:
      This generation method is not compatible with beam search which requires a complete decoding.
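
    Example:
      A minimal streaming sketch; the prompt below is a placeholder for the
      tokenized output of your own tokenizer:

      .. code-block:: python

          prompt = ["<|startoftext|>", "▁Hello"]

          for step_result in generator.generate_tokens(prompt, max_length=64):
              print(step_result.token, end=" ", flush=True)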
    """
    if len(prompt) > 0 and isinstance(prompt[0], str):
        prompt = [prompt]

    yield from _generate_tokens(
        generator.generate_batch,
        prompt,
        max_batch_size=max_batch_size,
        batch_type=batch_type,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        disable_unk=disable_unk,
        suppress_sequences=suppress_sequences,
        end_token=end_token,
        max_length=max_length,
        min_length=min_length,
        sampling_topk=sampling_topk,
        sampling_topp=sampling_topp,
        sampling_temperature=sampling_temperature,
        return_scores=return_log_prob,
        static_prompt=static_prompt,
        cache_static_prompt=cache_static_prompt,
        include_prompt_in_result=False,
        callback=callback,
    )


async def generator_async_generate_tokens(
    generator: Generator,
    prompt: Union[List[str], List[List[str]]],
    max_batch_size: int = 0,
    batch_type: str = "examples",
    *,
    max_length: int = 512,
    min_length: int = 0,
    sampling_topk: int = 1,
    sampling_topp: float = 1,
    sampling_temperature: float = 1,
    return_log_prob: bool = False,
    repetition_penalty: float = 1,
    no_repeat_ngram_size: int = 0,
    disable_unk: bool = False,
    suppress_sequences: Optional[List[List[str]]] = None,
    end_token: Optional[Union[str, List[str], List[int]]] = None,
    static_prompt: Optional[List[str]] = None,
    cache_static_prompt: bool = True,
    callback: Callable[[GenerationStepResult], bool] = None,
) -> AsyncIterable[GenerationStepResult]:
    """Yields tokens asynchronously as they are generated by the model.

    Arguments:
      prompt: Batch of start tokens. If the decoder starts from a
        special start token like <s>, this token should be added to this input.
      max_batch_size: The maximum batch size.
      batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
      max_length: Maximum generation length.
      min_length: Minimum generation length.
      sampling_topk: Randomly sample predictions from the top K candidates.
      sampling_topp: Keep the most probable tokens whose cumulative probability exceeds this value.
      sampling_temperature: Sampling temperature to generate more random samples.
      return_log_prob: Include the token log probability in the result.
      repetition_penalty: Penalty applied to the score of previously generated tokens
        (set > 1 to penalize).
      no_repeat_ngram_size: Prevent repetitions of ngrams with this size
        (set 0 to disable).
      disable_unk: Disable the generation of the unknown token.
      suppress_sequences: Disable the generation of some sequences of tokens.
      end_token: Stop the decoding on one of these tokens (defaults to the model EOS token).
      static_prompt: If the model expects a static prompt (a.k.a. system prompt)
        it can be set here to simplify the inputs and optionally cache the model
        state for this prompt to accelerate future generations.
      cache_static_prompt: Cache the model state after the static prompt and
        reuse it for future generations using the same static prompt.
      callback: Optional function that is called for each generated token when
        :obj:`beam_size` is 1. If the callback function returns ``True``, the
        decoding will stop for this batch index.

    Returns:
      An async generator iterator over :class:`ctranslate2.GenerationStepResult` instances.

    Note:
      This generation method is not compatible with beam search which requires a complete decoding.
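
    Example:
      A minimal sketch of consuming the stream from a coroutine; the prompt
      below is a placeholder for the tokenized output of your own tokenizer:

      .. code-block:: python

          import asyncio

          async def main():
              prompt = ["<|startoftext|>", "▁Hello"]
              async for step_result in generator.async_generate_tokens(prompt):
                  print(step_result.token, end=" ", flush=True)

          asyncio.run(main())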
    """
    if len(prompt) > 0 and isinstance(prompt[0], str):
        prompt = [prompt]

    async for step_result in AsyncGenerator(
        generator.generate_batch,
        prompt,
        max_batch_size=max_batch_size,
        batch_type=batch_type,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        disable_unk=disable_unk,
        suppress_sequences=suppress_sequences,
        end_token=end_token,
        max_length=max_length,
        min_length=min_length,
        sampling_topk=sampling_topk,
        sampling_topp=sampling_topp,
        sampling_temperature=sampling_temperature,
        return_scores=return_log_prob,
        static_prompt=static_prompt,
        cache_static_prompt=cache_static_prompt,
        include_prompt_in_result=False,
        callback=callback,
    ):
        yield step_result


class AsyncGenerator:
    def __init__(self, process_func, *args, **kwargs):
        self.queue = asyncio.Queue()
        self.shutdown_event = threading.Event()
        self.iterator_task = None
        self.process_func = process_func
        self.args = args
        self.kwargs = kwargs

    async def producer(self):
        # Run the blocking token generator, handing each result to the asyncio
        # queue and briefly yielding control back to the event loop.
        for step_result in _generate_tokens(
            self.process_func,
            *self.args,
            **self.kwargs,
        ):
            await self.queue.put(step_result)
            await asyncio.sleep(0.0001)
            if self.shutdown_event.is_set():
                break
        await self.queue.put(None)

    def __aiter__(self):
        self.iterator_task = asyncio.create_task(self.producer())
        return self

    async def __anext__(self):
        if self.shutdown_event.is_set():
            raise StopAsyncIteration
        try:
            item = await self.queue.get()
            if item is None:
                self.shutdown_event.set()
                raise StopAsyncIteration
            return item
        except asyncio.CancelledError:
            self.shutdown_event.set()
            raise StopAsyncIteration


def _generate_tokens(process_func, *args, **kwargs):
    step_results = queue.Queue()
    generator_closed = threading.Event()

    user_callback = kwargs.pop("callback", None)
    if user_callback is None:
        user_callback = lambda step_result: False

    def _callback(step_result):
        user_callback_result = user_callback(step_result)
        step_results.put(step_result)
        # Ask the decoding loop to stop when the consumer closed the generator
        # or the user callback requested it.
        return generator_closed.is_set() or user_callback_result

    kwargs.update(
        asynchronous=True,
        beam_size=1,
        callback=_callback,
    )

    async_results = process_func(*args, **kwargs)

    def _catch_exception():
        try:
            for async_result in async_results:
                async_result.result()
        except Exception as e:
            step_results.put(e)
        step_results.put(None)

    thread = threading.Thread(target=_catch_exception, daemon=True)
    thread.start()

    while True:
        step_result = step_results.get()

        if step_result is None:
            break

        if isinstance(step_result, Exception):
            raise step_result

        try:
            yield step_result
        except GeneratorExit:
            generator_closed.set()
            break

    thread.join()


def _process_iterable(process_func, iterables, max_batch_size, batch_type, **kwargs):
    if max_batch_size < 1:
        raise ValueError("max_batch_size must be >= 1")

    if len(iterables) == 1:
        iterable = iterables[0]
    else:
        iterable = itertools.zip_longest(*iterables)

    kwargs.update(
        max_batch_size=max_batch_size,
        batch_type=batch_type,
        asynchronous=True,
    )

    # Read multiple batches ahead so the workers can rebuild and sort batches
    # by length before running them.
    read_batch_size = max_batch_size * 16 if max_batch_size > 1 else max_batch_size
    queue = collections.deque()

    for streams in _batch_iterator(iterable, read_batch_size, batch_type):
        queue.extend(process_func(*streams, **kwargs))

        while queue and queue[0].done():
            yield queue.popleft().result()

    while queue:
        yield queue.popleft().result()


def _batch_iterator(iterable, batch_size, batch_type):
    streams = None
    cur_batch_size = 0

    for example in iterable:
        if not isinstance(example, tuple):
            example = (example,)

        if streams is None:
            streams = tuple([] for _ in example)
        for batch, element in zip(streams, example):
            if element is None:
                raise ValueError("Input iterables do not have the same length")
            batch.append(element)

        if batch_type == "examples":
            cur_batch_size += 1
        elif batch_type == "tokens":
            cur_batch_size += len(example[0])
        else:
            raise ValueError("Invalid batch type %s" % batch_type)

        if cur_batch_size >= batch_size:
            yield streams
            streams = None
            cur_batch_size = 0

    if streams is not None:
        yield streams