
    o i:             	          d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZmZmZmZmZmZ d dlmZ d dlZd dlZd dlZd dlmZ d dlmZmZ d d	lmZ d d
lm Z m!Z! d dl"m#Z#m$Z$m%Z%m&Z& d dl'm(Z(m)Z)m*Z*m+Z+ e G d d                      Z,e G d d                      Z-e G d d                      Z.e G d d                      Z/ G d d          Z0 G d d          Z1dee-         dee2         de3dee-         fdZ4dej5        dej6        fdZ7d e8de9fd!Z:d"e!d#ee3         deee3                  fd$Z;d%ee2         d&e8d'e8ddfd(Z<dS ))    N)asdict	dataclass)	signature)ceil)BinaryIOIterableListOptionalTupleUnion)warn)tqdm)decode_audiopad_or_trim)FeatureExtractor)_LANGUAGE_CODES	Tokenizer)download_modelformat_timestampget_end
get_logger)SpeechTimestampsMap
VadOptionscollect_chunksget_speech_timestampsc                   >    e Zd ZU eed<   eed<   eed<   eed<   d ZdS )Wordstartendwordprobabilityc                 L    t          dt          d           t          |           S )NzIWord._asdict() method is deprecated, use dataclasses.asdict(Word) instead   r   DeprecationWarningr   selfs    Y/var/www/html/auto_sub_bot/venv/lib/python3.11/site-packages/faster_whisper/transcribe.py_asdictzWord._asdict&   s+    W	
 	
 	

 d||    N)__name__
__module____qualname__float__annotations__strr)    r*   r(   r   r      sL         LLL	JJJ
III    r*   r   c                       e Zd ZU eed<   eed<   eed<   eed<   eed<   ee         ed<   eed<   eed<   eed	<   eee	                  ed
<   ee         ed<   d Z
dS )Segmentidseekr   r   texttokensavg_logprobcompression_rationo_speech_probwordstemperaturec                 L    t          dt          d           t          |           S )NzOSegment._asdict() method is deprecated, use dataclasses.asdict(Segment) insteadr#   r$   r&   s    r(   r)   zSegment._asdict=   s+    ]	
 	
 	

 d||r*   N)r+   r,   r-   intr/   r.   r0   r	   r
   r   r)   r1   r*   r(   r3   r3   /   s         GGG
IIILLL	JJJ
IIIIDJ%       r*   r3   c                      e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   ee         ed<   ee         ed<   ee         ed	<   eed
<   eed<   ee         ed<   ee	e
ee         f                  ed<   ee
         ed<   eed<   eee                  ed<   eed<   eed<   eed<   e
ed<   e
ed<   eed<   ee         ed<   e	e
ee         f         ed<   ee         ed<   ee
         ed<   dS )TranscriptionOptions	beam_sizebest_ofpatiencelength_penaltyrepetition_penaltyno_repeat_ngram_sizelog_prob_thresholdno_speech_thresholdcompression_ratio_thresholdcondition_on_previous_textprompt_reset_on_temperaturetemperaturesinitial_promptprefixsuppress_blanksuppress_tokenswithout_timestampsmax_initial_timestampword_timestampsprepend_punctuationsappend_punctuationsmultilingualmax_new_tokensclip_timestampshallucination_silence_thresholdhotwordsN)r+   r,   r-   r>   r/   r.   r
   boolr	   r   r0   r   r1   r*   r(   r@   r@   F   s        NNNLLLOOO '''!%(((!)%000 $$$$!&&&&u+U3#567777SMd3i((((    SM!!!3U+,,,,%-e_444smr*   r@   c                   ~    e Zd ZU eed<   eed<   eed<   eed<   eeeeef                           ed<   e	ed<   e
ed<   dS )	TranscriptionInfolanguagelanguage_probabilitydurationduration_after_vadall_language_probstranscription_optionsvad_optionsN)r+   r,   r-   r0   r/   r.   r
   r	   r   r@   r   r1   r*   r(   r]   r]   d   ss         MMMOOO eCJ&7!89999////r*   r]   c            M       z   e Zd Zd Zd Zdej        dedefdZ	ddd	d
d
ddddg ddddddddddgddd	ddd	ddddddddddf#de
eeej        f         dee         dedededededed ed!ed"e
eee         eed#f         f         d$ee         d%ee         d&ee         d'ed(ed)ee
eee         f                  d*ee         d+ed,eee                  d-ed.ed/ed0ed1ed2ed3ed4ee
eef                  d5ee         d6ee         d7eee                  d8ee         d9ed:ee         d;ee         d<ed=eee         ef         fJd>Zd? ZdS )@BatchedInferencePipelinec                 "    || _         d| _        d S )N        )modellast_speech_timestamp)r'   ri   s     r(   __init__z!BatchedInferencePipeline.__init__p   s     $)
%("""r*   c           	                                 ||          \  }}g }g }t          ||          D ]\  d         }	t          t          |	           j        j        z            }
|                    |
            j                            d         d         |
|	d          \  }}}|                     fd|D                        |j        r4 j        	                    ||||j
        |j         j                   _        |S )Nr`   r7   offsetr   	tokenizerr7   time_offsetsegment_sizesegment_durationr5   c                 8   g | ]}t                              |d                    d         d         |d          |d         |d         t                              |d                              t          d         j        j        z                      S )r7   r8   r:   r   r   rm   )r6   r8   r:   r7   r   r   r9   r5   )dictdecodeget_compression_ratior>   ri   frames_per_second).0
subsegmentchunk_metadataoutputr'   ro   s     r(   
<listcomp>z4BatchedInferencePipeline.forward.<locals>.<listcomp>   s        # &--j.BCC$*=$9'-.>'?)(3(1&u-*?%,,Z-ABB+ + !*84tz7SS     r*   )generate_segment_batchedzipr>   r   ri   rw   append_split_segments_by_timestampsrS   add_word_timestampsrT   rU   rj   )r'   featuresro   chunks_metadataoptionsencoder_outputoutputssegmented_outputssegment_sizesr`   rq   subsegmentsr5   single_timestamp_endingrz   r{   s   ` `           @@r(   forwardz BatchedInferencePipeline.forwardw   sv   "&"?"?i#
 #
 &)/7&C&C "	 "	"NF%j1HtH~~
0LLMML  ...
 
88#h'*84)!) 9  	' $$       '2     & " 		)-)G)G!,+** *D& ! r*   r   ro   r   c                    |j         d         }| j                            |j                            |j                  ng |j        |j                  |j        t                    |j        z   }n| j        j	        }|| j        j	        k    rPt          dt                     d|t                    z
   d| d| j        j	         d| j        j	         d          | j                            |          }fd	t          |          D             }|j        refd
| j        j                            |          D             }                    j                  }	t!          |          D ]\  }
}|||
         |	<   | j        j                            |||j        |j        |j        ||j        |j        dd|j        d         |j        |j                  }g }|D ]o}t          |j        d                   }|j        d         ||j        z  z  }|                    t;          ||dz   z  |j        |j        d                              p||fS )Nr   )previous_tokensrQ   rZ   The length of the prompt is , and the `max_new_tokens` C. Thus, the combined length of the prompt and `max_new_tokens` is: 6. This exceeds the `max_length` of the Whisper model: . You should either reduce the length of your prompt, or reduce the value of `max_new_tokens`, so that their combined length is less that .c                 8    g | ]}                                 S r1   )copy)rx   _prompts     r(   r|   zEBatchedInferencePipeline.generate_segment_batched.<locals>.<listcomp>   s!    <<<Q6;;==<<<r*   c                 \    g | ](}j                             |d          d                    )S r   )ro   token_to_id)rx   segment_langsro   s     r(   r|   zEBatchedInferencePipeline.generate_segment_batched.<locals>.<listcomp>   sD       ! #//a0@0CDD  r*   T)rA   rC   rD   
max_lengthrO   rP   return_scoresreturn_no_speech_probsampling_temperaturerE   rF      )r8   r:   r7   )shaperi   
get_promptrM   encoderQ   rZ   rW   lenr   
ValueErrorrangerV   detect_languageindexr^   	enumerategeneraterA   rC   rD   rO   rP   rL   rE   rF   sequences_idsscoresr   rt   r:   )r'   r   ro   r   
batch_sizer   r   promptslanguage_tokenslanguage_token_indexilanguage_tokenresultsr{   resultseq_lencum_logprobr   s     `              @r(   r}   z1BatchedInferencePipeline.generate_segment_batched   s    ^A&
&& )5   !7888&9% ' 	
 	
 !-Vw'==JJ.J
---Ws6{{ W WF+W W,6W W 7;j6KW W ?Cj>SW W W   **844<<<<%
*;*;<<< 	B   %)Z%5%E%En%U%U  O $*<<	0B#C#C %.%?%? B B!>3A
/00*"++'%"1!"1#3"&!(!5a!8&9!(!= , 
 
   	 	F&.q122G -*gw7M.MNKMM +w{ ;#)#8!/2      v%%r*   N
transcribeF   r   r   rh   g?g?333333?g?      ?333333@      r   T      ?r      "'“¿([{-   "'.。,，!！?？:：”)]}、   audior^   tasklog_progressrA   rB   rC   rD   rE   rF   r<   .rI   rG   rH   rJ   rK   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   
vad_filtervad_parametersrW   chunk_lengthrX   rY   r   rZ   language_detection_thresholdlanguage_detection_segmentsreturnc%           
      	   4  j         j        j        4|r2 j         j         j        s! j         j                            d           d}t          |t          j                  st          |4          }|j
        d         4z  }% j         j                            dt          |%                     |p j         j        j        }|s|ru|t          |d          }nPt          |t                    r;d	|                                v r|                    d	           t          d?i |d	|i}t%          ||          }n'|%|k     rd|j
        d         d
g}nt'          d          d}&t)          |||          \  }'}(nd}&4fd|D             }g g }(}'t+          |          D ]\  })}*|'                    ||*d         |*d                             |*d         |*d         z
  4z  }+|+dk    r  j         j                            d|)           |(                    |*d         4z  |+|*gd           t/          d |D                       4z  }, j         j                            dt          |%|,z
                       |,r fd|'D             ng }-d}.| j         j         j        sd}d}/n j                             t          j        |-t          j         j         j         j        dfdd          gz   d          |$|#          \  }}/}. j         j                            d||/           n= j         j         j        s*|dk    r$ j         j                            d|z             d}d}/t9           j         j         j         j         j        ||           }0|-rt          j        d! |-D                       ng }-t?          d?i d"|d#|d$|d%|d&|	d'|
d(|d)|d*|d+t          |t@          tB          f          r
|dd         n|gd,|d-|d.|d/|rtE          |0|          n|d0|d1|d2|d3|"d4|d5dd6dd7|d8d9d:|d;|d<d=}1tG          ||/|%|,|1||.>          }2 $                    |-|0|(|!|1|          }3|&stK          |3|4          }3|3|2fS )@at  transcribe audio in chunks in batched fashion and return with language info.

        Arguments:
            audio: Path to the input file (or a file-like object), or the audio waveform.
            language: The language spoken in the audio. It should be a language code such
                as "en" or "fr". If not set, the language will be detected in the first 30 seconds
                of audio.
            task: Task to execute (transcribe or translate).
            log_progress: whether to show progress bar or not.
            beam_size: Beam size to use for decoding.
            best_of: Number of candidates when sampling with non-zero temperature.
            patience: Beam search patience factor.
            length_penalty: Exponential length penalty constant.
            repetition_penalty: Penalty applied to the score of previously generated tokens
                (set > 1 to penalize).
            no_repeat_ngram_size: Prevent repetitions of ngrams with this size (set 0 to disable).
            temperature: Temperature for sampling. If a list or tuple is passed,
                only the first value is used.
            initial_prompt: Optional text string or iterable of token ids to provide as a
                prompt for the each window.
            suppress_blank: Suppress blank outputs at the beginning of the sampling.
            suppress_tokens: List of token IDs to suppress. -1 will suppress a default set
                of symbols as defined in `tokenizer.non_speech_tokens()`.
            without_timestamps: Only sample text tokens.
            word_timestamps: Extract word-level timestamps using the cross-attention pattern
                and dynamic time warping, and include the timestamps for each word in each segment.
                Set as False.
            prepend_punctuations: If word_timestamps is True, merge these punctuation symbols
                with the next word
            append_punctuations: If word_timestamps is True, merge these punctuation symbols
                with the previous word
            multilingual: Perform language detection on every segment.
            vad_filter: Enable the voice activity detection (VAD) to filter out parts of the audio
                without speech. This step is using the Silero VAD model
                https://github.com/snakers4/silero-vad.
            vad_parameters: Dictionary of Silero VAD parameters or VadOptions class (see available
                parameters and default values in the class `VadOptions`).
            max_new_tokens: Maximum number of new tokens to generate per-chunk. If not set,
                the maximum will be set by the default max_length.
            chunk_length: The length of audio segments. If it is not None, it will overwrite the
                default chunk_length of the FeatureExtractor.
            clip_timestamps: Optionally provide list of dictionaries each containing "start" and
                "end" keys that specify the start and end of the voiced region within
                `chunk_length` boundary. vad_filter will be ignored if clip_timestamps is used.
            batch_size: the maximum number of parallel requests to model for decoding.
            hotwords:
                Hotwords/hint phrases to the model. Has no effect if prefix is not None.
            language_detection_threshold: If the maximum probability of the language tokens is
                higher than this value, the language is detected.
            language_detection_segments: Number of segments to consider for the language detection.

        Unused Arguments
            compression_ratio_threshold: If the gzip compression ratio is above this value,
                treat as failed.
            log_prob_threshold: If the average log probability over sampled tokens is
                below this value, treat as failed.
            no_speech_threshold: If the no_speech probability is higher than this value AND
                the average log probability over sampled tokens is below `log_prob_threshold`,
                consider the segment as silent.
            condition_on_previous_text: If True, the previous output of the model is provided
                as a prompt for the next window; disabling may make the text inconsistent across
                windows, but the model becomes less prone to getting stuck in a failure loop,
                such as repetition looping or timestamps going out of sync. Set as False
            prompt_reset_on_temperature: Resets prompt if temperature is above this value.
                Arg has effect only if condition_on_previous_text is True. Set at 0.5
            prefix: Optional text to provide as a prefix at the beginning of each window.
            max_initial_timestamp: The initial timestamp cannot be later than this, set at 0.0.
            hallucination_silence_threshold: Optional[float]
                When word_timestamps is True, skip silent periods longer than this threshold
                (in seconds) when a possible hallucination is detected. set as None.
        Returns:
          A tuple with:

            - a generator over transcribed segments
            - an instance of TranscriptionInfo
        iThe current model is English-only but the multilingual parameter is set toTrue; setting to False instead.Fsampling_rater   !Processing audio with duration %sN   )max_speech_duration_smin_silence_duration_msr   )r   r   zPNo clip timestamps found. Set 'vad_filter' to True or provide 'clip_timestamps'.)max_durationTc                 P    g | ]"}fd |                                 D             #S )c                 <    i | ]\  }}|t          |z            S r1   )r>   )rx   kvr   s      r(   
<dictcomp>zBBatchedInferencePipeline.transcribe.<locals>.<listcomp>.<dictcomp>  s,    GGGtq!CM)**GGGr*   )items)rx   segmentr   s     r(   r|   z7BatchedInferencePipeline.transcribe.<locals>.<listcomp>  sF        HGGGw}}GGG  r*   r   r      zSSegment %d is longer than 30 seconds, only the first 30 seconds will be transcribed)rm   r`   segmentsc              3   8   K   | ]}|d          |d         z
  V  dS )r   r   Nr1   )rx   r   s     r(   	<genexpr>z6BatchedInferencePipeline.transcribe.<locals>.<genexpr>  s0      SS''"22SSSSSSr*   VAD filter removed %s of audioc                 X    g | ]&}j                             |          d ddf         'S ).Nr   )ri   feature_extractor)rx   chunkr'   s     r(   r|   z7BatchedInferencePipeline.transcribe.<locals>.<listcomp>  s6    UUUuTZ))%00crc:UUUr*   enr   g      float32)dtypeaxisr   r   r   ,Detected language '%s' with probability %.2f`The current model is English-only but the language parameter is set to '%s'; using 'en' instead.r   r^   c                 ,    g | ]}t          |          S r1   )r   )rx   features     r(   r|   z7BatchedInferencePipeline.transcribe.<locals>.<listcomp>  s     CCCwk'**CCCr*   rA   rB   rC   rD   rE   rF   rG   rH   rI   rL   rM   rN   rO   rP   rT   rU   rW   rZ   rS   rY   rJ   rX   rK   r   rV   rQ   rR   rh   r^   r_   r`   ra   rc   rd   rb   r1   )&ri   r   r   is_multilingualloggerwarning
isinstancenpndarrayr   r   infor   r   r   rt   keyspopr   RuntimeErrorr   r   r   sumr   concatenatefulln_melsr   hf_tokenizerstackr@   listtupleget_suppressed_tokensr]   _batched_segments_generatorrestore_speech_timestamps)5r'   r   r^   r   r   rA   rB   rC   rD   rE   rF   r<   rI   rG   rH   rJ   rK   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   r   r   rW   r   rX   rY   r   rZ   r   r   r`   clip_timestamps_providedaudio_chunksr   r   clipclip_durationra   r   rb   r_   ro   r   r   r   r   s5   `                                                   @r(   r   z#BatchedInferencePipeline.transcribe   s   v 
4B 	!
 0 @ 	!J%%2   !L%,, 	E mDDDE;q>M1
/1A(1K1K	
 	
 	
 $Ptz'C'P 7	 !)%/.:03& & &NN  55 ..2E2E2G2GGG&**+BCCC%/ & &(& &@L& & &N #8~"N"NL((-.u{1~#F#F"G"M  
 (-$,:\- - -)L//
 (,$   .  O
 -//L$_55  4##E$w-$u+*E$FGGG!%etG}!< M 2%%J%--H    &&"&w--"?$1%)F     SS?SSSSS 	
 	
,X(::;;	
 	
 	
 "UUUUUUUU 	 ":#3 '($$ J..^ GTZ%5%<a$@$iXXX    1L1M / 
 
	(& 
!&&B(    :#3  D8H8H
!))*,45    #$ J#J,	
 
 
	 IQXBHCC(CCCDDDVX 	 ' #
 #
 #
i#
G#
 X#
 *>	#

  21#
 "6!5#
  21#
 !4 3#
 )D(C#
 kD%=99#BQB!]#
 *>#
  6!#
" *>##
( #%%iAAA$+#
. "6!5/#
0 !4 31#
2 *>3#
4 X5#
6 ,O7#
8 -1D9#
: (-u;#
< ,O=#
> ),?#
@ &A#
B  21C#
D #&#E#
J !!51")&1
 
 
 33
 
 ( 	0/= H ~r*   c              #     K   t          t          |          | d          }d}t          dt          |          |          D ]}	|                     ||	|	|z            |||	|	|z            |          }
|
D ]}|D ]}|dz  }t	          |d         ||d         t          |d         d          t          |d         d          |j        sd nd	 |d
         D             |d         |d         |d         |d         |j        d                   V  |                    d           |	                                 d| _
        d S )Nr   )totaldisablepositionr   r5   r6   r      r   c                 &    g | ]}t          d i |S r1   r   rx   r    s     r(   r|   zHBatchedInferencePipeline._batched_segments_generator.<locals>.<listcomp>]  s"    !L!L!L4$,,,,!L!L!Lr*   r;   r7   r8   r:   r9   )r5   r4   r6   r   r   r;   r7   r8   r:   r9   r<   rh   )r   r   r   r   r3   roundrS   rL   updatecloserj   )r'   r   ro   r   r   r   r   pbarseg_idxr   r   r   r   s                r(   r  z4BatchedInferencePipeline._batched_segments_generatorD  s      #h--\1AANNNq#h--44 	 	AllQ^+,A
N 23	 G "  %  GqLG!$V_"$V_#GG$4a88!'%.!44 $+#:MDD!L!L77;K!L!L!L&x0$+M$:'./?'@*12E*F$+$8$;      $ A+. 	

%("""r*   )r+   r,   r-   rk   r   r   r   r   r@   r}   r   r0   r   r
   r[   r>   r.   r	   r   r   rt   r   r3   r]   r   r  r1   r*   r(   rf   rf   o   s^       ) ) )5! 5! 5!nN&*N& N& &	N& N& N& N&f #' " !$%$%E
 E
 E
 8;.2/2+/-0>B $#02t#''* %$2#E"<@(,&*04;?"&8;+,YD DS(BJ./D 3-D 	D
 D D D D D "D "D 5$u+uUCZ/@@AD( &.e_)D* %UO+D, &e_-D. %)/D0 &+1D2 !sHSM'9!:;3D4 5D6 7D8 "$s),9D: !;D<  %=D> ?D@ "ADB !CDD EDF GDH !tZ'7!89IDJ !KDL smMDN "$t*-ODP *2%QDR SDT 3-UDV '/uoWDX &)YDZ 
x "33	4[D D D DL
%) %) %) %) %)r*   rf   c            K          e Zd Z	 	 	 	 	 	 	 	 	 	 dadeded	eeee         f         d
edededee         dede	dee         deeeef                  fdZ
edee         fd            Zdbde	fdZdddddddddg ddddddddddgdddd d!dddddd"ddddf"d#eeeej        f         d$ee         d%ed&ed'ed(ed)ed*ed+ed,ed-eeee         eed.f         f         d/ee         d0ee         d1ee         d2ed3ed4eeeee         f                  d5ee         d6ed7eee                  d8ed9ed:ed;ed<ed=ed>ed?eee	ef                  d@ee         dAee         dBeeee         f         dCee         dDee         dEee         dFedeee         ef         fHdGZdHedIee         dJedKedLedMedeee                  fdNZ	 dbdOej        dHedPedQeej                 dee         f
dRZdOej        dej        fdSZdQej        dTee         dHedPedeej        j         eeef         f
dUZ!	 	 	 dcdHedVee         d8ed5ee         dDee         dee         fdWZ"dXee	         dHedQej        dYed;ed<edZedefd[Z#	 dddHed]ee         dQej        dYed^edee	         fd_Z$	 	 	 	 	 	 ded#eej                 dOeej                 d>ed?ee	ef         dFedEedeeeeeeef                  f         fd`Z%dS )fWhisperModelautor   defaultr   NFmodel_size_or_pathdevicedevice_indexcompute_typecpu_threadsnum_workersdownload_rootlocal_files_onlyfilesrevisionuse_auth_tokenc           
         t                      | _        d\  }}|	r/|}|	                    dd          }|	                    dd          }n6t          j                            |          r|}nt          ||||
|          }t          j        j	        |f||||||	d|| _
        t          j                            |d          }|r%t          j                            |          | _        nyt          j                            |          r%t          j                            |          | _        n5t          j                            d| j
        j        rdnd	z             | _        |                     ||          | _        t-          di | j        | _        d
| _        | j        j        | j        z  | _        | j        j        | j        j        z  | _        | j        j        | j        z  | _        d| _        d| _        dS )a>	  Initializes the Whisper model.

        Args:
          model_size_or_path: Size of the model to use (tiny, tiny.en, base, base.en,
            small, small.en, distil-small.en, medium, medium.en, distil-medium.en, large-v1,
            large-v2, large-v3, large, distil-large-v2, distil-large-v3, large-v3-turbo, or turbo),
            a path to a converted model directory, or a CTranslate2-converted Whisper model ID from
            the HF Hub. When a size or a model ID is configured, the converted model is downloaded
            from the Hugging Face Hub.
          device: Device to use for computation ("cpu", "cuda", "auto").
          device_index: Device ID to use.
            The model can also be loaded on multiple GPUs by passing a list of IDs
            (e.g. [0, 1, 2, 3]). In that case, multiple transcriptions can run in parallel
            when transcribe() is called from multiple Python threads (see also num_workers).
          compute_type: Type to use for computation.
            See https://opennmt.net/CTranslate2/quantization.html.
          cpu_threads: Number of threads to use when running on CPU (4 by default).
            A non zero value overrides the OMP_NUM_THREADS environment variable.
          num_workers: When transcribe() is called from multiple Python threads,
            having multiple workers enables true parallelism when running the model
            (concurrent calls to self.model.generate() will run in parallel).
            This can improve the global throughput at the cost of increased memory usage.
          download_root: Directory where the models should be saved. If not set, the models
            are saved in the standard Hugging Face cache directory.
          local_files_only:  If True, avoid downloading the file and return the path to the
            local cached file if it exists.
          files: Load model files from the memory. This argument is a dictionary mapping file names
            to file contents as file-like or bytes objects. If this is set, model_path acts as an
            identifier for this model.
          revision:
            An optional Git revision id which can be a branch name, a tag, or a
            commit hash.
          use_auth_token: HuggingFace authentication token or True to use the
            token stored by the HuggingFace config folder.
        )NNztokenizer.jsonNpreprocessor_config.json)r"  	cache_dirr$  r%  )r  r  r  intra_threadsinter_threadsr#  zopenai/whisper-tiny z.enr#   g{Gz?i  r1   ) r   r   r   ospathisdirr   ctranslate2modelsWhisperri   join
tokenizersr   from_bufferr   isfile	from_filefrom_pretrainedr   _get_feature_kwargsfeat_kwargsr   r   input_stride
hop_lengthnum_samples_per_tokenr   rw   tokens_per_secondtime_precisionr   )r'   r  r  r  r  r  r   r!  r"  r#  r$  r%  model_kwargstokenizer_bytespreprocessor_bytes
model_pathtokenizer_files                    r(   rk   zWhisperModel.__init__m  s   d !ll.8++ 	+J#ii(8$??O!&+Et!L!LW]]-.. 		+JJ'"!1'!-  J !'/	
%%%%	
 	
 	
 	

 j2BCC 	 * 4 @ @ Q QDW^^N++ 	 * 4 > >~ N ND * 4 D D%tz/I)TuU! !D  33J@RSS!1!E!ED4D!E!E"-0AA 	" "0D4J4UU 	 "0D4NN 	 #r*   r   c                 H    | j         j        rt          t                    ndgS )z%The languages supported by the model.r   )ri   r   r   r   r&   s    r(   supported_languagesz WhisperModel.supported_languages  s#     )-
(BNtO$$$Nr*   c                 J   i }	 t           j                            |d          }|rt          j        |          }n`t           j                            |          r?t          |dd          5 }t          j        |          }d d d            n# 1 swxY w Y   n|S t          t          j
                  j                                        fd|                                D             S # t          j        $ r%}| j                            d|           Y d }~nd }~ww xY w|S )Nr'  rutf-8)encodingc                 $    i | ]\  }}|v 	||S r1   r1   )rx   r   r   
valid_keyss      r(   r   z4WhisperModel._get_feature_kwargs.<locals>.<dictcomp>  s$    GGGTQqJAqr*   z&Could not load preprocessor config: %s)r,  r-  r2  jsonloadsr5  openloadr   r   rk   
parametersr   r   JSONDecodeErrorr   r   )r'   rB  rA  configconfig_pathfileerK  s          @r(   r8  z WhisperModel._get_feature_kwargs  sg   	M',,z3MNNK! $677,, +sW=== -!Yt__F- - - - - - - - - - - - - - - "#3#<==HMMOOJGGGGV\\^^GGGG# 	M 	M 	MK H!LLLLLLLL	M sC   A(C, -BC, BC, BC, AC, ,D ;DD r   r   r   r   r   r   Tr   r   r   r   r   0r   r^   r   r   rA   rB   rC   rD   rE   rF   r<   .rI   rG   rH   rJ   rK   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   r   r   rW   r   rX   rY   rZ   r   r   c$           
      ~  4 | j         j        4|r(| j        j        s| j                            d           d}t          |t          j                  st          |4          }|j
        d         4z  }$|$}%| j                            dt          |$                     |r|dk    r|t                      }n!t          |t                    rt          d2i |}t          ||          }&t!          ||&          \  }'}(t          j        |'d          }|j
        d         4z  }%| j                            d	t          |$|%z
                       | j                            t&          j                  r:| j                            d
d                    4fd|&D                                  nd}&|                      ||          })d}*d}+|| j        j        sd}d},nt          |t.                    r(t1          |                    d          d                   n|d         }-|)j
        d         dz
  }.|-| j        z  |.k     rt7          |-| j        z            nd}/|                     |)d|/df         |#|"          \  }},}+| j                            d||,           n3| j        j        s%|dk    r| j                            d|z             d}d},t;          | j        | j        j        ||          }0t?          d2i d|d|d|d|d|	d|
d|d|d|d |d!|d"t          |t@          tB          f          r|n|gd#|d$|d%|d&|rtE          |0|          n|d'|d(|d)|d*|d+|d,|d-|d.|d/| d0|!}1| #                    |)|0|1||*          }2|&rtI          |2|&4          }2tK          ||,|$|%|1||+1          }3|2|3fS )3a'  Transcribes an input file.

        Arguments:
          audio: Path to the input file (or a file-like object), or the audio waveform.
          language: The language spoken in the audio. It should be a language code such
            as "en" or "fr". If not set, the language will be detected in the first 30 seconds
            of audio.
          task: Task to execute (transcribe or translate).
          log_progress: whether to show progress bar or not.
          beam_size: Beam size to use for decoding.
          best_of: Number of candidates when sampling with non-zero temperature.
          patience: Beam search patience factor.
          length_penalty: Exponential length penalty constant.
          repetition_penalty: Penalty applied to the score of previously generated tokens
            (set > 1 to penalize).
          no_repeat_ngram_size: Prevent repetitions of ngrams with this size (set 0 to disable).
          temperature: Temperature for sampling. It can be a tuple of temperatures,
            which will be successively used upon failures according to either
            `compression_ratio_threshold` or `log_prob_threshold`.
          compression_ratio_threshold: If the gzip compression ratio is above this value,
            treat as failed.
          log_prob_threshold: If the average log probability over sampled tokens is
            below this value, treat as failed.
          no_speech_threshold: If the no_speech probability is higher than this value AND
            the average log probability over sampled tokens is below `log_prob_threshold`,
            consider the segment as silent.
          condition_on_previous_text: If True, the previous output of the model is provided
            as a prompt for the next window; disabling may make the text inconsistent across
            windows, but the model becomes less prone to getting stuck in a failure loop,
            such as repetition looping or timestamps going out of sync.
          prompt_reset_on_temperature: Resets prompt if temperature is above this value.
            Arg has effect only if condition_on_previous_text is True.
          initial_prompt: Optional text string or iterable of token ids to provide as a
            prompt for the first window.
          prefix: Optional text to provide as a prefix for the first window.
          suppress_blank: Suppress blank outputs at the beginning of the sampling.
          suppress_tokens: List of token IDs to suppress. -1 will suppress a default set
            of symbols as defined in `tokenizer.non_speech_tokens()`.
          without_timestamps: Only sample text tokens.
          max_initial_timestamp: The initial timestamp cannot be later than this.
          word_timestamps: Extract word-level timestamps using the cross-attention pattern
            and dynamic time warping, and include the timestamps for each word in each segment.
          prepend_punctuations: If word_timestamps is True, merge these punctuation symbols
            with the next word
          append_punctuations: If word_timestamps is True, merge these punctuation symbols
            with the previous word
          multilingual: Perform language detection on every segment.
          vad_filter: Enable the voice activity detection (VAD) to filter out parts of the audio
            without speech. This step is using the Silero VAD model
            https://github.com/snakers4/silero-vad.
          vad_parameters: Dictionary of Silero VAD parameters or VadOptions class (see available
            parameters and default values in the class `VadOptions`).
          max_new_tokens: Maximum number of new tokens to generate per-chunk. If not set,
            the maximum will be set by the default max_length.
          chunk_length: The length of audio segments. If it is not None, it will overwrite the
            default chunk_length of the FeatureExtractor.
          clip_timestamps:
            Comma-separated list start,end,start,end,... timestamps (in seconds) of clips to
             process. The last end timestamp defaults to the end of the file.
             vad_filter will be ignored if clip_timestamps is used.
          hallucination_silence_threshold:
            When word_timestamps is True, skip silent periods longer than this threshold
             (in seconds) when a possible hallucination is detected
          hotwords:
            Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.
          language_detection_threshold: If the maximum probability of the language tokens is higher
           than this value, the language is detected.
          language_detection_segments: Number of segments to consider for the language detection.
        Returns:
          A tuple with:

            - a generator over transcribed segments
            - an instance of TranscriptionInfo
        r   Fr   r   r   rV  Nr   r   z0VAD filter kept the following audio segments: %sz, c              3      K   | ]9}d t          |d         z            dt          |d         z            dV  :dS )[r   z -> r   ]N)r   )rx   r   r   s     r(   r   z*WhisperModel.transcribe.<locals>.<genexpr>  sq         "E -U7^m-KLLLL,U5\M-IJJJJ     r*   )r   r   r   ,r   .r   r   r   r   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r   r1   )&r   r   ri   r   r   r   r   r   r   r   r   r   r   r   rt   r   r   r   isEnabledForloggingDEBUGdebugr2  r0   r.   splitrw   r>   r   r   r   r@   r   r  r  generate_segmentsr  r]   )5r'   r   r^   r   r   rA   rB   rC   rD   rE   rF   r<   rI   rG   rH   rJ   rK   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   r   r   rW   r   rX   rY   rZ   r   r   r`   ra   speech_chunksr  r   r   r   rb   r_   start_timestampcontent_framesr5   ro   r   r   r   r   s5                                                       @r(   r   zWhisperModel.transcribe  s   n .< 	!
 : 	!K2   !L%,, 	E mDDDE;q>M1%/1A(1K1K	
 	
 	
  	!/S00%!+ND11 >!+!=!=n!=!=1%HHM,:5-,P,P)L/N<a888E!&Q-!?K0 ,>!>??  
 {''66 !!FII     &3    
 
 
 !M))%l)KK! :- '($$ "/377,E///44Q7888(+  
 "*!3a!7 ')??.PP $*@@AAA  ((%c455j10K1M )  	(&   B(    :-  (d2B2B##*,45    #$ J&	
 
 
	 ' !
 !
 !
i!
G!
 X!
 *>	!

  21!
 "6!5!
  21!
 !4 3!
 )D(C!
 (B'A!
 )D(C!
  *+e}EEXK=!
 *>!
  6!!
" *>#!
( #%%iAAA$+!
.  21/!
0 #8"71!
2 ,O3!
4 "6!55!
6 !4 37!
8 &9!
: *>;!
< ,O=!
> -L,K?!
@ XA!
F ))i,
 
  	Y0=-XXH !51")&1
 
 
 ~r*   ro   r7   rp   rq   rr   r5   c           	         g }t                    dk    o d         j        cxk     od         k    nc }fdt          t                              D             }	t          |	          dk    rt          |	          }
|r"|
                    t                               d}|
D ]n}||         }|d         j        z
  }|d         j        z
  }||| j        z  z   }||| j        z  z   }|                    t          ||||                     |}o|r||z  }n|dz
           j        z
  }||| j        z  z  }n||}fdD             }t          |          dk    r+|d         j        k    r|d         j        z
  }|| j        z  }|                    t          ||||z                        ||z  }|||fS )	Nr#   r   c                 j    g | ]/}|d k    r'|         j         k    r|dz
           j         k    -|0S )r   r   timestamp_begin)rx   r   ro   r7   s     r(   r|   z>WhisperModel._split_segments_by_timestamps.<locals>.<listcomp>  sV     "
 "
 "
1uuq	Y666q1u!:::	  ;::r*   r   )r5   r   r   r7   r   c                 *    g | ]}|j         k    |S r1   rh  rx   tokenro   s     r(   r|   z>WhisperModel._split_segments_by_timestamps.<locals>.<listcomp>;  s-       Ui6O-O-O-O-O-Or*   )r   ri  r   r   r   r>  rt   r:  )r'   ro   r7   rp   rq   rr   r5   current_segmentsr   consecutive_timestampsslices
last_slicecurrent_slicesliced_tokensstart_timestamp_positionend_timestamp_position
start_timeend_timelast_timestamp_positionr`   
timestampss    ``                  r(   r   z*WhisperModel._split_segments_by_timestamps   s    KK1Ui.G!U!U!U!U6RT:!U!U!U!U 	 "
 "
 "
 "
 "
3v;;''"
 "
 "
 %&&**011F& +c&kk***J!' + + &z-'? @+8+;i>W+W()6r):Y=V)V&":T=P"PP  ')?$BU)UU ''!($,	     +

& D$ :>*Y-FF ( /$2CCC (H   #)  J :""z"~9R'R'R*4R.9;T*T'2T5HH##%#h.!	     L D'>>>r*   r   r   r   c              #      67K   |j         d         dz
  }t          | j        j        z            }t	          |j        t                    r2d |j        r|j                            d          ng D             |_         fd|j        D             }t          |          dk    r|	                    d           t          |          dz  dk    r|	                    |           t          t          |d d d         |dd d                             }	d6d}
d}|	|         d         }g }d}|j        {t	          |j        t                    rGd	|j                                        z   }|                    |          }|                    |           n|                    |j                   t!          |d
|           }d}|t          |	          k     r|	|         \  }}||k    r|}||k     r|}||k    r'|dz  }|t          |	          k     r|	|         d         }\| j        j        z  }t          | j        j        z    j        j        z            }t%           j        j        ||z
  ||z
            }|d d |||z   f         }| j        j        z  }t'          |          } j                            t,          j                  r( j                            dt3          |                     ||d          }|dk    s|                     |          }|j        r[ j                            |          }|d         d         \  }}|dd         }|j                            |          |_        ||_          !                    |||j"        |dk    r|j#        nd |j$                  } %                    ||||          \  } }!}"}#|j&        S| j'        |j&        k    }$|j(        |!|j(        k    rd}$|$r- j                            d| j'        |j&                   ||z  }a| j)        d         }%|}&dtT          dt          fd7dtV          tT                   dtX          f67fd}'dtZ          tT                   dtV          tT                   fd}( .                    ||%||||          \  })}}*|j/        rԉ 0                    |)g||||j1        |j2        |           |*s.tg          |)          }+|+|+|k    rti          |+ j5        z            }|j6        b|j6        }, |(|)          }-|-8 |'|-          r-|-d         |z
  }.|.|,k    r|&ti          |. j5        z            z   }|}/to          t          |)                    D ]}0|)|0         }|d         s |'|          r |(|)|0dz   d                    }1|1|1d         d         d         }2n||z   }2|d         |/z
  |,k    p|d         |,k     p|d         |z
  dk     }3|2|d         z
  |,k    p |'|1          p||d         z
  dk     }4|3rJ|4rHti          tq          |dz   |d                    j5        z            }||d         z
  |,k     r|}g |)|0d <    n	|d         }/tg          |)          }+|+|+}|)D ]}|d         }%|9                    |%          }5|d         |d         k    s|5                                sF|                    |%           |
dz  }
tu          |
|&|d         |d         |5|%|"|!|#| j'        |j/        rd  |d         D             nd !          V  |j;        r|"|j<        k    r7|j;        r! j                            d"|"|j<                   t          |          }|=                    t%          ||          |&z
   j        j        z             |t          |	          k     |>                                 d S )#Nr   r   c                 ,    g | ]}t          |          S r1   )r.   )rx   tss     r(   r|   z2WhisperModel.generate_segments.<locals>.<listcomp>[  s.     ' ' ' b		' ' 'r*   r[  c                 >    g | ]}t          |j        z            S r1   )r  rw   )rx   r{  r'   s     r(   r|   z2WhisperModel.generate_segments.<locals>.<listcomp>d  s6     "
 "
 "
35E"t--.."
 "
 "
r*   r   r#   u*   "'“¿([{-"'.。,，!！?？:：”)]}、 seconds)r
  unitr  rh   zProcessing segment at %srf  )rQ   rN   rZ   Fz$No speech threshold is met (%f > %f)r    r   c                     |                      dd          }| d         | d         z
  }d}|dk     r|dz  }|dk     r|d|z
  dz  z  }|d	k    r||d	z
  z  }|S )
Nr!   rh   r   r   g333333?r   g/$?          @)get)r    r!   r`   scores       r(   word_anomaly_scorez:WhisperModel.generate_segments.<locals>.word_anomaly_score  s{    "hh}c::;g6%%SLEe##eh."44Ec>>X^+Er*   r   c                     | | d         sdS fd| d         D             }|d d         }t          fd|D                       }|dk    p|dz   t          |          k    S )Nr;   Fc                 (    g | ]}|d          v|S )r    r1   )rx   wpunctuations     r(   r|   zNWhisperModel.generate_segments.<locals>.is_segment_anomaly.<locals>.<listcomp>  s'    UUUq&	8T8T8T8T8Tr*   r   c              3   .   K   | ]} |          V  d S Nr1   )rx   r  r  s     r(   r   zMWhisperModel.generate_segments.<locals>.is_segment_anomaly.<locals>.<genexpr>  s/      AAa..q11AAAAAAr*   r  g{Gz?)r   r   )r   r;   r  r  r  s      r(   is_segment_anomalyz:WhisperModel.generate_segments.<locals>.is_segment_anomaly  s    ?''*:? 5UUUUGG$4UUUbqb	AAAA5AAAAAz?UT\SZZ%??r*   r   c                 6    t          d | D             d           S )Nc              3   *   K   | ]}|d          
|V  dS )r;   Nr1   )rx   ss     r(   r   zMWhisperModel.generate_segments.<locals>.next_words_segment.<locals>.<genexpr>  s+      ??1AgJ?Q??????r*   )next)r   s    r(   next_words_segmentz:WhisperModel.generate_segments.<locals>.next_words_segment  s!    ?????FFFr*   rn   )rj   r   r;   r  r   r7   c                 &    g | ]}t          d i |S r  r  r  s     r(   r|   z2WhisperModel.generate_segments.<locals>.<listcomp>V  s"    CCC$CCCr*   )r4   r5   r   r   r6   r7   r<   r8   r9   r:   r;   zBReset prompt. prompt_reset_on_temperature threshold is met %f > %f)?r   r.   r   time_per_framer   rX   r0   r`  r   r   r   r~   rM   stripr   extendr   nb_max_framesminr   r   r\  r]  r^  r_  r   rV   ri   r   ro   r   r^   language_coder   rQ   rN   rZ   generate_with_fallbackrH   r:   rG   r   rt   r
   r[   r	   r   rS   r   rT   rU   r   r  rw   rY   r   maxru   r3   rJ   rK   r  r  )8r'   r   ro   r   r   r   rd  content_durationseek_points
seek_clipsidxclip_idxr5   
all_tokensprompt_reset_sincerM   initial_prompt_tokensr  rj   seek_clip_startseek_clip_endrp   window_end_timerq   r   rr   r   r   r   r_   r^   r   r   r8   r<   r9   should_skipr7   previous_seekr  r  rm  r   last_word_end	thresholdfirst_segmentgaphal_last_endsinext_segmenthal_next_startsilence_beforesilence_afterr6   r  r  s8   `                                                     @@r(   ra  zWhisperModel.generate_segmentsO  s
      "+a/ $2H2W!WXXg-s33 	' ' .G+11#666' ' 'G#"
 "
 "
 "
9@9P"
 "
 "
 {q  q!!!{a1$$~...,0CCaC +add"344-
 -

 E(#A&
!-'0#66 :!$w'='C'C'E'E!E(1(8(8(H(H%!!"78888!!'"8999*DTUUU #
 Z((-7-A*O]~-- .o%%&}$$Ac*oo--%h/2D!7!FFK#.<<(78 O &4%$ L
 qqq$)<"<<=G+d.D.SS!'**G{''66 !!.0@0M0M   ));)<)<=Oaxx>1!%W!5!5# 3*44^DD7>qz!}4 4)!B$/%.%8%D%D^%T%T	"*2	'__#*#=)-w~~ ) %  F ++NFIwWW! *6$3g6QQ .:#g&@@@ #(K 	K%%>-3   L(D)!,F M
 
% 
 
 
 
@HTN @t @ @ @ @ @ @ @GT$Z GHTN G G G G 22#')!1 3  	 ' & A:((%&" 0/*? )    / M$+,<$=$=M$0][5P5P$]T5K%KLL :F ' GI %7$67G$H$HM$05G5G5V5V0+G4{B??#05t?U9U3V3V#VD$ $9L#C(8$9$9:: 6 6"22"6&w/ %$--g66 &+=+= 0a :, ,L  ,71=g1Fq1I'1R1<?O1O ' 0< ?) K !H#*7#3i#?!H#*7#3k#AC#G + !/ ?) K !J#5#5l#C#C!J#2WU^#Cc#I *
  . &- &',$'a9I$J$J&*&<%=(" (" $4gen#Dy#P#P+9D8: 0 5 %'.u~ '(8 9 9 ,,9)+   * ''//7#wu~55TZZ\\5!!&)))q&!'*! + +&7#)#8 #2"CC''2BCCCC!      & 65!DDD5 K%%\#;   &)__"KK^T**]:(78  I Z((P 	

r*   c                     | j         j        dk    ot          | j         j                  dk    }|j        dk    rt          j        |d          }t          |          }| j                             ||          S )Ncudar   r#   r   )to_cpu)	ri   r  r   r  ndimr   expand_dimsget_ctranslate2_storager   )r'   r   r  s      r(   r   zWhisperModel.encodeo  sq     "f,QTZ5L1M1MPQ1Q=A~h22H*844z  & 999r*   r   c                    d }g }g }t          t          |j        | j        z                      }|j        t          |          |j        z   }	n| j        }	|	| j        k    rFt          dt          |           d|	t          |          z
   d|	 d| j         d| j         d          |j        D ]}
|
dk    rd|j	        d|
d	}n|j
        |j        d
} | j        j        ||gf|j        |j        |j        |	dd|j        |j        |d	|d         }|j        d         }t          |          }|j        d         ||j        z  z  }||dz   z  }|                    |                                          }t/          |          }|||
|f}|                    |           d}|j        E||j        k    r%d}| j                            d|
||j                   n|                    |           |j        /||j        k     r$d}| j                            d|
||j                   |j        $|j        |j        k    r|j        ||j        k     rd}|s n.t?          |p|d           }|d         |d         |
|d         f}|S )Nr   r   r   r   r   r   r   r   )rA   num_hypothesessampling_topkr   )rA   rC   T)	rD   rE   rF   r   r   r   rO   rP   max_initial_timestamp_indexFzFCompression ratio threshold is not met with temperature %.1f (%f > %f)zDLog probability threshold is not met with temperature %.1f (%f < %f)c                     | d         S )Nr   r1   )xs    r(   <lambda>z5WhisperModel.generate_with_fallback.<locals>.<lambda>  s
    1 r*   keyr  ) r>   r  rR   r>  rW   r   r   r   rL   rB   rA   rC   ri   r   rD   rE   rF   rO   rP   r   r   ru   r  rv   r   rI   r   r_  rG   rH   r:   r  )r'   r   r   ro   r   decode_resultall_resultsbelow_cr_threshold_resultsr  r   r<   kwargsr   r7   r   r   r8   r6   r9   needs_fallbacks                       r(   r  z#WhisperModel.generate_with_fallbackz  s    %'"&)'/$2EEFF'
 '
# !-Vw'==JJJ''Qs6{{ Q QF+Q Q,6Q Q 7;oQ Q ?CoQ Q Q   #/ `	 `	KQ!"&-o%&,7	  ")!2 ' 0 
 )TZ(  '5#*#=%,%A%"&*&5 ' 7,G    F )!,F &kkG -*gw7M.MNK%15K##F++1133D 5d ; ; !	M }---"N2>$w'JJJ%)NK%%`#);	    /55mDDD *6'"<<<!%!!Z.	   +7)G,GGG.:'"<<<!&!   *9k~~  M
 a a a 	M r*   r   c                    g }|s|r|s|                     |j                   |rq|so|                    d|                                z             }t	          |          | j        dz  k    r|d | j        dz  dz
           }|                    |           |r)|                    || j        dz  dz
   d                     |                    |j                   |r|                     |j                   |r|                    d|                                z             }t	          |          | j        dz  k    r|d | j        dz  dz
           }|s|                     |j	                   |                    |           |S )Nr}  r#   r   )
r   sot_prevr   r  r   r   r  sot_sequenceno_timestampsri  )	r'   ro   r   rQ   rN   rZ   r   hotwords_tokensprefix_tokenss	            r(   r   zWhisperModel.get_prompt  s     	Nx 	N 	NMM),--- / /"+"2"239I9I3I"J"J''4?a+???&56P18Lq8P6P&QOo... No10Dq0H.I.K.KLMMMi,--- 	3MM)1222 	)%,,S6<<>>-ABBM=!!T_%999 -.H10Dq0H.H I% 9i7888MM-(((r*   r   
num_framesrj   c                    t          |          dk    rd S g }g }	|D ]d}
fd|
D             }|                    t          t          j                            |                               |	                    |           e|                     |||          }g }|D ]\}t          j        d |D                       }||	                                         }t          |          dk    rt          j
        |          nd}t          dt          |                    }|dz  }t          |          dk    rd}t          dt          |                    D ]{}||         d	         ||         d
         z
  |k    rX||         d         |v r||         d
         |z   ||         d	<   N||dz
           d         |v r||         d	         |z
  ||         d
<   |t          |||           |                    ||f           ^t          |          D ]x\  }}
d}|
d         d         | j        z  }||         \  }}t          |
          D ]>\  }}d}g }|t          ||                   k     r|t          |	|         |                   k     r||         |         }|d         r`|                    t#          |d         t%          ||d
         z   d          t%          ||d	         z   d          |d                              |t          |d                   z  }|dz  }|t          ||                   k     r|t          |	|         |                   k     t          |          dk    r|d         d	         |z
  |dz  k    r|d         d	         |d         d
         z
  |k    s7t          |          dk    r|d         d	         |d         d
         z
  |dz  k    rt          |          dk    re|d         d	         |d         d
         z
  |k    rDt'          |d         d	         dz  |d         d	         |z
            }|x|d         d	<   |d         d
<   t'          d|d         d	         |z
            |d         d
<   |d
         |d         d	         k     rX|d
         dz
  |d         d
         k    r=t'          dt          |d         d	         |z
  |d
                             |d         d
<   n|d         d
         |d
<   |d	         |d         d
         k    rJ|d	         dz   |d         d	         k     r/t'          |d         d
         |z   |d	                   |d         d	<   n|d         d	         |d	<   |d	         }|||         |         d<   @z|S )Nr   c                 8    g | ]}fd |d         D             S )c                 *    g | ]}|j         k     |S r1   )eotrk  s     r(   r|   z?WhisperModel.add_word_timestamps.<locals>.<listcomp>.<listcomp>0  s%    RRR5EIM<Q<Q<Q<Q<Qr*   r7   r1   )rx   ry   ro   s     r(   r|   z4WhisperModel.add_word_timestamps.<locals>.<listcomp>/  sC        SRRRJx$8RRR  r*   c                 0    g | ]}|d          |d         z
  S )r   r   r1   r  s     r(   r|   z4WhisperModel.add_word_timestamps.<locals>.<listcomp><  s%    CCCetG},CCCr*   rh   gffffff?r#   u   .。!！?？r   r   r   r    r5   r!   )r    r   r   r!   r7      r   r   r;   )r   r   r   	itertoolschainfrom_iterablefind_alignmentr   arraynonzeromedianr  r.   r   merge_punctuationsr   rw   rt   r  r  )r'   r   ro   r   r  rT   rU   rj   text_tokenstext_tokens_per_segmentr   segment_tokens
alignmentsmedian_max_durations	alignmentword_durationsmedian_durationr   sentence_end_marksr   segment_idx
word_indexrp   subsegment_idxry   saved_tokensr;   timingboundarys     `                          r(   r   z WhisperModel.add_word_timestamps  so    x==AF"$ 	; 	;G   ")  N tIO$A$A.$Q$QRRSSS#**>::::(({NJ
 

  "# 	I 	IIXCCCCC N ,N,B,B,D,DEN-0-@-@1-D-D	.)))#  "#u_'='=>>O*Q.L >""Q&&%3" q#i..11 W WA |E*Yq\'-BB\QQ$Q</3EEE2;A,w2G,2VIaL//&q1u-f59KKK4=aL4G,4VIaL1y*>@STTT '',(GHHHH$-h$7$7 J	G J	G KJ!!*V,t/EEK,@,M)O\.7.@.@ FG FG*
  3z+'>#?#???LSV+K8HT T E E (4Z@Ff~  %+F^&+K&/,I1&M&M$)+u*Eq$I$I,2=,A	     !Cx(8$9$99L!OJ! !3z+'>#?#???LSV+K8HT T E E( u::>> Qx-.0?!0CD D a%(7*;;lJJJJNN %a%(72C ClUVFV V V  JJNN %a%(72C Cl R R'* %a! 3U1Xe_|5S( (H CKJE!HUOeAhw.?,/58E?\3Q,R,Ra) #7+eAhuo==&w/#5a8III,/a/ A:gCVWW- -a))
 /4Ahw.?
7+ #5)E"Ig,>>>&u-3eBi6FFF+.!"Ig.@*UBS, ,b	%(( -2"Ie,<
5),6u,=)AF%n5g>>MFGN %$r*      r  median_filter_widthc                 4   t          |          dk    rg S | j                            ||j        |||          }g }t	          ||          D ]\  }}	|j        |j        }
t          j        d |
D                       }t          j        d |
D                       }|	                    |	|j
        gz             \  }}t          |          dk    r|                    g            t          j        t          j        d |d d         D                       d          }t          |          dk    r|                    g            t          j        t          j        |          dd	                              t                     }||         | j        z  }||d d                  }||dd                   }fd
t	          |d d         |dd                    D             }|                    d t	          |||||          D                        |S )Nr   )r  c                     g | ]
}|d          S r   r1   rx   pairs     r(   r|   z/WhisperModel.find_alignment.<locals>.<listcomp>      $D$D$DT!W$D$D$Dr*   c                     g | ]
}|d          S )r   r1   r  s     r(   r|   z/WhisperModel.find_alignment.<locals>.<listcomp>  r  r*   r   c                 ,    g | ]}t          |          S r1   r   rx   ts     r(   r|   z/WhisperModel.find_alignment.<locals>.<listcomp>  s    <<<a3q66<<<r*   r   )r   r   )constant_valuesc                 N    g | ]!\  }}t          j        ||                   "S r1   )r   mean)rx   r   jtext_token_probss      r(   r|   z/WhisperModel.find_alignment.<locals>.<listcomp>  sA     " " "Aq (1-.." " "r*   c           	      B    g | ]\  }}}}}t          |||||           S ))r    r7   r   r   r!   )rt   )rx   r    r7   r   r   r!   s         r(   r|   z/WhisperModel.find_alignment.<locals>.<listcomp>  sR        >feS+ !%#$/    r*   )r   ri   alignr  r~   r  r  r   r  split_to_word_tokensr  r   padcumsumdiffastyper[   r=  )r'   ro   r  r   r  r  r   return_listr   
text_tokenr  text_indicestime_indicesr;   word_tokensword_boundariesjumps
jump_timesstart_times	end_timesword_probabilitiesr  s                        @r(   r  zWhisperModel.find_alignment  s    {q  I*""" 3 # 
 
 "%g{";"; 0	 0	FJ%6*J8$D$D$D$D$DEEL8$D$D$D$D$DEEL!*!?!?im_," "E; ;1$$ ""2&&& f	<<;ss+;<<<==v O ?##q((""2&&&F27<00&!LLLSS E &e,t/EEJ$_SbS%9:K"?122#67I" " " " 4oabb6IJJ" " "
   BE{KDVB B      r*   c           
         ||
J d            |g|r9t          ||          }t          ||          \  }}	t          j        |d          }|d|| j        j        z           }|                     |          }|dd|| j        j        z  f         }i t          d|j        d         | j        j                  D ]}
| 	                    t          |d|
|
| j        j        z   f                             }| j                            |          d         }d |D             }|d         \  }}||k    r nS                    |g                               |           t          fd	          }t          |                   }|||fS )
a  
        Use Whisper to detect the language of the input audio or features.

        Arguments:
            audio: Input audio signal, must be a 1D float array sampled at 16khz.
            features: Input Mel spectrogram features, must be a float array with
                shape (n_mels, n_frames), if `audio` is provided, the features will be ignored.
                Either `audio` or `features` must be provided.
            vad_filter: Enable the voice activity detection (VAD) to filter out parts of the audio
                without speech. This step is using the Silero VAD model.
            vad_parameters: Dictionary of Silero VAD parameters or VadOptions class (see available
                parameters and default values in the class `VadOptions`).
            language_detection_threshold: If the maximum probability of the language tokens is
                higher than this value, the language is detected.
            language_detection_segments: Number of segments to consider for the language detection.

        Returns:
            language: Detected language.
            language_probability: Probability of the detected language.
            all_language_probs: List of tuples with all language names and probabilities.
        Nz.Either `audio` or `features` must be provided.r   r   .r   c                 ,    g | ]\  }}|d d         |fS )r#   rf  r1   )rx   rl  probs      r(   r|   z0WhisperModel.detect_language.<locals>.<listcomp>"  s)    !S!S!S-5$52;"5!S!S!Sr*   c                 .    t          |                    S r  r  )langdetected_language_infos    r(   r  z.WhisperModel.detect_language.<locals>.<lambda>-  s    %;D%A!B!B r*   r  )r   r   r   r   r   	n_samplesr  r   r   r   r   ri   r   
setdefaultr   r  )r'   r   r   r   r   r   r   rb  r  r   r   r   r   rb   r^   r_   r  s                   @r(   r   zWhisperModel.detect_language  s   > !5!5; "6!55  = 5e^ L L0>um0T0T-o|!<<<P-0F0PPPE --e44HU.1G1UUUU
 "$q(.,d.D.RSS 	I 	IA![[HS!a$2H2V.V*V%VWXX N j00@@CG "T!S7!S!S!S-?-B*H*#&BBB"--h;;BBCWXXXX &BBBB  H $''=h'G#H#H -/AAAr*   )
r  r   r  r   r   NFNNNr  )FNN)r  )NNFNr   r   )&r+   r,   r-   r0   r   r>   r	   r
   r[   rt   rk   propertyrE  r8  r   r   r   r.   r   r   r   r3   r]   r   r   r   r@   r/  StorageViewra  r   r0  WhisperGenerationResultr  r   r   r  r   r1   r*   r(   r  r  l  s        ./%'+!&"&59e ee e CcN+	e
 e e e  }e e e 3-e !sDy!12e e e eN OT#Y O O O XO $    * #' " !$%$%E
 E
 E
 8;.2/2+/-0>B $#02t#('* %$2#E" <@(,&*36;?"&8;+,WS SS(BJ./S 3-S 	S
 S S S S S "S "S 5$u+uUCZ/@@AS( &.e_)S* %UO+S, &e_-S. %)/S0 &+1S2 !sHSM'9!:;3S4 5S6 7S8 "$s),9S: !;S<  %=S> ?S@ "ASB !CSD ESF GSH !tZ'7!89ISJ !KSL smMSN sDK/0OSP *2%QSR 3-SST '/uoUSV &)WSX 
x "33	4YS S S SjM?M? S	M? 	M?
 M?  M? M? 
d3iM? M? M? M?j =A^ ^*^ ^ &	^ !!89^ 
'	^ ^ ^ ^@		:rz 	:k.E 	: 	: 	: 	:@#/@ S	@ 	@
 &@ 
{!95%N	O@ @ @ @L $) $"&! !! c! !	!
 ! 3-! 
c! ! ! !FA%t*A% A% $/	A%
 A% "A% !A%  %A% 
A% A% A% A%R $%D DD #YD $/	D
 D !D 
dD D D DP '+)- 26+,.1IB IB
#IB 2:&IB 	IB
 dJ./IB &)IB ',IB 
sE4c5j 122	3IB IB IB IB IB IBr*   r  r   rb  r   r   c              #   6  K   t          ||          }| D ]}|j        rg }|j        D ]~}|j        |j        z   dz  }|                    |          }|                    |j        |          |_        |                    |j        |          |_        |                    |           |d         j        |_        |d         j        |_        ||_        n@|                    |j                  |_        |                    |j        d          |_        |V  d S )Nr#   r   r   T)is_end)r   r;   r   r   get_chunk_indexget_original_timer   )	r   rb  r   ts_mapr   r;   r    middlechunk_indexs	            r(   r  r  4  s!     
 !>>F  = 	ME # #*tx/14$44V<<#55dj+NN
!33DHkJJT""""!!HNGM)-GK!GMM #44W]CCGM 227;t2LLGK' r*   r   c                 l    t          j        |           } t          j                            |           } | S r  )r   ascontiguousarrayr/  r  
from_array)r   s    r(   r  r  Q  s-    "7++G%0099GNr*   r6   c                     |                      d          }t          |          t          t          j        |                    z  S )NrH  )r   r   zlibcompress)r6   
text_bytess     r(   rv   rv   W  s6    W%%Jz??Sz!:!:;;;;r*   ro   rP   c                    d|v r'd |D             }|                     | j                   n7|t          |          dk    rg }nt          |t                    s
J d            |                     | j        | j        | j        | j        | j	        | j
        g           t          t          t          |                              S )Nr   c                     g | ]
}|d k    |S r   r1   r  s     r(   r|   z)get_suppressed_tokens.<locals>.<listcomp>a  s    @@@a1r*   r   zsuppress_tokens must be a list)r  non_speech_tokensr   r   r   r   	translatesotr  sot_lm	no_speechr  sortedset)ro   rP   s     r(   r  r  \  s     
_@@o@@@y:;;;;		 C$8$8A$=$=/400RR2RRR0 M	
	 	 	 O,,--...r*   r  	prependedappendedc                    t          |           dz
  }t          |           dz
  }|dk    r| |         }| |         }|d                             d          rO|d                                         |v r3|d         |d         z   |d<   |d         |d         z   |d<   d|d<   g |d<   n|}|dz  }|dk    d}d}|t          |           k     r| |         }| |         }|d                             d          s=|d         |v r3|d         |d         z   |d<   |d         |d         z   |d<   d|d<   g |d<   n|}|dz  }|t          |           k     d S d S )Nr#   r   r   r    r}  r7   r+  )r   
startswithr  endswith)r  r/  r0  r   r  previous	followings          r(   r  r  v  s   IAIA
q&&Q<aL	F&&s++ 	0@0F0F0H0HI0U0U ( 09V3D DIf"*8"4y7J"JIh!HV!#HXA	Q q&& 	
A	A
c)nn

Q<aL	((-- 	)F2Cx2O2O'/)F2CCHV!)(!3i6I!IHX "If"$IhA	Q c)nn





r*   )=r  rL  r]  r,  r#  dataclassesr   r   inspectr   mathr   typingr   r   r	   r
   r   r   warningsr   r/  numpyr   r3  r   faster_whisper.audior   r    faster_whisper.feature_extractorr   faster_whisper.tokenizerr   r   faster_whisper.utilsr   r   r   r   faster_whisper.vadr   r   r   r   r   r3   r@   r]   rf   r  rt   r>   r  r   r  r  r0   r.   rv   r  r  r1   r*   r(   <module>rA     s         				  ) ) ) ) ) ) ) )             C C C C C C C C C C C C C C C C                         : : : : : : : : = = = = = = ? ? ? ? ? ? ? ? V V V V V V V V V V V V                           ,        :        z) z) z) z) z) z) z) z)zEB EB EB EB EB EB EB EBP&w:  g	   :RZ K4K    < < < < < <
//3Z/ d3i/ / / /4$t*   PT      r*   