
    n ii                    
   d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlZ		 d dl
Z
d dlZd dlZn# e$ r Y nw xY wd dlmZ d dlmZ d dlmZmZmZmZmZmZmZ ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        d	Z ej!        j"        ej!        j#        ej!        j$        ej!        j#        dZ%ej&        j'        ej&        j(        dZ)i Z*d	 Z+ G d
 de          Z, G d de j-                  Z. e+d           G d de.                      Z/ e+d           G d de/                      Z0 e+d           G d de/                      Z1 e+d           G d de/                      Z2 e+d           G d de/                      Z3 e+d           G d de/                      Z4 e+d            G d! d"e.                      Z5 e+d#           G d$ d%e.                      Z6 e+d&           G d' d(e.                      Z7 e+d)           G d* d+e.                      Z8 e+d,           G d- d.e.                      Z9 e+d/           G d0 d1e/                      Z: e+d2           G d3 d4e/                      Z; e+d5           G d6 d7e/                      Z< e+d8           G d9 d:e.                      Z= e+d;           G d< d=e=                      Z> e+d>           G d? d@e.                      Z? e+dA           G dB dCe.                      Z@ e+dD           G dE dFe.                      ZA e+dG           G dH dIe.                      ZB e+dJ           G dK dLe.                      ZC e+dM           G dN dOe.                      ZD e+dP           G dQ dRe.                      ZE e+dS           G dT dUe.                      ZF e+dV           G dW dXe.                      ZG e+dY           G dZ d[e.                      ZH e+d\           G d] d^e.                      ZI e+d_           G d` daeI                      ZJ e+db           G dc dde.                      ZK e+de           G df dge.                      ZL e+dh           G di dje.                      ZM e+dk           G dl dme.                      ZN e+dn           G do dpe.                      ZOdq ZPeQdrk    r
 eP             g dsg dtg dug dvg dwg dxg dyg dzg d{g d|g d}d~ZRdS )    N)ListOptional)utils)	Converter)attention_speccommon_spec
model_spectransformer_specwav2vec2_specwav2vec2bert_specwhisper_spec)	gelu	gelu_fastgelu_newgelu_pythongelu_pytorch_tanh
quick_gelurelusiluswish)linearsullama3longrope)gemmgemvc                       fd}|S )z5Registers a model loader for this configuration name.c                 ,     |             t           <   | S N)_MODEL_LOADERS)clsconfig_names    c/var/www/html/auto_sub_bot/venv/lib/python3.11/site-packages/ctranslate2/converters/transformers.py	decoratorz"register_loader.<locals>.decorator<   s    &)cee{#
     )r"   r$   s   ` r#   register_loaderr'   9   s$         r%   c                       e Zd ZdZ	 	 	 	 	 	 ddedee         deee                  dedee         d	ed
efdZd Z	d Z
d Zd ZdS )TransformersConverterz/Converts models from Hugging Face Transformers.NFmodel_name_or_pathactivation_scales
copy_filesload_as_float16revisionlow_cpu_mem_usagetrust_remote_codec                 h    || _         || _        || _        || _        || _        || _        || _        dS )a  Initializes the converter.

        Arguments:
          model_name_or_path: Name of the pretrained model to download, or path to the
            directory containing the pretrained model.
          activation_scales: Path to the pre-computed activation scales. Models may
            use them to rescale some weights to smooth the intermediate activations
            and improve the quantization accuracy. See
            https://github.com/mit-han-lab/smoothquant.
          copy_files: List of filenames to copy from the Hugging Face model to the
            converted model directory.
          load_as_float16: Load the model weights as float16. More precisely, the model
            will be loaded with ``from_pretrained(..., torch_dtype=torch.float16)``.
          revision: Revision of the model to download from the Hugging Face Hub.
          low_cpu_mem_usage: Enable the flag ``low_cpu_mem_usage`` when loading the model
            with ``from_pretrained``.
          trust_remote_code: Allow converting models using custom code.
        N)_model_name_or_path_activation_scales_copy_files_load_as_float16	_revision_low_cpu_mem_usage_trust_remote_code)selfr*   r+   r,   r-   r.   r/   r0   s           r#   __init__zTransformersConverter.__init__F   sA    8 $6 "3% /!"3"3r%   c                    t          j                    5  t          j                            | j        | j                  }|j        j        }t          
                    |          }|Mt          d|dd                    t          t                                                              d          t          t          |j                  }t          j        }d| j        rt           j        nt          |dd           i}| j        r
| j        |d<   | j        r
| j        |d<   | j        r
| j        |d	<    | j        || j        fi |}i }| j        r
| j        |d	<    | j        || j        fi |}	 |||	          }
| j        r1t          j        | j        d
          }|                    |
|           | j        r2| j        D ]*}|
                    |                     |                     +|
cd d d            S # 1 swxY w Y   d S )N)r0   z8No conversion is registered for the model configuration z  (supported configurations are: , )torch_dtyper.   r/   r0   cpu)map_location)torchno_gradtransformers
AutoConfigfrom_pretrainedr2   r8   	__class____name__r    get
ValueErrorjoinsortedkeysgetattrarchitecture_nameAutoTokenizerr5   float16r6   r7   
load_modelload_tokenizerr3   loadsmooth_activationr4   register_fileget_model_file)r9   configr"   loadermodel_classtokenizer_classkwargsmodeltokenizer_kwargs	tokenizerspecr+   filenames                r#   _loadzTransformersConverter._loadj   s   ]__ 7	 7	!,<<(D<S =  F !*3K#''44F~ j #{{DIIf^5H5H5J5J.K.K$L$L$L$LN   ",0HIIK*8O ,>EMM ==	F ~ 4%)^z"& F.2.E*+& F.2.E*+#DOK1ITTVTTE!& P8<8O !45++!9 =M I 6%++D& B$)J+%% % %! ((/@AAA F $ 0 F FH&&t':':8'D'DEEEEo7	 7	 7	 7	 7	 7	 7	 7	 7	 7	 7	 7	 7	 7	 7	 7	 7	 7	s   GG::G>G>c                      |j         |fi |S r   rE   )r9   rY   r*   r[   s       r#   rQ   z TransformersConverter.load_model   s    *{*+=HHHHHr%   c                      |j         |fi |S r   rc   )r9   rZ   r*   r[   s       r#   rR   z$TransformersConverter.load_tokenizer   s    ../ALLVLLLr%   c                    t           j                            | j                  r&t           j                            | j        |          }n9	 t          j        | j        |          }n# t
          j        j        $ r d }Y nw xY w|t           j        	                    |          st          d|d| j                  |S )N)repo_idr`   zFile z does not exist in model )ospathisdirr2   rJ   huggingface_hubhf_hub_downloadr   EntryNotFoundErrorisfilerI   )r9   r`   rh   s      r#   rV   z$TransformersConverter.get_model_file   s    7==122 	7<< 8(CCDD&6 4x   #(;    <rw~~d33<*88T557  
 s   A( (B B)NNFNFF)rG   
__module____qualname____doc__strr   r   boolr:   ra   rQ   rR   rV   r&   r%   r#   r)   r)   C   s        99
 ,0*. %"&"'"'"4 "4"4 $C="4 T#Y'	"4
 "4 3-"4  "4  "4 "4 "4 "4H8 8 8tI I IM M M    r%   r)   c                       e Zd ZdZed             Zej        d             Zd Z	d Z
d Zd Zd Zej        j        fd	Zd
 Zd Zd ZdS )ModelLoaderzRBase class for loading Transformers models into a CTranslate2 model specification.c                     d S r   r&   r9   s    r#   rN   zModelLoader.architecture_name   s    tr%   c                     t                      r   NotImplementedErrorr9   r\   s     r#   get_model_speczModelLoader.get_model_spec   s    !###r%   c                     |                      |          }|                     |j        ||           |                     ||          }|                     ||           |S r   )r{   
set_configrW   get_vocabularyset_vocabulary)r9   r\   r^   r_   tokenss        r#   __call__zModelLoader.__call__   s]    ""5))UI666$$UI66D&)))r%   c                     d t          |                                                                d           D             S )Nc                     g | ]\  }}|S r&   r&   ).0token_s      r#   
<listcomp>z.ModelLoader.get_vocabulary.<locals>.<listcomp>   s,     
 
 
q 
 
 
r%   c                     | d         S N   r&   )items    r#   <lambda>z,ModelLoader.get_vocabulary.<locals>.<lambda>   s
    Q r%   )key)rK   	get_vocabitemsr9   r\   r^   s      r#   r~   zModelLoader.get_vocabulary   sS    
 
"##%%++--3G3G  
 
 
 	
r%   c                     d S r   r&   r9   r_   r   s      r#   r   zModelLoader.set_vocabulary       r%   c                     d S r   r&   r9   rW   r\   r^   s       r#   r}   zModelLoader.set_config   r   r%   c                 6    |j         |_        |j        |_        d S r   weightgammabiasbetar9   r_   modules      r#   set_layer_normzModelLoader.set_layer_norm   s    ]
K			r%   c                 0   |t           j        j        k    r|j        |_        n$|j        |_        |j        |_        |j        |_        t          |t          j                  r |j                            dd          |_        |j        |j        |_        d S d S Nr   r   )r   QuantizationCT2r   qweightscalesweight_scaleqzerosweight_zero
isinstancerC   Conv1D	transposer   )r9   r_   r   
quant_types       r#   
set_linearzModelLoader.set_linear   s    1555 -DKK .DK &D%}Dfl122 	6+//155DK;"DIII #"r%   c                     |j         |_         d S r   )r   r   s      r#   set_embeddingszModelLoader.set_embeddings   s    mr%   c                 x    |j         |_        t          |dd          }|dk    r|j        |d          |_        d S d S )Noffsetr   r   	encodingsrM   r9   r_   r   r   s       r#   set_position_encodingsz"ModelLoader.set_position_encodings   sB    1--A::!^FGG4DNNN :r%   c                      t          d          )Nz7No activation smoothing logic is defined for this modelrx   )r9   r_   r+   s      r#   rT   zModelLoader.smooth_activation   s    !E
 
 	
r%   N)rG   rn   ro   rp   propertyrN   abcabstractmethodr{   r   r~   r   r}   r   r   r   r   r   r   r   rT   r&   r%   r#   rt   rt      s        \\  X 	$ $ $  
 
 
          3>2J2N $ $ $ $$ $ $5 5 5
 
 
 
 
r%   rt   
BartConfigc                   `     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
dd	Zd
 Z xZS )
BartLoaderc                     dS )NBartForConditionalGenerationr&   rv   s    r#   rN   zBartLoader.architecture_name  s    --r%   c                    t           j                            |j        j        |j        j        f|j        j        |j        j        t          |j        j	                 t          |j        dd                    }|                     |j        |j        j                   |                     |j        |j        j                   |                     |j        j        |j                   t          |dd           }|M|                                                                dk    r#|                                |j        j        _        |S )Nnormalize_embeddingTpre_norm
activationlayernorm_embeddingfinal_logits_biasr   )r
   TransformerSpecfrom_configrW   encoder_layersdecoder_layersencoder_attention_headsnormalize_before_SUPPORTED_ACTIVATIONSactivation_functionrM   set_encoderencoderr\   set_decoderdecoderr   
projectionlm_headnonzeronumelsqueezer   )r9   r\   r_   r   s       r#   r{   zBartLoader.get_model_spec  s   /;;\(%,*EFL0\2-el.NO '6KT R R < 
 
 	u{':;;;u{':;;;/???#E+>EE(->-F-F-H-H-N-N-P-PTU-U-U+<+D+D+F+FDL#(r%   c                     t                                          ||          }|j        j        t	          |          k     r|d |j        j                 }|S r   )superr~   rW   
vocab_sizelenr9   r\   r^   r   rF   s       r#   r~   zBartLoader.get_vocabulary  sL    ''y99<"S[[005el556Fr%   c                 Z    |                     |           |                    |           d S r   register_source_vocabularyregister_target_vocabularyr   s      r#   r   zBartLoader.set_vocabulary  0    ''///''/////r%   c                     |j         |_         |j        |_        |j        |_        |                    |j        j                  |_        d S r   )	bos_token	eos_token	unk_tokenconvert_ids_to_tokensrW   decoder_start_token_iddecoder_start_tokenr   s       r#   r}   zBartLoader.set_config#  sI    $.$.$.%.%D%DL/&
 &
"""r%   c                    |                      ||           t          |j        |j                  D ]\  }}|                     |j        |j        d           |                     |j        j        |j	                   | 
                    |j        j        |j                   | 
                    |j        j        |j                   |                     |j        j        |j                   d S NTself_attention)set_common_layersziplayerlayersset_attentionr   	self_attnr   
layer_normself_attn_layer_normr   ffnlinear_0fc1linear_1fc2final_layer_norm)r9   r_   r   
layer_specr   s        r#   r   zBartLoader.set_encoder+  s    tW---!$TZ!@!@ 	S 	SJ)#    
 )4*  
 OOJN3UY???OOJN3UY???
 95;QRRRR	S 	Sr%   c                    |                      ||           t          |j        |j                  D ]\  }}|                     |j        |j        d           |                     |j        j        |j	                   t          |d          rG|                     |j        |j        d           |                     |j        j        |j                   |                     |j        j        |j                   |                     |j        j        |j                   |                     |j        j        |j                   d S )NTr   encoder_attnF)r   r   r   r   r   r   r   r   r   r   hasattr	attentionr   encoder_attn_layer_normr   r   r   r   r   r   r   )r9   r_   r   r   r   s        r#   r   zBartLoader.set_decoder=  s^   tW---!$TZ!@!@ 	S 	SJ)#    
 )4*  
 un-- 	""(&#( #   
 ##(31  
 OOJN3UY???OOJN3UY???
 95;QRRRR1	S 	Sr%   Fc                 0   d t          d          D             }|                     |d         |j                   |                     |d         |j                   |                     |d         |j                   |r!t          j        |j        d         |           nPt          j        |j        d         |d d                    t          j        |j        d         |dd                     |                     |j        d         |j                   d S )Nc                 4    g | ]}t          j                    S r&   r   
LinearSpecr   r   s     r#   r   z,BartLoader.set_attention.<locals>.<listcomp>[  !    CCCQ.00CCCr%      r   r      )	ranger   q_projk_projv_projr   fuse_linearr   out_projr9   r_   r   r   split_layerss        r#   r   zBartLoader.set_attentionZ  s    CC%((CCCQ)9:::Q)9:::Q)9::: 	@dk!nl;;;;dk!nl2A2.>???dk!nl122.>???B);<<<<<r%   c                 8   dd l }t          |d          s.|j        j        r|                    |j        j                  nd}n|j        }||_        |                     |j	        |j
                   |                     t          |j        t                    r|j        d         n|j        |j                   t          |d          r |                     |j        |j                   t          |d          r"|                     |j        |j                   d S d S )Nr   embed_scale      ?r   r   )mathr   rW   scale_embeddingsqrtd_modelr  scale_embeddingsr   position_encodingsembed_positionsr   r   
embeddingslistembed_tokensr   r   r   )r9   r_   r   r  r  s        r#   r   zBartLoader.set_common_layersh  s1   v}-- 	- =0		&-/000 K !,K +##D$;V=STTT dot44%""_	
 	
 	
 6<(( 	D1BCCC6011 	V 8&:TUUUUU	V 	Vr%   F)rG   rn   ro   r   rN   r{   r~   r   r}   r   r   r   r   __classcell__rF   s   @r#   r   r      s        . . X.  &    0 0 0
 
 
S S S$S S S:= = = =V V V V V V Vr%   r   MarianConfigc                   T     e Zd Zed             Z fdZd Z fdZ fdZd Z	 xZ
S )MarianMTLoaderc                     dS )NMarianMTModelr&   rv   s    r#   rN   z MarianMTLoader.architecture_name  s    r%   c                     d|j         _        d|j         _        t                                          |          }|                     |           |S NF)rW   r   r   r   r{   _remove_pad_weights)r9   r\   r_   rF   s      r#   r{   zMarianMTLoader.get_model_spec  sG    (-%+0(ww%%e,,  &&&r%   c                 N    |j         |_         |j        |_        |j         |_        d S r   )r   r   r   r   s       r#   r}   zMarianMTLoader.set_config  s+    $.$. &/%8"""r%   c                 Z    d|_         t                                          ||           d S NT)start_from_zero_embeddingr   r   r9   r_   r   rF   s      r#   r   zMarianMTLoader.set_decoder  s+    )-&D'*****r%   c                     t                                          ||          }|d         dk    r|                                 |S )Nr  z<pad>)r   r~   popr   s       r#   r~   zMarianMTLoader.get_vocabulary  s?     ''y99":  JJLLLr%   c                    |j         j        d         |j        j        |j        j        g}|d         j        j        d         dz
  }|D ]}|j        j        d         |dz   k    r|j        d d         |_        t          |t          j                  rA|	                                r-|j
        j        d         |dz   k    r|j
        d d         |_
        d S )Nr   r   r  )r   r  r   r   r   shaper   r   r  has_biasr   )r9   r_   vocab_specsnew_vocab_size
vocab_specs        r#   r'  z"MarianMTLoader._remove_pad_weights  s    L#A&L#L#
 %Q.4Q7!;% 	7 	7J &q)^a-???$.$5crc$:
!:{'=>>7''))7 O)!,0BBB",/#2#"6
	7 	7r%   )rG   rn   ro   r   rN   r{   r}   r   r~   r'  r  r  s   @r#   r"  r"    s          X    9 9 9+ + + + +    7 7 7 7 7 7 7r%   r"  M2M100Configc                   D     e Zd Zed             Z fdZd Z fdZ xZS )M2M100Loaderc                     dS )NM2M100ForConditionalGenerationr&   rv   s    r#   rN   zM2M100Loader.architecture_name  s    //r%   c                 v    d|j         _        d|j         _        t                                          |          S )NTF)rW   r   r   r   r{   )r9   r\   rF   s     r#   r{   zM2M100Loader.get_model_spec  s/    (,%+0(ww%%e,,,r%   c                 8    |j         |j        d          |_        d S r   )weightsr   r   r   s      r#   r   z#M2M100Loader.set_position_encodings  s    8r%   c                    t                                          ||          }|d         |j        k    r-|                    |j        |                                           |j        D ]}||vr|                    |           t          |d|j	        j
        t          |          z
            }|dk    r|d t          |          D             z  }|S )Nr  num_madeup_wordsr   c                     g | ]}d |z  S )zmadeupword%dr&   r   is     r#   r   z/M2M100Loader.get_vocabulary.<locals>.<listcomp>  s    KKKa~)KKKr%   )r   r~   r   insertunk_token_idr.  additional_special_tokensappendrM   rW   r   r   r  )r9   r\   r^   r   r   r>  rF   s         r#   r~   zM2M100Loader.get_vocabulary  s    ''y99 ":,,,MM)0&**,,???8 	% 	%EF""e$$$")5<+BS[[+P
 
 aKK59I3J3JKKKKFr%   )	rG   rn   ro   r   rN   r{   r   r~   r  r  s   @r#   r7  r7    sy        0 0 X0- - - - -
9 9 9        r%   r7  MBartConfigc                   *    e Zd Zed             Zd ZdS )MBartLoaderc                     dS )NMBartForConditionalGenerationr&   rv   s    r#   rN   zMBartLoader.architecture_name  s    ..r%   c                     |j         |_         |j        |_        |j        |_        |j        j        dv r	d |_        d S |j        |_        d S )N)MBartTokenizerN)r   r   r   rW   rZ   r   r   s       r#   r}   zMBartLoader.set_config  sU    $.$.$. <'+CCC)-F&&&)2)<F&&&r%   NrG   rn   ro   r   rN   r}   r&   r%   r#   rH  rH    s<        / / X/	= 	= 	= 	= 	=r%   rH  PegasusConfigc                   *    e Zd Zed             Zd ZdS )PegasusLoaderc                     dS )NPegasusForConditionalGenerationr&   rv   s    r#   rN   zPegasusLoader.architecture_name      00r%   c                 f    |j         |_        |j        |_        |j        |_        |j         |_        d S r   )	pad_tokenr   r   r   r   r   s       r#   r}   zPegasusLoader.set_config  s4    $.$.$.%.%8"""r%   NrM  r&   r%   r#   rP  rP    s<        1 1 X19 9 9 9 9r%   rP  	OPTConfigc                   \     e Zd Zed             Zd Zd Zd Zd Z fdZ	d Z
 fdZ xZS )		OPTLoaderc                     dS )NOPTForCausalLMr&   rv   s    r#   rN   zOPTLoader.architecture_name      r%   c                 x   t           j                            |j        j        |j        j        |j        j        t          |j        j                 |j        j	        |j        j
        k              }|                     |j        |j        j                   |                     |j        j        |j                   |S )N)r   r   project_in_out)r
   TransformerDecoderModelSpecr   rW   num_hidden_layersnum_attention_headsdo_layer_norm_beforer   r   word_embed_proj_dimhidden_sizer   r   r\   r   r   r   r9   r\   r_   s      r#   r{   zOPTLoader.get_model_spec  s    ;GGL*L,\6-el.NO <;u|?WW H 
 
 	u{':;;;/???r%   c                 &   t          |j        j                  D ]v\  }}d|z  }t          j        |j        j        |j        j        d         |d|z                      t          j        |j        j        |j        j	        |d|z                      wd S )Nzmodel.decoder.layers.%dr   z%s.self_attn.q_projz%s.fc1)
	enumerater   r   r   rT   r   r   r   r   r   )r9   r_   r+   rA  r   layer_scopes         r#   rT   zOPTLoader.smooth_activation  s    !$,"455 	 	HAu3a7K#$/$+A.!"7+"EF   #	$	"!(["89   	 	r%   c                 0    |                     |           d S r   register_vocabularyr   s      r#   r   zOPTLoader.set_vocabulary         (((((r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r   r   r   r   s       r#   r}   zOPTLoader.set_config#  )    $.$.$.r%   c                 :   t                                          ||           |j         |                     |j        |j                   |j         |                     |j        |j                   |j        "|                     |j        |j                   d S d S r   )r   r   
project_inr   project_outr   r   r   r,  s      r#   r   zOPTLoader.set_decoder(  s    D'***)OODOW-?@@@*OOD,g.ABBB#/1IJJJJJ 0/r%   c                     d|_         |                     |j        |j                   |                     |j        |j                   d S r&  )r  r   r  r  r   r  r  r   s      r#   r   zOPTLoader.set_common_layers2  sG     %##D$;V=STTTDOV-@AAAAAr%   c                    t                                          ||          }d}t          |          dz  dk    rId                    |          }||vr|                    |           |dz  }t          |          dz  dk    I|S )Nr      zmadeupword{:04d}r   )r   r~   r   formatrE  )r9   r\   r^   r   rA  symbolrF   s         r#   r~   zOPTLoader.get_vocabulary7  s    ''y99&kkAo""'..q11FV##f%%%FA	 &kkAo"" r%   )rG   rn   ro   r   rN   r{   rT   r   r}   r   r   r~   r  r  s   @r#   rX  rX    s            X      ) ) )/ / /
K K K K KB B B

 
 
 
 
 
 
 
 
r%   rX  GPTBigCodeConfigc                   L     e Zd Zed             Zd Zd Z fdZd Zd Z	 xZ
S )GPTBigCodeMHALoaderc                     dS )NGPTBigCodeForCausalLMr&   rv   s    r#   rN   z%GPTBigCodeMHALoader.architecture_nameF  s    &&r%   c                 *   t           j                            |j        j        |j        j        dt          |j        j                 d          }|                     |j	        |j
                   |                     |j	        j        |j                   |S )NT)r   r   multi_query_attentionr
   r^  r   rW   n_layern_headr   r   r   r   transformerr   r   r   rd  s      r#   r{   z"GPTBigCodeMHALoader.get_model_specJ  s    ;GGL L-el.NO"& H 
 
 	u'8999/???r%   c                 0    |                     |           d S r   ri  r   s      r#   r   z"GPTBigCodeMHALoader.set_vocabularyW  rk  r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S Nz<extra_id_%d>r   r~   rW   r   r   r  rE  r9   r\   r^   r   	extra_idsrA  rF   s         r#   r~   z"GPTBigCodeMHALoader.get_vocabularyZ  g    ''y99L+c&kk9	y!! 	/ 	/AMM/A-....r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   rm  r   s       r#   r}   zGPTBigCodeMHALoader.set_configc  rn  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   |                     |j        |j	                   t          |j        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S NFr   r   r  r   r  wter   r  wper   r   ln_fr   r   hr   ln_1r   r   attnc_attnc_projr   ln_2r   mlpc_fcr   r9   r_   r   r   r   s        r#   r   zGPTBigCodeMHALoader.set_decoderh  J    %DOVZ888##D$;VZHHHDOV[999!$TZ!:!: 	G 	GJ
 9 DejQQQOOJ5<Q?ARSSSOOJ5<Q?ARSSS
 95:FFFOOJN3UY^DDDOOJN3UY5EFFFF	G 	Gr%   )rG   rn   ro   r   rN   r{   r   r~   r}   r   r  r  s   @r#   ry  ry  D  s        ' ' X'  ) ) )    / / /
G G G G G G Gr%   ry  
GPT2Configc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )
GPT2Loaderc                     dS )NGPT2LMHeadModelr&   rv   s    r#   rN   zGPT2Loader.architecture_namey        r%   c                 (   t           j                            |j        j        |j        j        dt          |j        j                           }|                     |j	        |j
                   |                     |j	        j        |j                   |S )NT)r   r   r~  rd  s      r#   r{   zGPT2Loader.get_model_spec}  s}    ;GGL L-el.NO	 H 
 
 	u'8999/???r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zGPT2Loader.set_vocabulary  rk  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   rm  r   s       r#   r}   zGPT2Loader.set_config  rn  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   |                     |j        |j	                   t          |j        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  r  r  s        r#   r   zGPT2Loader.set_decoder  r  r%   N	rG   rn   ro   r   rN   r{   r   r}   r   r&   r%   r#   r  r  w  sn        ! ! X!
 
 
) ) )/ / /
G G G G Gr%   r  
GPTJConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )
GPTJLoaderc                     dS )NGPTJForCausalLMr&   rv   s    r#   rN   zGPTJLoader.architecture_name  r  r%   c           
      p   t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }| 	                    |j
        |j        |j        j        |j        j                   |                     |j
        j        |j                   |S NTFr   r   
rotary_dimrotary_interleaveparallel_residualshared_layer_norm)r
   r^  r   rW   r  r  r   r   r  r   r   r  r   r   r   rd  s      r#   r{   zGPTJLoader.get_model_spec  s    ;GGL L-el.NO|.#"" H 	
 	
 	LL#L		
 	
 	
 	/???r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zGPTJLoader.set_vocabulary  rk  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   rm  r   s       r#   r}   zGPTJLoader.set_config  rn  r%   c                 6   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]5\  }}|                     |j
        |j                   |j        j        j        }|j        j        j        }|j        j        j        }	t#          j        |||          }t#          j        |||          }t'          j        |||	f          |j        j        d         _        |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   7d S r  )r  r   r  r  r   r   r  r   r   r  r  r  r  r	  r   r
  r  r   permute_for_sliced_rotaryrA   catr   r   r   r  r   r   r  fc_inr   fc_out)
r9   r_   r   r  	num_headsr   r   qwkwvws
             r#   r   zGPTJLoader.set_decoder  s]    %DOVZ888DOV[999!$TZ!:!: 	G 	GJ
 <ejIII")B")B")B0Y
KKB0Y
KKB9>BB<9P9PJ%,Q/6OOJ5<Q?ATUUUOOJN3UY_EEEOOJN3UY5EFFFF	G 	Gr%   Nr  r&   r%   r#   r  r    sn        ! ! X!  *) ) )/ / /
G G G G Gr%   r  CodeGenConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )CodeGenLoaderc                     dS )NCodeGenForCausalLMr&   rv   s    r#   rN   zCodeGenLoader.architecture_name      ##r%   c           
         t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }d}t          |j        d          r|j        j
        dv rd}|                     |j        |j        |j        j        |j        j        |j        j        |           |                     |j        j        |j                   |S )	NTFr     head_dim)      rt  )mp_num)r
   r^  r   rW   r  r  r   r   r  r   r  r   r   r  n_embdr   r   r   )r9   r\   r_   r  s       r#   r{   zCodeGenLoader.get_model_spec  s    ;GGL L-el.NO|.#"" H 	
 	
 5<,, 	1F*1T1T FLL#LL 	 	
 	
 	
 	/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   zCodeGenLoader.get_vocabulary  sg    ''y99L+c&kk9	y!! 	/ 	/AMM/A-....r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zCodeGenLoader.set_vocabulary  rk  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   rm  r   s       r#   r}   zCodeGenLoader.set_config
  rn  r%   c                 6   d|_         |                     |j        |j                   |                     |j        |j                   t          j        d|dz            	                    dd          j
                                                                        }||z  t          j        fd|D                       }t          |j        |j                  D ]:\  }	}
|                     |	j        |
j                   |
j        j        j        }||d d f         }|                    dd          \  }}}t1          j        |||          }t1          j        |||          }t          j        |||f          |	j        j        d         _        |                     |	j        j        d         |
j        j                   |                     |	j        j        |
j         j!                   |                     |	j        j"        |
j         j#                   <d S )NFr   r  r  c                 L    g | ] }t          j        |z  |d z   z            !S )r   )rA   arange)r   rA  	local_dims     r#   r   z-CodeGenLoader.set_decoder.<locals>.<listcomp>  s2    XXX!U\!i-!a%9)<==XXXr%   dimr   )$r  r   r  r  r   r   r  npr  reshapeTflattentolistrA   r  r   r   r  r  r  r  qkv_projr   chunkr   r  r   r   r   r  r   r   r  r  r   r  )r9   r_   r   r  r  	embed_dimr  base_permutationpermutationr   r   r  new_qkv_projr  r  r  r  s                   @r#   r   zCodeGenLoader.set_decoder  s    %DOVZ888DOV[9999Q
33;;BBBDLLNNUUWW'	iXXXXGWXXX
 
 "%TZ!:!: 	G 	GJ
 <ejIII z*1H $KN3L%++A1+55JBB 0Y
KKB0Y
KKB9>BB<9P9PJ%,Q/6OOJ5<Q?ATUUUOOJN3UY_EEEOOJN3UY5EFFFF-	G 	Gr%   rG   rn   ro   r   rN   r{   r~   r   r}   r   r  r  s   @r#   r  r    s        $ $ X$  <    ) ) )/ / /
!G !G !G !G !G !G !Gr%   r  GPTNeoXConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )GPTNeoXLoaderc                     dS )NGPTNeoXForCausalLMr&   rv   s    r#   rN   zGPTNeoXLoader.architecture_name5  r  r%   c                    t           j                            |j        j        |j        j        dt          |j        j                 t          |j        j	        |j        j
        |j        j        z  z            d|j        j        d          }|                     |j        |j        |j        j                   |                     |j        j        |j                   |S r  )r
   r^  r   rW   r_  r`  r   
hidden_actint
rotary_pctrc  use_parallel_residualr   r   gpt_neoxr   r   	embed_outrd  s      r#   r{   zGPTNeoXLoader.get_model_spec9  s    ;GGL*L,-el.EF'<+u|/OOQ  $#l@# H 
 
 	u~u|7WXXX/AAAr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   zGPTNeoXLoader.get_vocabularyL  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zGPTNeoXLoader.set_vocabularyU  rk  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   rm  r   s       r#   r}   zGPTNeoXLoader.set_configX  rn  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]	\  }}t          |d          rA|                     |j        |j                   |                     |j        |j                   nJ|                     |j        j        |j                   |                     |j        j        |j                   |j        j        j        }|j        j        j        }|                    |dd|j        d                                       dd                              d|j        d                   }|                    |dd                              dd                              d          }||j        j        d         _        ||j        j        d         _        |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S )NFinput_layer_normr  r  r   r   ) r  r   r  embed_inr   r   r   r   r   r   r   r  input_layernormpost_attention_layer_normpost_attention_layernormr   r   r   query_key_valuer   r   r  r0  swapaxesr   r   denser   r  dense_h_to_4hr   dense_4h_to_h)r9   r_   r   r  r   r   qkv_wqkv_bs           r#   r   zGPTNeoXLoader.set_decoder]  s3    %DOV_===DOV-DEEE!$TZ!?!? 	N 	NJz#566 ##J$?AVWWW##8%:X    ##-8%:O   ##N-u/M   O3:EO38E iBB@@!QU[_-- 
 MM)Q33<<QBBJJ2NNE9>J%,Q/67<J%,Q/4OOJ5<Q?AVWWWOOJN3UY5LMMMOOJN3UY5LMMMM=	N 	Nr%   r  r  s   @r#   r  r  3  s        $ $ X$  &    ) ) )/ / /
#N #N #N #N #N #N #Nr%   r  WhisperConfigc                   l     e Zd Zed             Zd Zd Zd Z fdZd Z	 fdZ
 fdZd	 Zd
 Z xZS )WhisperLoaderc                     dS )NWhisperForConditionalGenerationr&   rv   s    r#   rN   zWhisperLoader.architecture_name  rS  r%   c                 b   t          j        |j        j        |j        j        |j        j        |j        j                  }|                     |j        |j	        j                   | 
                    |j        |j	        j                   |                     |j        j        |j                   |S r   )r   WhisperSpecrW   r   r   r   decoder_attention_headsr   r   r\   r   r   r   r   proj_outrd  s      r#   r{   zWhisperLoader.get_model_spec  s    'L'L0L'L0	
 
 	u{':;;;u{':;;;/@@@r%   c                 X    g dfdt          |j        |j                  D             S )N)z<|endoftext|>z<|startoftranscript|>z<|translate|>z<|transcribe|>z<|startoflm|>z<|startofprev|>z<|nocaptions|>z<|notimestamps|>c                 "    g | ]\  }}|v	|S r&   r&   )r   token_idr   non_lang_special_tokenss      r#   r   z>WhisperLoader._get_lang_ids_from_tokenizer.<locals>.<listcomp>  s7     
 
 
% 333 
 433r%   )r   additional_special_tokens_idsrD  )r9   r^   r  s     @r#   _get_lang_ids_from_tokenizerz*WhisperLoader._get_lang_ids_from_tokenizer  sV    	#
 	#
 	#

 
 
 
#&73$ $
 
 
 	
r%   c                    t          |dd           }|p|j        |_        |j        |_        t          |d          r|j        |_        t          |d          r+t          |j        	                                          |_
        nF|j        j        |_        |j        j        |_        t                              |j                  |_        t          |dd           |                     |          |_
        |j        _|j        j        }|j        j        }t%          t'          j        t+          |dz  |          t+          |                              |_        d S d S )Ngeneration_configalignment_heads
lang_to_idlang_idsr  )rM   suppress_tokenssuppress_idsbegin_suppress_tokenssuppress_ids_beginr   r  rK   r  valuesr  rW   _WHISPER_ALIGNMENT_HEADSrH   name_or_pathr  r   r  r  	itertoolsproductr  )r9   rW   r\   r^   
gen_config
num_layersr  s          r#   r}   zWhisperLoader.set_config  sD   U$7>>
!","<F(2(HF%z#455 D)3)C&z<00 I"()>)E)E)G)G"H"H"',">F(-(JF%%=%A%A%BT%U%UF"6:t,,4"??	JJFO!)4J<I%)!*/:66)$$ & &F"""	 *)r%   c           	          t                                          ||          }|                    d t          |j        j        t          |          z
            D                        |S )Nc              3   &   K   | ]}d |dz  z  V  dS )z<|%.2f|>g{Gz?Nr&   r@  s     r#   	<genexpr>z/WhisperLoader.get_vocabulary.<locals>.<genexpr>  s?       
 
 !d(#
 
 
 
 
 
r%   )r   r~   extendr  rW   r   r   r   s       r#   r~   zWhisperLoader.get_vocabulary  so    ''y99 	 
 
5<2S[[@AA
 
 
 	
 	
 	

 r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zWhisperLoader.set_vocabulary  rk  r%   c                     |                      |j        |j                   |                      |j        |j                   t                                          ||           d S r   )
set_conv1dconv1conv2r   r   )r9   r_   r   rF   s      r#   r   zWhisperLoader.set_encoder  sS    
GM222
GM222D'*****r%   c                     |                      |j        |j                   t                                          ||           d S r   )r   r  r  r   r   r,  s      r#   r   zWhisperLoader.set_decoder  s>    DOW-ABBBD'*****r%   c                     |                      |j        |j                   |                     |j        |j                   d S r   )r   r  r  r   r   r   s      r#   r   zWhisperLoader.set_common_layers  s?    ##D$;V=STTTDOV->?????r%   c                 6    |j         |_         |j        |_        d S r   r   r   r   s      r#   r  zWhisperLoader.set_conv1d  s    mK			r%   )rG   rn   ro   r   rN   r{   r  r}   r~   r   r   r   r   r  r  r  s   @r#   r  r    s        1 1 X1  
 
 
(  :	 	 	 	 	) ) )+ + + + +
+ + + + +@ @ @             r%   r  Wav2Vec2Configc                   d     e Zd Zed             Zd Zd Zd Zd Zd Z	d Z
d Z fd	Zd
 Z xZS )Wav2Vec2Loaderc                     dS )NWav2Vec2ForCTCr&   rv   s    r#   rN   z Wav2Vec2Loader.architecture_name  r[  r%   c                    t          |j        j        dd          }t          j        |j        j        j        |j        j        j        j        |j        j        j        j        |j	        j
        j        d         |          }|j        j        j        D ]M}|j        |_        |j        |_        |j        j        |_        |j        j        |_        |j        j        |_        N|                     |j        ||j        j                   |S Nreturn_hiddenFr   )rM   wav2vec2rW   r   Wav2Vec2Specnum_feat_extract_layersr   r_  r`  r   r   r0  r   r   r   r   r   feed_forwardintermediate_act_fnactivation_fnintermediate_denser   output_denser   r   )r9   r\   r-  r_   r   s        r#   r{   zWav2Vec2Loader.get_model_spec  s     5NN)N!9N");N")=M &q)
 
 ^+2 	8 	8E#oEO).)9E&"'"4"HE*=EI*7EIIuen.CDDDr%   c                     d S r   r&   r   s       r#   r}   zWav2Vec2Loader.set_config      r%   c                 *    |                                 S r   r   r   s      r#   r~   zWav2Vec2Loader.get_vocabulary      ""$$$r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zWav2Vec2Loader.set_vocabulary	  rk  r%   c                    |j         d         j        j        |j        j        _        |j         d         j        j        |j        j        _        |                     |j        j        |j         d         j                   t          |j        |j         dd                    D ]Q\  }}|j        j        |j        _        |j        j        |j        _        |                     |j        |j                   Rd S r   )	conv_layersconvr   feat_layer0r   r   r   r   
feat_layer)r9   r_   feature_extractor
spec_layermodule_layers        r#   set_feature_extractorz$Wav2Vec2Loader.set_feature_extractor  s    '8'DQ'G'L'S$%6%B1%E%J%O"'):)Fq)I)T	
 	
 	
 ),O.:122>)
 )
 	P 	P$J &2%6%=JO"#/#4#9JO 
 5|7NOOOO	P 	Pr%   c                     |                      |j        |j                   |                     |j        |j                   d S r   r   fp_layer_normr   r   fp_projectionr   r9   r_   feature_projections      r#   set_feature_projectionz%Wav2Vec2Loader.set_feature_projection  @    D.0B0MNNN*,>,IJJJJJr%   c                 >   |j         j        j        j                                        |j         j        j        _        |j         j        j                                        |j         j        j        _        |j                                         D ] }|j                                        |_        !|                     t          j        dd|j	        f                     |j         j        j        |j         j        _        |j         j        j        |j         j        _        d S r   )
pos_conv_embedr>  r   datafloatr   
parametersrA   randnrc  )r9   r_   r   rW   params        r#   set_pos_conv_embedz!Wav2Vec2Loader.set_pos_conv_embed  s     "'.399;; 	#*/ 180F0K0P0V0V0X0X#(-+6688 	, 	,E))++EJJu{Aq&2D+EFFGGG*1*@*E*L '(/(>(C(H %%%r%   c                    |                      ||j        j                   |                     ||j        j                   |                     ||j        j        |           t                                          ||j        j                   t          |j        j
        dd          }|s"|                     |j        |j                   d S d S Nr-  F)rD  r.  rA  rK  rJ  rT  r   r   r   rM   rW   r   r   )r9   r_   r\   rW   r-  rF   s        r#   r   zWav2Vec2Loader.set_encoder*  s    ""4)IJJJ##D%.*KLLLen&<fEEED%."8999 5NN 	9OODL%-88888	9 	9r%   c                 F    |                      |j        |j                   d S r   )r   r   r   s      r#   r   z Wav2Vec2Loader.set_common_layers3  s#    DOV->?????r%   )rG   rn   ro   r   rN   r{   r}   r~   r   rD  rK  rT  r   r   r  r  s   @r#   r(  r(    s            X   *  % % %) ) )P P PK K KI I I9 9 9 9 9@ @ @ @ @ @ @r%   r(  Wav2Vec2BertConfigc                   j    e Zd Zed             Zd Zd Zd Zd Zd Z		 ddZ
d	 Zd
 Zd Zd Zd ZdS )Wav2Vec2BertLoaderc                     dS )NWav2Vec2BertForCTCr&   rv   s    r#   rN   z$Wav2Vec2BertLoader.architecture_name9  r  r%   c                    t          |j        j        dd          }t          j        |j        j        j        |j        j        j        |j        j        j	        d         |          }| 
                    |j        |           |S r,  )rM   wav2vec2_bertrW   r   Wav2Vec2BertSpecnum_adapter_layersr_  r   r   r0  r   r   )r9   r\   r-  r_   s       r#   r{   z!Wav2Vec2BertLoader.get_model_spec=  su     3 :OUSS 1&9&8M &q)	
 
 	u---r%   c                     d S r   r&   r   s       r#   r}   zWav2Vec2BertLoader.set_configH  r7  r%   c                 *    |                                 S r   r9  r   s      r#   r~   z!Wav2Vec2BertLoader.get_vocabularyK  r:  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   z!Wav2Vec2BertLoader.set_vocabularyN  rk  r%   c                     |                      |j        |j                   |                     |j        |j                   d S r   rF  rI  s      r#   rK  z)Wav2Vec2BertLoader.set_feature_projectionQ  rL  r%   Nc                 h   d t          d          D             }|                     |d         |j                   |                     |d         |j                   |                     |d         |j                   t          j        |j        d         |           |                     |j        d         |j                   |s|rk|j	        j
        |_        t          j        d                              |          |_        t          j        d                              |          |_        d S d S )Nc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z4Wav2Vec2BertLoader.set_attention.<locals>.<listcomp>X  r  r%   r  r   r   r  r  int32)r  r   linear_qlinear_klinear_vr   r  r   
linear_outdistance_embeddingr   !relative_asymmetric_position_keysr  dtypetyperelative_left_max_positionrelative_right_max_position)r9   r_   r   left_max_positionright_max_positionr  s         r#   r   z Wav2Vec2BertLoader.set_attentionU  s    DC%((CCCQ);<<<Q);<<<Q);<<<$+a.,777B)=>>> 	 2 	5>5Q5XD2.0hw.?.?.D.DEV.W.WD+/1x/@/@/E/E"0 0D,,,	 	r%   c                 <   t          ||          D ]	\  }}|                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j	        |j        j
                   |                     |j        |j        ||           |                     |j        |j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j	        |j        j
                   |                     |j         |j!                   d S r   )"r   r   enc_ffn1_layer_normffn1_layer_normr   enc_ffn1r   ffn1r4  r   r5  r   enc_attnr   enc_attn_layer_normr   enc_conv_layer_normconv_moduler   r  enc_conv_pointwise_conv1pointwise_conv1enc_conv_depthwise_convdepthwise_convenc_conv_depthwise_layer_normdepthwise_layer_normenc_conv_pointwise_conv2pointwise_conv2enc_ffn2_layer_normffn2_layer_normenc_ffn2ffn2enc_final_layer_normr   )r9   spec_layersr   rr  rs  slayerr   s          r#   set_wav2vec2bert_encoderz+Wav2Vec2BertLoader.set_wav2vec2bert_encodere  s    !f55 	U 	UMFE :E<QRRROOFO4ej6STTTOOFO4ej6MNNN2CEW    :E<VWWW*E,=,H   OO/1B1R   OO.0A0P   4!6   OO/1B1R    :E<QRRROOFO4ej6STTTOOFO4ej6MNNN ;U=STTTT7	U 	Ur%   c                 ^   t          ||          D ]\  }}|                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j	        |j
                   |                     |j        |j                   |                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r   )r   r   adpt_residual_layer_normresidual_layer_normr  adpt_residual_convresidual_convadpt_attn_layer_normr   adpt_attn_convself_attn_convr   adpt_attn_layerr   adpt_ffn_layer_normffn_layer_normr   adpt_ffnr   r   r4  r   r5  )r9   r  r   r  r   s        r#   set_wav2vec2bert_adapterz+Wav2Vec2BertLoader.set_wav2vec2bert_adapter  s    f55 
	N 
	NMFE/1J   OOF5u7JKKK ;U=WXXXOOF153GHHHv5uGGG :E<PQQQOOFO4ei6RSSSOOFO4ei6LMMMM
	N 
	Nr%   c                    |                      ||j        j                   |                     |j        |j        j        j        |j        j        j        |j        j        j	                   | 
                    |j        |j        j        j                   t          |j        j        dd          }|s"|                     |j        |j                   d S d S rV  )rK  r^  rJ  r  r   r   r   rW   left_max_position_embeddingsright_max_position_embeddingsr  adapter_layersadapterrM   r   r   )r9   r_   r\   r-  s       r#   r   zWav2Vec2BertLoader.set_encoder  s    ##D%*=*PQQQ%%'.&C&D		
 	
 	
 	%%!4!<!C	
 	
 	
   3 :OUSS 	9OODL%-88888	9 	9r%   c                 H    |j         |_         |j        |j        |_        d S d S r   r%  r   s      r#   r  zWav2Vec2BertLoader.set_conv1d  s(    m;"DIII #"r%   c                 H    |j         |_        |j        |j        |_        d S d S r   r   r   s      r#   r   z!Wav2Vec2BertLoader.set_layer_norm  s(    ]
;"DIII #"r%   )NN)rG   rn   ro   r   rN   r{   r}   r~   r   rK  r   r  r  r   r  r   r&   r%   r#   rZ  rZ  7  s        $ $ X$	 	 	  % % %) ) )K K K
 KO    U U U@N N N9 9 9$ $ $
$ $ $ $ $r%   rZ  T5Configc                   n     e Zd Zed             Zd Z fdZd Zd ZddZ	d Z
d	 Zd
 ZddZd Z xZS )T5Loaderc                     dS )NT5ForConditionalGenerationr&   rv   s    r#   rN   zT5Loader.architecture_name  s    ++r%   c           	         t           j                            |j        j        |j        j        f|j        j        dt          |j        j                 |j        j	        dd          }| 
                    |j        |j                   | 
                    |j        |j        d           |                     |j        j        |j                   |j        j        r|j        j        dz  |j        _        |S )NT)r   r   ffn_glurelative_attention_biasrms_norm)
is_decoderg      )r
   r   r   rW   r  num_decoder_layersr  r   dense_act_fnis_gated_act	set_stackr   r   r   r   r   tie_word_embeddingsr  scale_outputsrd  s      r#   r{   zT5Loader.get_model_spec  s    /;;\$el&EFL"-el.GHL-$( < 
 
 	t|U]333t|U]tDDD/???<+ 	D).)=t)CDL&r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   zT5Loader.get_vocabulary  r  r%   c                 Z    |                     |           |                    |           d S r   r   r   s      r#   r   zT5Loader.set_vocabulary  r   r%   c                     |j         |_        |j        |_        |j        |_        t	          |j        d          r&|                    |j        j                  |_        d S |j         |_        d S )Nr   )	rU  r   r   r   r   rW   r   r   r   r   s       r#   r}   zT5Loader.set_config  sq    $.$.$.5<!9:: 	=)2)H)H3* *F&&& *3)<F&&&r%   Fc                    |                      |j        |j                   |                     t	          |j        t                    r|j        d         n|j        |j                   d|_        t          t          |j        |j                            D ]\  }\  }}|                     |j        |j        d                    |dk    r4|j        d         j        }|j        |j        _        |j        |j        _        |r&|                     |j        |j        d                    |                     |j        |j        d                    d S )Nr   Fr   r  )r   r   r   r   r   r  r  r  r  rf  r   r   blockset_self_attentionr   r  relative_attention_max_distanceset_cross_attentionr   set_ffnr   )r9   r_   r   r  rA  r   r  first_self_attentions           r#   r  zT5Loader.set_stack  sI   DOV-DEEE dot44%""_	
 	
 	
 !&&/DJ0M0M&N&N 	: 	:"A"
E##J$=u{1~NNN1uu'+z!}'C$(@ )A )H )I  O(()=u{1~NNNLLR9999!	: 	:r%   c                    t          |d          rK|                     |j        |j        j                   |                     |j        |j        j                   n%|                     |j        |j        j                   |                     |j        |j        j	                   | 
                    |j        |j                   d S )Nlinear_0_noact)r   r   r   DenseReluDensewi_0r  wi_1wir   wor   r   r   s      r#   r  zT5Loader.set_ffn  s    4)** 	EOODM6+@+EFFFOOD/1F1KLLLLOODM6+@+CDDDv'<'?@@@DOV->?????r%   c                     |                      ||j        d           |                     |j        |j                   d S r   )r   SelfAttentionr   r   r   s      r#   r  zT5Loader.set_self_attention  sA    4!5dKKKDOV->?????r%   c                 |    |                      ||j                   |                     |j        |j                   d S r   )r   EncDecAttentionr   r   r   s      r#   r  zT5Loader.set_cross_attention  s<    4!7888DOV->?????r%   c                    d|_         d t          d          D             }|                     |d         |j                   |                     |d         |j                   |                     |d         |j                   |r!t          j        |j        d         |           nPt          j        |j        d         |d d                    t          j        |j        d         |dd                     |                     |j        d         |j	                   |j
        rD|j        j        |_        t          j        d                              |j                  |_        d S d S )	Nr  c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z*T5Loader.set_attention.<locals>.<listcomp>  r  r%   r  r   r   r  r  rg  )queries_scaler  r   qkvr   r  r   ohas_relative_attention_biasr  r   r  rn  ro  r  r  s        r#   r   zT5Loader.set_attention  sG    CC%((CCCQ555Q555Q555 	@dk!nl;;;;dk!nl2A2.>???dk!nl122.>???B5550 	+4+L+SD(358G3D3D3I3I94 4D000	 	r%   c                     |j         |_        d S r   r   r   r9   r_   r   s      r#   r   zT5Loader.set_layer_norm%      &


r%   r  )rG   rn   ro   r   rN   r{   r~   r   r}   r  r  r  r  r   r   r  r  s   @r#   r  r    s        , , X,  (    0 0 0	= 	= 	=: : : :>@ @ @@ @ @@ @ @   ,' ' ' ' ' ' 'r%   r  	MT5Configc                   $    e Zd Zed             ZdS )	MT5Loaderc                     dS )NMT5ForConditionalGenerationr&   rv   s    r#   rN   zMT5Loader.architecture_name+  s    ,,r%   N)rG   rn   ro   r   rN   r&   r%   r#   r  r  )  s-        - - X- - -r%   r  BloomConfigc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )BloomLoaderc                     dS )NBloomForCausalLMr&   rv   s    r#   rN   zBloomLoader.architecture_name2      !!r%   c           	      "   t           j                            |j        j        |j        j        dt          j        j        ddd          }| 	                    |j
        |j                   |                     |j
        j        |j                   |S )NT)r   r   r   alibialibi_use_positive_positions)r
   r^  r   rW   r  r  r   
ActivationGELUTanhr   r   r  r   r   r   rd  s      r#   r{   zBloomLoader.get_model_spec6  s    ;GGL L"-6 $)- H 
 
 	u'8999/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   zBloomLoader.get_vocabularyE  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zBloomLoader.set_vocabularyN  rk  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   rm  r   s       r#   r}   zBloomLoader.set_configQ  rn  r%   c                 *   d|_         |                     |j        |j                   |                     |j        |j                   |                     |j        |j                   t          |j
        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j        |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  )r  r   r  word_embeddingsr   r   word_embeddings_layernormr   r  r   r   r  r   r  set_qkv_linearr   r  r  r   r  r   r  r   r  r  r   r  r  s        r#   r   zBloomLoader.set_decoderV  s    %DOV-CDDDD4f6VWWWDOV[999!$TZ!:!: 	N 	NJ)4e6K   )03$4$.  
 OO)03U5I5O   )5+I   OOJN3UY5LMMMOOJN3UY5LMMMM#	N 	Nr%   c                 v   |j         }|                    |dd|j        d                   }|                    dd          }|                    d|j        d                   }|j        }|                    |dd          }|                    dd          }|                    d          }||_         ||_        d S )Nr  r  r   r   )r   r  r0  r   r   )r9   r_   r   r  r   r   s         r#   r  zBloomLoader.set_qkv_linearo  s    	1b&,r2BCC!!!Q''FL$455{||Iq"--~~a##||B			r%   )rG   rn   ro   r   rN   r{   r~   r   r}   r   r  r  r  s   @r#   r  r  0  s        " " X"      ) ) )/ / /
N N N2      r%   r  	MPTConfigc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )	MPTLoaderc                     dS NAutoModelForCausalLMr&   rv   s    r#   rN   zMPTLoader.architecture_name      %%r%   c                     t           j                            |j        j        |j        j        dt          j        j        d          }| 	                    |j
        |j                   |S )NT)r   r   r  )r
   r^  r   rW   n_layersn_headsr   r  GELUr   r   r  rd  s      r#   r{   zMPTLoader.get_model_spec  sa    ;GGL!L "-2 H 
 
 	u'8999r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   zMPTLoader.get_vocabulary  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zMPTLoader.set_vocabulary  rk  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   rm  r   s       r#   r}   zMPTLoader.set_config  rn  r%   c                     |                      |j        |j                   |                     |j        |j                   d|_        |j        j        |j        _        t          |j
        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  )r   r  r  r   r   norm_fr  r   r   r   r   blocksr   norm_1r   r   r  Wqkvr  r   norm_2r   up_projr   	down_projr  s        r#   r   zMPTLoader.set_decoder  s>   DOVZ888DOV];;; %!%!7!$TZ!?!? 	J 	JJ
 9 DelSSSOOJ5<Q?QQQOOJ5<Q?ATUUU
 95<HHHOOJN3UY5FGGGOOJN3UY5HIIII	J 	Jr%   c                 Z    |j         |_        t          j        |j                  |_        d S r   )r   r   rA   
zeros_liker   r   s      r#   r   zMPTLoader.set_layer_norm  s#    ]
$TZ00			r%   )rG   rn   ro   r   rN   r{   r~   r   r}   r   r   r  r  s   @r#   r  r  ~  s        & & X&
 
 
    ) ) )/ / /
J J J 1 1 1 1 1 1 1r%   r  GemmaConfigc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )GemmaLoaderc                     dS )NGemmaForCausalLMr&   rv   s    r#   rN   zGemmaLoader.architecture_name  r  r%   c                 @   |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }t          j                            |||dk    rt          j        j	        nt          j        j
        dddddt          |j         dd	          ||j         j        
          }|                     |j        |j                   |                     |j        j        |j                   |j         j        dz  |j        j        _        |S )Nnum_key_value_headshidden_activationr   r   Tr   F
rope_theta'  )	r   r   r  r  r  r  rotary_basenum_heads_kvr        ?rW   r_  r`  rM   r
   r^  r   r   r  r  r  r  r   r   r\   r   r   r   rc  r  multiply_by_sqrt_depthr9   r\   r  r  r  activation_configr_   s          r#   r{   zGemmaLoader.get_model_spec  s   \3
L4	u|-BINN9$$L#L-/B
 
  ;GG %.. &++ +4#lEBB%\* H 
 
$ 	u{333/???9>9QSV9V6r%   c                 :   t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |j        j        t	          |          k     r|d |j        j                 }|S r  r  r  s         r#   r~   zGemmaLoader.get_vocabulary      ''y99L+c&kk9	y!! 	/ 	/AMM/A-....<"S[[005el556Fr%   c                 0    |                     |           d S r   ri  r   s      r#   r   zGemmaLoader.set_vocabulary  rk  r%   c                 p    |j         |_         |j        |_        |j        |_        |j        j        |_        d S r   r   r   r   rW   rms_norm_epslayer_norm_epsilonr   s       r#   r}   zGemmaLoader.set_config  6    $.$.$.$)L$=!!!r%   c                 ,    |j         |_        d|_        d S r*  r   r   layer_norm_use_residualr  s      r#   r   zGemmaLoader.set_layer_norm      &
'+$$$r%   c                    d|_         d|_        |                     |j        |j                   |                     |j        |j                   t          |j	        |j
                  D ]\  }}|                     |j        j        |j                   |                     |j        j        |j                   |j        j        j        }|j        j        j        }|j        j        j        }|j        j        j        }t+          j        |||g          |j        j        d         _        ||j        j        d         _        |                     |j        j        |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   tA          |d           tA          |d           tC          j"                     d S NTFr   r   r   r  )#r  r+  r   r  r  r   r   normr   r   r   r   r  r   r  r   r	  r   r
  r  o_projrA   r  r   r   r   r  	gate_projr  r  r   r  delattrgccollect	r9   r_   r   r   r   wqwkwvr  s	            r#   r   zGemmaLoader.set_decoder  s    $).&DOV-@AAADOV[999!$TZ!?!? 	 	J)4e6K   )5+I   '.B'.B'.B'.B9>BB<9P9PJ%,Q/69;J%,Q/6OOJN3UY5HIIIOOJN959;LMMMOOJN3UY5HIIIE;'''E5!!!JLLLL-	 	r%   rG   rn   ro   r   rN   r{   r~   r   r}   r   r   r  r  s   @r#   r  r    s        " " X"! ! !F	 	 	 	 	) ) )> > >, , ,      r%   r  Gemma2Configc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )Gemma2Loaderc                     dS )NGemma2ForCausalLMr&   rv   s    r#   rN   zGemma2Loader.architecture_name  s    ""r%   c                 B   |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }t          j                            |||dk    rt          j        j	        nt          j        j
        dddddt          |j         dd	          ||j         j        d
          }|                     |j        |j                   |                     |j        j        |j                   |j         j        dz  |j        j        _        |S )Nr  r	  r   r   Tr   Fr
  r  )
r   r   r  r  r  r  r  r  r  pre_post_layer_normr  r  r  s          r#   r{   zGemma2Loader.get_model_spec  s!   \3
L4	u|-BINN9$$L#L-/B
 
  ;GG %.. &++ +4#lEBB%\* $! H 
 
& 	u{333/???9>9QSV9V6r%   c                 :   t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |j        j        t	          |          k     r|d |j        j                 }|S r  r  r  s         r#   r~   zGemma2Loader.get_vocabulary@  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zGemma2Loader.set_vocabularyK  rk  r%   c                 p    |j         |_         |j        |_        |j        |_        |j        j        |_        d S r   r  r   s       r#   r}   zGemma2Loader.set_configN  r  r%   c                 ,    |j         |_        d|_        d S r*  r  r  s      r#   r   zGemma2Loader.set_layer_normT  r  r%   c                 V   d|_         d|_        |                     |j        |j                   |                     |j        |j                   t          |j	        |j
                  D ]\  }}|                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |j        j        j        }|j        j        j        }|j        j        j        }|j        j        j        }t3          j        |||g          |j        j        d         _        ||j        j        d         _        |                     |j        j        |j         j!                   |                     |j        j"        |j         j#                   |                     |j        j$        |j         j%                   tM          |d           tM          |d           tO          j(                     d S r   ))r  r+  r   r  r  r   r   r!  r   r   r   r  r  r  r  pre_feedforward_layer_normpre_feedforward_layernormpost_feedforward_layer_normpost_feedforward_layernormr   r	  r   r
  r  r"  rA   r  r   r   r   r   r   r  r#  r  r  r   r  r$  r%  r&  r'  s	            r#   r   zGemma2Loader.set_decoderX  s    $).&DOV-@AAADOV[999!$TZ!?!? 	 	J
 ;U=RSSS4e6T   5u7V   68X   '.B'.B'.B'.B9>BB<9P9PJ%,Q/69;J%,Q/6OOJN3UY5HIIIOOJN959;LMMMOOJN3UY5HIIIE;'''E5!!!JLLLL;	 	r%   r+  r  s   @r#   r.  r.    s        # # X#" " "H	 	 	 	 	) ) )> > >, , ,# # # # # # #r%   r.  LlamaConfigc                   j     e Zd Zed             Zd Z fdZd Zd Zd Z	e
j        j        fdZ xZS )LlamaLoaderc                     dS )NLlamaForCausalLMr&   rv   s    r#   rN   zLlamaLoader.architecture_name  r  r%   c                    |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd           }|r|                    d          p|d         }t
                              |          }|d         }|Et          d|d         dd                    t
                                                              nd }d	}t          |j         d
d           }	|	rd }
|	j	        dk    rt                              |	j                  }
|
Dt          d|	j	        dd                    t                                                              |	j        }|	j        }nt          j        j        }
d }d }t"          j                            ||t          j        j        ddddd||t          |j         dd          ||
||          }|                     |j        |j        |
           |                     |j        j        |j                   |t8          j        j        k    r3|j        j        D ]&}|d         |j         _!        |d         |j         _"        '|S )Nr  rope_scalingro  	rope_typefactorRoPE scaling type 'T' is not yet implemented. The following RoPE scaling types are currently supported: r<   r   quantization_configawqQuantization type 'T' is not yet implemented. The following Quantization types are currently supported: Tr   Fr
  r  )r   r   r  r  r  r  rotary_scaling_typerotary_scaling_factorr  r  r   quant_group_size
quant_bitslow_freq_factorhigh_freq_factor)#rW   r_  r`  rM   rH   _SUPPORTED_ROPE_SCALINGry   rJ   rL   quant_method_SUPPORTED_QUANTIZATIONversion
group_sizebitsr   r   r   r
   r^  r   r  SWISHr   r   r\   r   r   r   r   RotaryScalingTypeLlama3r   r   rotary_low_freq_factorrotary_high_freq_factor)r9   r\   r  r  r  rB  rC  rK  rL  rG  r   rM  rN  r_   r   s                  r#   r{   zLlamaLoader.get_model_spec  s   \3
L4	u|-BINN9$$Lu|^TBB 	&$((00ML4MI"9"="=i"H"H$0$:!"*)) $F+++TYY7N7S7S7U7U-V-V-VX   + #'$%!%el4I4PP 	J"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3# 3"7lEBB%!-! H 
 
$ 	u{J???/??? ."B"III+  >J%?$; @L&@$<< r%   c                 :   t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |j        j        t	          |          k     r|d |j        j                 }|S r  r  r  s         r#   r~   zLlamaLoader.get_vocabulary  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zLlamaLoader.set_vocabulary  rk  r%   c                     |j         |_         |j        |_        |j        |j        nd|_        |j        j        |_        d S N r  r   s       r#   r}   zLlamaLoader.set_config  sH    $.$.#,#6#BI 	 %*L$=!!!r%   c                     |j         |_        d S r   r  r  s      r#   r   zLlamaLoader.set_layer_norm  r  r%   c                 T   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]D\  }}|                     |j
        j        |j                   |                     |j        j        |j                   d t          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t(          j        j        k    r&t/          j        |j
        j        d         |           n?|t(          j        j        k    rdnd}t/          j        |j
        j        d         ||           |                     |j
        j        d         |j        j        |           |                     |j        j        |j        j        |           |                     |j        j         |j        j!        |           |                     |j        j"        |j        j#        |           tI          |d           tI          |d	           tK          j&                     Fd S )
NFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z+LlamaLoader.set_decoder.<locals>.<listcomp>  !    GGGK244GGGr%   r  r   r   r   r  r   r  'r  r   r  r  r   r   r!  r   r   r   r   r  r   r  r  r   r   r	  r
  r  r   r   r   r   r  r   AWQ_GEMMfuse_linear_prequantr"  r   r  r#  r  r  r   r  r$  r%  r&  r9   r_   r   r   r   r   r  cc_dims           r#   r   zLlamaLoader.set_decoder  s    %DOV-@AAADOV[999!$TZ!?!? ,	 ,	J)4e6K   )5+I   HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLLY,	 ,	r%   rG   rn   ro   r   rN   r{   r~   r   r}   r   r   r   r   r   r  r  s   @r#   r>  r>  ~  s        " " X"K K KZ	 	 	 	 	) ) )> > >' ' ' 4?3K3O 1 1 1 1 1 1 1 1r%   r>  MistralConfigc                   j     e Zd Zed             Zd Z fdZd Zd Zd Z	e
j        j        fdZ xZS )MistralLoaderc                     dS )NMistralForCausalLMr&   rv   s    r#   rN   zMistralLoader.architecture_name   r  r%   c                 V   |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }t          |j         dd           }|rpt                              |d                   }|d         }|Et          d|d         dd	                    t                                                              nd }d
}t          |j         dd           }	|	r|	j	        dk    rt                              |	j                  }
|
Dt          d|	j	        dd	                    t                                                              |	j        }|	j        }nt          j        j        }
d }d }t"          j                            ||t          j        j        ddddd||t          |j         dd          |||
|||j         j                  }|                     |j        |j        |
           |                     |j        j        |j                   |S )Nr  sliding_windowr   rB  ro  rD  rE  rF  r<   r   rG  rH  rI  rJ  TFr
  r  )r   r   r  r  r  r  rK  rL  r  r  rr  r   rM  rN  r  re  )rW   r_  r`  rM   rQ  rH   ry   rJ   rL   rR  rS  rT  rU  rV  r   r   r   r
   r^  r   r  rW  r  r   r   r\   r   r   r   )r9   r\   r  r  r  rr  rB  rK  rL  rG  r   rM  rN  r_   s                 r#   r{   zMistralLoader.get_model_spec$  sT   \3
L4	u|-BINN9$$L /?CCu|^TBB 	&"9"="=l6>R"S"S$0$:!"*)) $F+++TYY7N7S7S7U7U-V-V-VX   + #'$%!%el4I4PP 	"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3# 3"7lEBB%)!-!\*# H 
 
( 	u{zJJJ/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   zMistralLoader.get_vocabularyi  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zMistralLoader.set_vocabularyr  rk  r%   c                 p    |j         |_         |j        |_        |j        |_        |j        j        |_        d S r   r  r   s       r#   r}   zMistralLoader.set_configu  r  r%   c                     |j         |_        d S r   r  r  s      r#   r   zMistralLoader.set_layer_norm{  r  r%   c                 T   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]D\  }}|                     |j
        j        |j                   |                     |j        j        |j                   d t          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t(          j        j        k    r&t/          j        |j
        j        d         |           n?|t(          j        j        k    rdnd}t/          j        |j
        j        d         ||           |                     |j
        j        d         |j        j        |           |                     |j        j        |j        j        |           |                     |j        j         |j        j!        |           |                     |j        j"        |j        j#        |           tI          |d           tI          |d	           tK          j&                     Fd S )
NFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-MistralLoader.set_decoder.<locals>.<listcomp>  rd  r%   r  r   re  r   r  r   r  rf  ri  s           r#   r   zMistralLoader.set_decoder~  s    %DOV-@AAADOV[999!$TZ!?!? +	 +	J)4e6K   )5+I   HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLLW+	 +	r%   rk  r  s   @r#   rn  rn    s        $ $ X$C C CJ    ) ) )> > >' ' ' 4?3K3O 0 0 0 0 0 0 0 0r%   rn  Qwen2Configc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )Qwen2Loaderc                     dS )NQwen2ForCausalLMr&   rv   s    r#   rN   zQwen2Loader.architecture_name  r  r%   c                    |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd           }|r|                    d          p|d         }t
                              |          }|d         }|Et          d|d         dd                    t
                                                              nd }d	}t          j
                            ||t          j        j        d
d
d
dd||t          |j         dd          |          }	|                     |	j        |j                   |                     |	j        j        |j                   |	S )Nr  rB  ro  rC  rD  rE  rF  r<   r   Tr   Fr
  r  )
r   r   r  r  r  r  rK  rL  r  r  )rW   r_  r`  rM   rH   rQ  ry   rJ   rL   r
   r^  r   r   r  rW  r   r   r\   r   r   r   )
r9   r\   r  r  r  rB  rC  rK  rL  r_   s
             r#   r{   zQwen2Loader.get_model_spec  s   \3
L4	u|-BINN9$$Lu|^TBB 	&$((00ML4MI"9"="=i"H"H$0$:!"*)) $F+++TYY7N7S7S7U7U-V-V-VX   + #'$%!;GG"-3# 3"7lEBB% H 
 
 	u{333/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   zQwen2Loader.get_vocabulary  sg    ''y99L+c&kk9	y!! 	/ 	/AMM/A-....r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zQwen2Loader.set_vocabulary  rk  r%   c                     |j         |j         n|j        |_         |j        |_        |j        |j        nd|_        |j        j        |_        d S r_  )r   rU  r   r   rW   r  r  r   s       r#   r}   zQwen2Loader.set_config  sc     ". $ 	
 %.#,#6#BI 	 %*L$=!!!r%   c                     |j         |_        d S r   r  r  s      r#   r   zQwen2Loader.set_layer_norm  r  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]\  }}|                     |j
        j        |j                   |                     |j        j        |j                   d t          d          D             }|                     |d         |j        j                   |                     |d         |j        j                   |                     |d         |j        j                   t)          j        |j
        j        d         |           |                     |j
        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   t?          |d           t?          |d           tA          j!                     d S )	NFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z+Qwen2Loader.set_decoder.<locals>.<listcomp>	  rd  r%   r  r   r   r  r   r  )"r  r   r  r  r   r   r!  r   r   r   r   r  r   r  r  r   r   r	  r
  r  r   r  r   r"  r   r  r#  r  r  r   r  r$  r%  r&  )r9   r_   r   r   r   r  s         r#   r   zQwen2Loader.set_decoder  s    %DOV-@AAADOV[999!$TZ!?!? 	 	J)4e6K   )5+I   HGeAhhGGGLOOLOU_-CDDDOOLOU_-CDDDOOLOU_-CDDDj7>qA<PPPOO)03&  
 OOJN3UY5HIIIOOJN959;LMMMOOJN3UY5HIIIE;'''E5!!!JLLLL3	 	r%   r+  r  s   @r#   r{  r{    s        " " X") ) )V    ) ) )
> 
> 
>' ' '      r%   r{  MixFormerSequentialConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )MixFormerSequentialLoaderc                     dS r  r&   rv   s    r#   rN   z+MixFormerSequentialLoader.architecture_name  r  r%   c           
      Z   t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }| 	                    |j
        |j                   |                     |j
        j        |j        d         j                   |S )NTFr  r  r   r   r  r  r  r  r  )r
   r^  r   rW   r  r  r   r   r  r   r   r   r   r   r   rd  s      r#   r{   z(MixFormerSequentialLoader.get_model_spec#  s    ;GG|+l)-el.NO|.#"" H 	
 	
 	u|444/b1A1HIIIr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   z(MixFormerSequentialLoader.get_vocabulary3  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   z(MixFormerSequentialLoader.set_vocabulary<  rk  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   rm  r   s       r#   r}   z$MixFormerSequentialLoader.set_config?  rn  r%   c                    d|_         |                     |j        |d         j                   |                     |j        |d         j                   t          |j        |dd                   D ]\  }}|                     |j	        |j                   | 
                    |j        j        d         |j        j                   | 
                    |j        j        d         |j        j                   | 
                    |j        j        |j        j                   | 
                    |j        j        |j        j                   d S )NFr   r  r   )r  r   r  r  r   r   lnr   r   r  r   r   r   mixerr  r  r   r   r  r   r   r   r  s        r#   r   z%MixFormerSequentialLoader.set_decoderD  s    %DOVAY];;;DOVBZ];;;!$TZ"!>!> 	D 	DJ
 <ehGGGOOJ5<Q?AQRRROOJ5<Q?AUVVVOOJN3UY]CCCOOJN3UY]CCCC	D 	Dr%   r  r  s   @r#   r  r    s        & & X&       ) ) )/ / /

D 
D 
D 
D 
D 
D 
Dr%   r  	PhiConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )	PhiLoaderc                     dS r  r&   rv   s    r#   rN   zPhiLoader.architecture_nameS  r  r%   c           
         t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }| 	                    |j
        |j                   |                     |j
        j        |j        j                   |                     |j
        j        |j        j                   |S )NTFr  )r
   r^  r   rW   r  r  r   r   r  r   r   r  r   r   r   r   r   r   r  rd  s      r#   r{   zPhiLoader.get_model_specW  s    ;GG|+l)-el.NO|.#"" H 	
 	
 	u'8999/1EFFFDL3U]5EFFFr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   zPhiLoader.get_vocabularyh  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zPhiLoader.set_vocabularyq  rk  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   rm  r   s       r#   r}   zPhiLoader.set_configt  rn  r%   c                 F   d|_         |                     |j        |j        j                   t          |j        |j                  D ]\  }}|                     |j	        |j
                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  )r  r   r  embdr  r   r   r  r   r  r  r   r   r   r  r  r  r   r   r  r   r   r   r  s        r#   r   zPhiLoader.set_decodery  s     %DOV[_===!$TZ!:!: 	D 	DJ
 <ehGGGOOJ5<Q?AQRRROOJ5<Q?AUVVVOOJN3UY]CCCOOJN3UY]CCCC	D 	Dr%   r  r  s   @r#   r  r  Q  s        & & X&  "    ) ) )/ / /
	D 	D 	D 	D 	D 	D 	Dr%   r  
Phi3Configc                   X     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
d Z xZS )	
Phi3Loaderc                     dS r  r&   rv   s    r#   rN   zPhi3Loader.architecture_name  r  r%   c                 (   |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }t          |j         dd          }t          |j         dd           }|r~t                              |d                   }|                    dd          }	|Et          d	|d         d
d                    t                                                              nd }d}	t          j
                            ||t          j        j        ddddd||	t          |j         dd          |||          }
|                     |
j        |j                   |                     |
j        j        |j                   |
S )Nr   original_max_position_embeddingsr   max_position_embeddingsrB  ro  rD  r   rE  rF  r<   TFr
  r  )r   r   r  r  r  r  rK  rL  r  r  r  r  )rW   r_  r`  rM   rQ  rH   ry   rJ   rL   r
   r^  r   r   r  rW  r   r   r\   r   r   r   )r9   r\   r  r  r  r  r  rB  rK  rL  r_   s              r#   r{   zPhi3Loader.get_model_spec  s   \3
L4	u|-BINN9$$L+2L<a,
 ,
( #*%,8QST"U"Uu|^TBB 	&"9"="=l6>R"S"S$0$4$4Xq$A$A!"*)) $F+++TYY7N7S7S7U7U-V-V-VX   + #'$%!;GG"-3# 3"7lEBB-M$;% H 
 
" 	u{333/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   zPhi3Loader.get_vocabulary  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zPhi3Loader.set_vocabulary  rk  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   rm  r   s       r#   r}   zPhi3Loader.set_config  rn  r%   c                     |j         |_        d S r   r  r  s      r#   r   zPhi3Loader.set_layer_norm  r  r%   c                     t          j        |t           j                  |_        t          j        |t           j                  |_        d S )N)rn  )rA   tensorfloat32rotary_scaling_long_factorrotary_scaling_short_factor)r9   r_   r  r  s       r#   set_rotary_embeddingsz Phi3Loader.set_rotary_embeddings  sL     +0,&em+
 +
 +
' ,1<'u},
 ,
 ,
(((r%   c                 4   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]\  }}|                     |j
        j        |j                   |                     |j        j        |j                   |                     |j
        j        d         |j        j                   |                     |j
        j        d         |j        j                   |j        j        j        K|j        j        j        :|                     |j
        |j        j        j        |j        j        j                   |j        j        j                            dd          \  }}||j        j        _        ||j        j        _        |                     |j        j        |j        j                   t?          |d           t?          |d           tA          j!                     d S )NFr   r   r  r  r   r  )"r  r   r  r  r   r   r!  r   r   r   r   r  r   r  r   r   r   r  r"  
rotary_emblong_factorshort_factorr  r  gate_up_projr   r  r   r  r   r  r$  r%  r&  )r9   r_   r   r   r   r#  r  s          r#   r   zPhi3Loader.set_decoder  s    %DOV-@AAADOV[999!$TZ!?!? 	 	J)4e6K   )5+I   OO)03U_5M   OOJ5<Q?AWXXX*6BO.;G**-O.:O.;   "'!7!>!D!DQA!D!N!NIw-6JN#*3:JN)0OOJN3UY5HIIIE;'''E5!!!JLLLL;	 	r%   )rG   rn   ro   r   rN   r{   r~   r   r}   r   r  r   r  r  s   @r#   r  r    s        & & X&. . .`    ) ) )/ / /
' ' '
 
 
" " " " " " "r%   r  RWConfigc                   Z     e Zd Zed             Zd Zd Z fdZd Zd Z	d Z
d
d	Z xZS )RWLoaderc                     dS r  r&   rv   s    r#   rN   zRWLoader.architecture_name 	  r  r%   c                     |j         j        | _        |j         j        | _        t          |j         dd           | _        d| _        d S )N	n_head_kvnum_kv)rW   r  _num_layersr  
_num_headsrM   _num_heads_kv_num_kv_attrrz   s     r#   get_falcon_speczRWLoader.get_falcon_spec	  s?     </,-$U\;EE$r%   c                    |                      |           t          |j        dd          rd}n| j        }t          j                            | j        | j        dt          j
        j        |j        j        dd|j        j        rdnd d|j        j        |dk    |          }|                     |j        |j                   |                     |j        j        |j                   |S )Nmulti_queryFr   Tr   )
r   r   r  r  scale_alibir  r  r  r  r  )r  rM   rW   r  r
   r^  r   r  r  r   r  r  r  rotaryparallel_attnr   r   r  r   r   r   )r9   r\   r  r_   s       r#   r{   zRWLoader.get_model_spec
	  s    U###5<66 	.LL-L;GGO"-2,$)-!L/9qqT##l8*a/% H 
 
 	u'8999/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   zRWLoader.get_vocabulary%	  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zRWLoader.set_vocabulary.	  rk  r%   c                 N    |j         |_        |j         |_         |j         |_        d S r   )r   r   r   r   s       r#   r}   zRWLoader.set_config1	  rn  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]\  }}t          |d          rA|                     |j        |j                   |                     |j        |j                   n{t          |d          r!|                     |j        |j                   nJ|                     |j        j        |j                   |                     |j        j        |j                   t)          |j        | j                  }|dk    r1|                     |j        j        d         |j        j                   nN|                     |j        j        d         |j        j        |j        j        ||j        j        k     r|nd            |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                    d S )NFln_attnr  r   r   )!r  r   r  r  r   r   r  r   r   r  r   r  r  r  ln_mlpr  r  r   r   r  rM   r  r   r   r  r  r  r  r   r  r  r   r  )r9   r_   r   r   r   r  s         r#   r   zRWLoader.set_decoder6	  sL    %DOV-CDDDDOV[999!$TZ!:!: !	N !	NJui(( ##J$?OOO##J$H%,WWWW%899 ##J$@%BWXXXX##-8%:O   ##N-u/M   U143DEEF{{-4Q7(8   
 ##-4Q7(8(2$u';'EEEFF4	   OO)03U5I5O   OOJN3UY5LMMMOOJN3UY5LMMMMC!	N !	Nr%   Nc                 :   |j         }|[|                    |dd|j        d                   }|                    dd          }|                    d|j        d                   }n|j        d         ||dz  z   z  }|                    d||z  dz   ||j        d                   }|                    ||z  ddgd          \  }}}	t          j        |                    ||z  d          |                    ||z  d          |	                    ||z  d          g          }||_         |j        |j        }
|C|
                    |dd          }
|
                    dd          }
|
                    d          }
n|
                    d||z  dz   |          }
|
                    ||z  ddgd          \  }}}	t          j        |                    ||z            |                    ||z            |	                    ||z            g          }
|
|_        d S d S )Nr  r  r   r   r  r  )r   r  r0  r   splitrA   r  r   )r9   r_   r   r  r  r   r  r  r  r  r   s              r#   r  zRWLoader.set_qkv_linear^	  s)   >^^Iq"fl26FGGF%%a++F^^BR(899FF|A9vz+ABH^^I'!+Xv|B7G F llI$7A#>AlFFGAq!YIIi(2B77IIfx/44IIfx/44 F ;";D~||Iq"55~~a++||B''||B	V(;a(?JJ**i6&91a%@a*HH1ay		)h"677		&8"344		&8"344  DIII% #"r%   r   )rG   rn   ro   r   rN   r  r{   r~   r   r}   r   r  r  r  s   @r#   r  r    s        & & X&% % %  6    ) ) )/ / /
&N &N &NP) ) ) ) ) ) ) )r%   r  FalconConfigc                       e Zd Zd ZdS )FalconLoaderc                     |j         j        | _        |j         j        | _        t          |j         dd           | _        d| _        d S )Nnum_kv_heads)rW   r_  r  r`  r  rM   r  r  rz   s     r#   r  zFalconLoader.get_falcon_spec	  s?     <9,:$U\>4HH*r%   N)rG   rn   ro   r  r&   r%   r#   r  r  	  s#        + + + + +r%   r  DistilBertConfigc                   6    e Zd Zed             Zd Zd Zd ZdS )DistilBertLoaderc                     dS )NDistilBertModelr&   rv   s    r#   rN   z"DistilBertLoader.architecture_name	  r  r%   c                 &   t          j        |j        j        |j        j        dt
          |j        j                 d          }t          j        |          }d|j        _	        | 
                    |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   t'          |j        j        |j        j                  D ]\  }}d t-          d          D             }|                     |d         |j        j                   |                     |d         |j        j                   |                     |d         |j        j                   t9          j        |j        j        d         |           |                     |j        j        d         |j        j                    |                     |j        j!        |j"                   |                     |j#        j$        |j#        j%                   |                     |j#        j&        |j#        j'                   |                     |j#        j!        |j(                   |S )	NFTr   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z3DistilBertLoader.get_model_spec.<locals>.<listcomp>	  rd  r%   r  r   r  ))r
   TransformerEncoderSpecrW   r  r  r   r   TransformerEncoderModelSpecr   r  r   r  r  r   r  position_embeddingsr   r   	LayerNormr   r   r  r  r   r   q_link_linv_linr   r  r   r   out_linr   sa_layer_normr   r   lin1r   lin2output_layer_normr9   r\   encoder_specr_   r   r   r  s          r#   r{   zDistilBertLoader.get_model_spec	  sP   '>L!L -el.EF $
 
 
  ;
 
 ).%L#A&(8(H	
 	
 	
 	##L+U-=-Q	
 	
 	
 	L,e.>.H	
 	
 	
 "%T\%79J9P!Q!Q 	T 	TJGGeAhhGGGLOOLOU_-BCCCOOLOU_-BCCCOOLOU_-BCCCj7>qA<PPPOO)03U_5L   )4e6I   OOJN3UY^DDDOOJN3UY^DDD
 95;RSSSSr%   c                 0    |                     |           d S r   ri  r   s      r#   r   zDistilBertLoader.set_vocabulary	  rk  r%   c                 ,    |j         |_         d|_        d S )Ng-q=)r   r  r   s       r#   r}   zDistilBertLoader.set_config	  s    $.$)!!!r%   N)rG   rn   ro   r   rN   r{   r   r}   r&   r%   r#   r  r  	  s[        ! ! X!* * *X) ) )* * * * *r%   r  
BertConfigc                   F     e Zd Zed             Zd Z fdZd Zd Z xZ	S )
BertLoaderc                     dS )N	BertModelr&   rv   s    r#   rN   zBertLoader.architecture_name	  s    {r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }t          j        |dt          j        j                  }d|j        _        |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |                     |j        |j        j                   t=          |j        j        |j        j                  D ]\  }}d	 tA          d
          D             }|                     |d         |j!        j"        j#                   |                     |d         |j!        j"        j$                   |                     |d         |j!        j"        j%                   tM          j'        |j(        j)        d         |           |                     |j(        j)        d         |j!        j*        j                   |                     |j(        j+        |j!        j*        j                   |                     |j,        j-        |j.        j                   |                     |j,        j/        |j*        j                   |                     |j,        j+        |j*        j                   |S )NabsoluteFTr  r   r   r   num_source_embeddingsembeddings_mergepooling_layerpooling_activationr   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-BertLoader.get_model_spec.<locals>.<listcomp>	  rd  r%   r  )0rW   position_embedding_typer
   r  r_  r`  r   r  r   EmbeddingsMergeADDr  r  Tanhr   r  r   r  r  token_type_embeddingsr   r  r  r   r   r  r   pooler_densepoolerr  r   r   r  r   r9   queryr   valuer   r  r   r   outputr   r   r   intermediater   r  s          r#   r{   zBertLoader.get_model_spec	  s   |3zAAAA'>L*L,-el.EF $"#(8<
 
 
  ;*5:
 
 
 ).%L#A&(8(H	
 	
 	
 	L#A&(8(N	
 	
 	
 	##L+U-=-Q	
 	
 	
 	L,e.>.H	
 	
 	
 	)5<+=>>>!$T\%79L!M!M 	S 	SJGGeAhhGGGLOOLOU_-A-GHHHOOLOU_-A-EFFFOOLOU_-A-GHHHj7>qA<PPPOO)03U_5K5Q   )4eo6L6V   OOJN3U5G5MNNNOOJN3U\5GHHH
 95<;QRRRRr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r~   zBertLoader.get_vocabulary
  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zBertLoader.set_vocabulary
  rk  r%   c                 @    |j         |_         |j        j        |_        d S r   r   rW   layer_norm_epsr  r   s       r#   r}   zBertLoader.set_config
       $.$)L$?!!!r%   )
rG   rn   ro   r   rN   r{   r~   r   r}   r  r  s   @r#   r  r  	  s          X6 6 6p    ) ) )@ @ @ @ @ @ @r%   r  XLMRobertaConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )XLMRobertaLoaderc                     dS )N#XLMRobertaForSequenceClassificationr&   rv   s    r#   rN   z"XLMRobertaLoader.architecture_name
  s    44r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }|j        j        d}nd}t          j        ||t          j        j                  }d|j        _        |                     |j        j        d         |j        j        j                   |                     |j        j        d         |j        j        j                   |                     |j        j        |j        j        j                   |                     |j        j        |j        j        j                   |r*|                     |j        |j        j        j                   t?          |j        j         |j        j        j                   D ]\  }}d	 tC          d
          D             }|                     |d         |j"        j#        j$                   |                     |d         |j"        j#        j%                   |                     |d         |j"        j#        j&                   tO          j(        |j)        j*        d         |           |                     |j)        j*        d         |j"        j+        j                   |                     |j)        j,        |j"        j+        j                   |                     |j-        j.        |j/        j                   |                     |j-        j0        |j+        j                   |                     |j-        j,        |j+        j                   |S )Nr  FTr  r  r  r   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z3XLMRobertaLoader.get_model_spec.<locals>.<listcomp>M
  rd  r%   r  )1rW   r  r
   r  r_  r`  r   r  r   r  r  robertar  r  r  r  r   r  r   r  r  r  r   r  r  r   r   r  r   r  r  r   r   r  r   r9   r  r   r  r   r  r   r   r  r   r   r   r  r   r9   r\   r  r  r_   r   r   r  s           r#   r{   zXLMRobertaLoader.get_model_spec"
  s   |3zAAAA'>L*L,-el.EF $"#(8<
 
 
 ='!MM M;'*5:
 
 
 ).%L#A&(@(P	
 	
 	
 	L#A&(@(V	
 	
 	
 	##L+M$8	
 	
 	
 	L,em.F.P	
 	
 	
  	KOOD-u}/C/IJJJ!$T\%79N9T!U!U 	S 	SJGGeAhhGGGLOOLOU_-A-GHHHOOLOU_-A-EFFFOOLOU_-A-GHHHj7>qA<PPPOO)03U_5K5Q   )4eo6L6V   OOJN3U5G5MNNNOOJN3U\5GHHH
 95<;QRRRRr%   c                 0    |                     |           d S r   ri  r   s      r#   r   zXLMRobertaLoader.set_vocabulary`
  rk  r%   c                 @    |j         |_         |j        j        |_        d S r   r  r   s       r#   r}   zXLMRobertaLoader.set_configc
  r  r%   c                 ~    |j         |_        t          |dd          }|dk    r|j        |dz   d          |_        d S d S Npadding_idxr   r   r   r   s       r#   r   z'XLMRobertaLoader.set_position_encodingsg
  F    22A::!^FQJLL9DNNN :r%   N	rG   rn   ro   r   rN   r{   r   r}   r   r&   r%   r#   r  r  
  sm        5 5 X5< < <|) ) )@ @ @: : : : :r%   r  RobertaConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )RobertaLoaderc                     dS )NRobertaModelr&   rv   s    r#   rN   zRobertaLoader.architecture_namep
  s    ~r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }|j        d}nd}t          j        ||t          j        j                  }d|j        _        |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |r%|                     |j        |j        j                   t=          |j        j        |j        j                  D ]\  }}d	 tA          d
          D             }|                     |d         |j!        j"        j#                   |                     |d         |j!        j"        j$                   |                     |d         |j!        j"        j%                   tM          j'        |j(        j)        d         |           |                     |j(        j)        d         |j!        j*        j                   |                     |j(        j+        |j!        j*        j                   |                     |j,        j-        |j.        j                   |                     |j,        j/        |j*        j                   |                     |j,        j+        |j*        j                   |S )Nr  FTr  r  r  r   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z0RobertaLoader.get_model_spec.<locals>.<listcomp>
  rd  r%   r  0rW   r  r
   r  r_  r`  r   r  r   r  r  r  r  r  r  r   r  r   r  r  r  r   r  r  r   r   r  r   r  r  r   r   r  r   r9   r  r   r  r   r  r   r   r  r   r   r   r  r   r  s           r#   r{   zRobertaLoader.get_model_spect
     |3zAAAA'>L*L,-el.EF $"#(8<
 
 
 <!MM M;'*5:
 
 
 ).%L#A&(8(H	
 	
 	
 	L#A&(8(N	
 	
 	
 	##L+0	
 	
 	
 	L,e.>.H	
 	
 	
  	COOD-u|/ABBB!$T\%79L!M!M 	S 	SJGGeAhhGGGLOOLOU_-A-GHHHOOLOU_-A-EFFFOOLOU_-A-GHHHj7>qA<PPPOO)03U_5K5Q   )4eo6L6V   OOJN3U5G5MNNNOOJN3U\5GHHH
 95<;QRRRRr%   c                 0    |                     |           d S r   ri  r   s      r#   r   zRobertaLoader.set_vocabulary
  rk  r%   c                 @    |j         |_         |j        j        |_        d S r   r  r   s       r#   r}   zRobertaLoader.set_config
  r  r%   c                 ~    |j         |_        t          |dd          }|dk    r|j        |dz   d          |_        d S d S r  r   r   s       r#   r   z$RobertaLoader.set_position_encodings
  r  r%   Nr  r&   r%   r#   r  r  n
  sm          X< < <|) ) )@ @ @: : : : :r%   r  CamembertConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )CamembertLoaderc                     dS )NCamembertModelr&   rv   s    r#   rN   z!CamembertLoader.architecture_name
  r[  r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }|j        d}nd}t          j        ||t          j        j                  }d|j        _        |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |r%|                     |j        |j        j                   t=          |j        j        |j        j                  D ]\  }}d	 tA          d
          D             }|                     |d         |j!        j"        j#                   |                     |d         |j!        j"        j$                   |                     |d         |j!        j"        j%                   tM          j'        |j(        j)        d         |           |                     |j(        j)        d         |j!        j*        j                   |                     |j(        j+        |j!        j*        j                   |                     |j,        j-        |j.        j                   |                     |j,        j/        |j*        j                   |                     |j,        j+        |j*        j                   |S )Nr  FTr  r  r  r   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z2CamembertLoader.get_model_spec.<locals>.<listcomp>
  rd  r%   r  r  r  s           r#   r{   zCamembertLoader.get_model_spec
  r  r%   c                 0    |                     |           d S r   ri  r   s      r#   r   zCamembertLoader.set_vocabulary  rk  r%   c                 @    |j         |_         |j        j        |_        d S r   r  r   s       r#   r}   zCamembertLoader.set_config  r  r%   c                 ~    |j         |_        t          |dd          }|dk    r|j        |dz   d          |_        d S d S r  r   r   s       r#   r   z&CamembertLoader.set_position_encodings  r  r%   Nr  r&   r%   r#   r#  r#  
  sm            X < < <|) ) )@ @ @: : : : :r%   r#  c            	      P   t          j        t           j                  } |                     ddd           |                     dd           |                     d	d
d           |                     dd           |                     ddd           |                     ddd           t	          j        |            |                                 }t          |j        |j	        |j
        |j        dv |j        |j        |j                  }|                    |           d S )N)formatter_classz--modelTzaName of the pretrained model to download, or path to a directory containing the pretrained model.)requiredhelpz--activation_scaleszPath to the pre-computed activation scales. Models may use them to rescale some weights to smooth the intermediate activations and improve the quantization accuracy. See https://github.com/mit-han-lab/smoothquant.)r.  z--copy_files+zWList of filenames to copy from the Hugging Face model to the converted model directory.)nargsr.  z
--revisionz<Revision of the model to download from the Hugging Face Hub.z--low_cpu_mem_usage
store_truezNEnable the flag low_cpu_mem_usage when loading the model with from_pretrained.)actionr.  z--trust_remote_codez*Allow converting models using custom code.)rP   int8_float16)r+   r,   r-   r.   r/   r0   )argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsr)   r\   r+   r,   quantizationr.   r/   r0   convert_from_args)parserargs	converters      r#   mainr?    s   $ >  F F	     :     	     K     ]    
 9     '''D%
0?)-HH00  I %%%%%r%   __main__))r   r   )r  r   )r     r  r   r  r   r  r  r  r  r  r  ))r  r  rB  rD  rE  rF  )r  rA  )rE  r     rA  r   )rA  rA  )rA  rH  )rC  )r  r  )r  r  rG  rI  )rA  r  )rA  r  )rA     ))rJ  rJ  rH  r   )rH  r  )rH  rt  )rt  r  )rt  rA  rt  rH  	   r   )rN  r  )rN  rt  )rN  
   )rO  r   )rO  r   )rO  r  )rO  r  )rO  rJ  )rO     rP  r  rP  r  )
)rA  r  )rA  rN  )rt  r   )rt  r  rL  )rt  rt  rM  )rN  rH  )rN  rN  )rO  rA  )rR  )   r   )rS     )rS  rS     r  )   r   )rW  r  )rW  rN  )   rT  rX  rS  )   rH  )rZ  rO  )rZ  rV     r   )r\  r  )r\  rN  )r\  rS  )   rT  ))   rV  rU  )rV  rV  rW  r   r[  )   r  )	)rN     rQ  rR  )rP  rX  )   rH  )rb  rP  )rb  rX  )r`  r  )r`  rV  ))rO  rT  )r^  rX  )rW  rP  )rW  rT  )rW  r^  )rX  rV  )rX  rW  )rZ  r  )rZ  rP  )rZ  ra  ra  rP  )r]  r  )r]  r  )rb  r  )rb  rN  )rb  rT  )r`  rA  )r`  rH  )r`  r^  )   rA  )   r   )re  rT  )   rV  )
rK  )rO  rX  )rT  rZ  )r^  rT  r_  rY  rc  )r]  r  )   r   )rd  rJ  )zopenai/whisper-tiny.enzopenai/whisper-tinyzopenai/whisper-base.enzopenai/whisper-basezopenai/whisper-small.enzopenai/whisper-smallzopenai/whisper-medium.enzopenai/whisper-mediumzopenai/whisper-largezopenai/whisper-large-v2zopenai/whisper-large-v3)Sr   r4  r%  r  rg   typingr   r   numpyr  rj   rA   rC   ImportErrorctranslate2.convertersr    ctranslate2.converters.converterr   ctranslate2.specsr   r   r	   r
   r   r   r   r  r  r  GELUSigmoidRELUrW  r   rX  LinearSurY  rQ  r   rg  AWQ_GEMVrS  r    r'   r)   ABCrt   r   r"  r7  rH  rP  rX  ry  r  r  r  r  r  r(  rZ  r  r  r  r  r  r.  r>  rn  r{  r  r  r  r  r  r  r  r  r  r#  r?  rG   r  r&   r%   r#   <module>rt     s   



  				     				 ! ! ! ! ! ! ! !    	LLL 	 	 	D	 ) ( ( ( ( ( 6 6 6 6 6 6                  "''0&/).$/8(4"'"(#)
 
  .5

*
-.503	   $-$-  
   x x x x xI x x xv?
 ?
 ?
 ?
 ?
#' ?
 ?
 ?
D @V @V @V @V @V @V @V @VF   47 47 47 47 47Z 47 47 ! 47n       :   ! B = = = = =* = =  =" !!	9 	9 	9 	9 	9J 	9 	9 "!	9 C C C C C
 C C CL #$$/G /G /G /G /G+ /G /G %$/Gd %G %G %G %G %G %G %G %GP 5G 5G 5G 5G 5G 5G 5G 5Gp !!VG VG VG VG VGK VG VG "!VGr !!LN LN LN LN LNK LN LN "!LN^ !!a  a  a  a  a J a  a  "!a H !""K@ K@ K@ K@ K@Z K@ K@ #"K@\ %&&q$ q$ q$ q$ q$ q$ q$ '&q$h y' y' y' y' y'{ y' y' y'x - - - - - - - - J J J J J+ J J  JZ 41 41 41 41 41 41 41 41n \ \ \ \ \+ \ \  \~   d d d d d; d d ! dN \ \ \ \ \+ \ \  \~ !!O O O O OK O O "!Od h h h h h+ h h  hV ,--0D 0D 0D 0D 0D 0D 0D .-0Df 0D 0D 0D 0D 0D 0D 0D 0Df u u u u u u u up H H H H H{ H H HV   + + + + +8 + + ! + #$$6* 6* 6* 6* 6*{ 6* 6* %$6*r K@ K@ K@ K@ K@ K@ K@ K@\ #$$N: N: N: N: N:{ N: N: %$N:b !!N: N: N: N: N:K N: N: "!N:b "##N: N: N: N: N:k N: N: $#N:b7& 7& 7&t zDFFF	 	 	 LKKFFF	 	 	     *  ! ! !( VUU
 
 
     2     c} }   s   / 77