
    n i*                     Z   d dl Z d dlZd dlmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZmZ ej        j        ej        j        ej        j        dZh dZ G d d	e
          Zd
 Zd Zd Zd Zd Zd Zd Zd ZddZd Zd ZddZ d dZ!d Z"d!dZ#d"dZ$d Z%e&dk    r e%             dS dS )#    N)List)utils)	Converter)common_spectransformer_spec)gelureluswish>    dnndc                   4    e Zd ZdZdedee         fdZd ZdS )MarianConverterz$Converts models trained with Marian.
model_pathvocab_pathsc                 "    || _         || _        dS )zInitializes the Marian converter.

        Arguments:
          model_path: Path to the Marian model (.npz file).
          vocab_paths: Paths to the vocabularies (.yml files).
        N)_model_path_vocab_paths)selfr   r   s      ]/var/www/html/auto_sub_bot/venv/lib/python3.11/site-packages/ctranslate2/converters/marian.py__init__zMarianConverter.__init__   s     &'    c           
          t          j        | j                  }t          |          }t	          t          t          | j                            }|d         }d|d         v }|d         }t          j	                    } ||d         dk    d            ||d         d	k    d
            ||d          d            ||t          v d|dd                    t                                                    d            ||t          v d|dd                    t                    d           |r= ||d         dk    o%|d         dk    o|                    dd          dk    d           n< ||d         dk    o%|d         dk    o|                    dd          dk    d           |                                 |d         }|dk    rdnt!          |          dz
  }d|v }	t"          j                            |d         |d         f|d          |t          |         |d|	!          }
t)          |
|           |
                    |d"                    |
                    |d                    d#|
j        _        |
S )$Nztransformer-ffn-activationr   ztransformer-preprocessztransformer-postprocess-embtypetransformerz#Option --type must be 'transformer'ztransformer-decoder-autoregzself-attentionz=Option --transformer-decoder-autoreg must be 'self-attention'ztransformer-no-projectionz3Option --transformer-no-projection is not supportedz$Option --transformer-ffn-activation z. is not supported (supported activations are: z, )z%Option --transformer-postprocess-emb z) is not supported (supported values are: ztransformer-postprocessdaztransformer-postprocess-topr   zUnsupported pre-norm Transformer architecture, expected the following combination of options: --transformer-preprocess n --transformer-postprocess da --transformer-postprocess-top ndanzUnsupported post-norm Transformer architecture, excepted the following combination of options: --transformer-preprocess '' --transformer-postprocess dan --transformer-postprocess-top ''z"transformer-guided-alignment-layerlast   z	enc-depthz	dec-depthztransformer-heads)pre_norm
activationalignment_layeralignment_headslayernorm_embeddingr   T)nploadr   _get_model_configlistmap
load_vocabr   r   ConfigurationChecker_SUPPORTED_ACTIVATIONSjoinkeys_SUPPORTED_POSTPROCESS_EMBgetvalidateintr   TransformerSpecfrom_configset_transformer_specregister_source_vocabularyregister_target_vocabularyconfigadd_source_eos)r   modelr;   vocabsr$   r#   postprocess_embcheckr%   r'   
model_specs              r   _loadzMarianConverter._load#   s   ())"5))c*d&7889989
&!9:: !>?*,,fVn-/TUUU015EEK	
 	
 	
 	233A	
 	
 	
 	000 zz499%;%@%@%B%BCCCCE	
 	
 	
 	999		*D E E E EG	
 	
 	
  	E/0C7 I45=IJJ<bAASH2		 	 	 	 E/0B6 H45>HJJ<bAARG3		 	 	 	 !EF /6 9 9""s??S?SVW?W!_4%5AAK &"56&'-j9+ 3 B 
 

 	Z///--fQi888--fRj999+/
(r   N)__name__
__module____qualname____doc__strr   r   rB    r   r   r   r      sT        ..(3 (T#Y ( ( ( (I I I I Ir   r   c                 v    | d         }|d d                                          }t          j        |          }|S )Nzspecial:model.ymlr!   )tobytesyaml	safe_load)r=   r;   s     r   r*   r*   o   s:    &'FCRC[  ""F^F##FMr   c           	         t          | d          5 }g }d }d }t          |          D ]\  }}|                    d          }|s|                    d          r|dd          }n&||dd          }n|                    dd          \  }}||                    d          rq|                    d          r\t          j        d	d
|          }|dd         }|                    d          r&t          t          |dd          d                    }nJ|                    d          r5|                    d          r |dd         }|
                    dd          }|h	 t          |                                          }n*# t          $ r}t          d|dz   |fz            |d }~ww xY w|                    ||f           d }d }	 d d d            n# 1 swxY w Y   d t          |d           D             S )Nzutf-8)encodingz
z?    :r"   "z\\([^x])z\1r!   z\x   )base'z''z"Unexpected format at line %d: '%s'c                     g | ]\  }}|S rH   rH   ).0_tokens      r   
<listcomp>zload_vocab.<locals>.<listcomp>   s    KKKhaEKKKr   c                     | d         S )Nr   rH   )items    r   <lambda>zload_vocab.<locals>.<lambda>   s
    $q' r   )key)open	enumeraterstrip
startswithrsplitendswithresubchrr5   replacestrip
ValueErrorappendsorted)pathvocabtokensrX   idxilinees           r   r-   r-   v   s   	dW	%	%	% ' '' #	 #	GAt;;v&&D t$$ 1QRR"122h![[a00
s ##C(( 	5U^^C-@-@ 	5F;u==E!!B$KE''.. = #Cabb	$;$;$; < <%%c** 5u~~c/B/B 5!!B$KE!MM$44Eciikk**CC!   $<At}L 
 sEl+++G#		' ' ' ' ' ' ' ' ' ' ' ' ' ' 'R LK&5I5I"J"J"JKKKKs6   EG!!FG
F*F%%F** GGGc                 ^    t          | j        |d           t          | j        |d           d S )Nencoderdecoder)set_transformer_encoderrt   set_transformer_decoderru   )specweightss     r   r8   r8      s0    DL'9===DL'9=====r   c           	          t          | ||           t          | j                  D ]\  }}t          ||d||dz   fz             d S )N%s_l%dr"   )set_common_layersr_   layerset_transformer_encoder_layerrx   ry   scoperp   
layer_specs        r   rv   rv      sb    dGU+++"4:.. V V:%j'8uaRSen;TUUUUV Vr   c           	          d| _         t          | ||           t          | j                  D ]\  }}t	          ||d||dz   fz             t          | j        |d|z  | j        j                   d S )NTr{   r"   z%s_ff_logit_out)reuse_weight)	start_from_zero_embeddingr|   r_   r}   set_transformer_decoder_layer
set_linear
projection
embeddingsweightr   s        r   rw   rw      s    %)D"dGU+++"4:.. V V:%j'8uaRSen;TUUUUE!_+	     r   c                 x   | j         }t          |t                    s|g}t          |d         ||           t	          | j        ||d         j        j        d                    t          | d          rt          | j
        |d|z  d           t          | d          rt          | j        |d	|z             d S d S )
Nr   r"   )dimr'   z%s_embTr#   
layer_normz%s_top)r   
isinstancer+   set_embeddingsset_position_encodingsposition_encodingsr   shapehasattrset_layer_normr'   r   )rx   ry   r   embeddings_specss       r   r|   r|      s    &-- .,-#A&777.>q.A.H.Nq.Q    t*++ 
$u		
 	
 	
 	
 t\"" CtE1ABBBBBC Cr   c                 n    t          | j        |d|z             t          | j        |d|z  d           d S )N%s_ffn%s_selfTself_attention)set_ffnffnset_multi_head_attentionr   rx   ry   r   s      r   r~   r~      sM    DHgx%/000Wi%&7     r   c                     t          | j        |d|z             t          | j        |d|z  d           t          | j        |d|z             d S )Nr   r   Tr   z
%s_context)r   r   r   r   	attentionr   s      r   r   r      sd    DHgx%/000Wi%&7    T^WlU6JKKKKKr   Fc                 >   d t          d          D             }t          |d         ||d           t          |d         ||d           t          |d         ||d           |r!t          j        | j        d         |           nb|d         j        | j        d         _        |d         j        | j        d         _        t          j        | j        d         |dd                     t          | j        d	         ||d
           t          | j        |d|z             d S )Nc                 4    g | ]}t          j                    S rH   )r   
LinearSpec)rV   rW   s     r   rY   z,set_multi_head_attention.<locals>.<listcomp>   s!    ???K*,,???r      r   qr"   krO   vr!   oz%s_Wo)	ranger   r   fuse_linearlinearr   biasset_layer_norm_autor   )rx   ry   r   r   split_layerss        r   r   r      s   ??eAhh???L|A444|A444|A444 <$+a.,7777 ,Q 6A*1o2A$+a.,qrr*:;;;t{2444'E/BBBBBr   c                     t          | j        |d|z             t          | j        ||d           t          | j        ||d           d S )Nr   12)r   r   r   linear_0linear_1r   s      r   r   r      sM    (U2BCCCt}guc222t}guc22222r   c                 r    	 t          | ||d           d S # t          $ r t          | ||           Y d S w xY w)NTr   )r   KeyErrorr   s      r   r   r      sZ    -tWed;;;;;; - - -tWe,,,,,,-s    66c                     |rdnd}||d|                                          | _        ||d|                                          | _        d S )N_prer   	_ln_scale_ln_bias)squeezegammabeta)rx   ry   r   r#   suffixs        r   r   r      sZ    'VVRFEEE66:;CCEEDJ%%%89AACCDIIIr   r   c                    |                     |d|          }||                     |d||          }n|                                }|| _        |                     |d|          }||                                | _        d S d S )N_W_Wt_b)r3   	transposer   r   r   )rx   ry   r   r   r   r   r   s          r   r   r     s    [[UUUFF344F~%%%8,GG!!##DK;;555&&122DLLNN			 r   c                     |                     d|z            | _        | j        |                     d          | _        d S d S )Nz%s_WembWemb)r3   r   r   s      r   r   r     sA    ++i%/00DK{kk&)) r   c                 V    |                     dt          |                    | _        d S )NWpos)r3   #_make_sinusoidal_position_encodings	encodings)rx   ry   r   s      r   r   r     s$    [[)LS)Q)QRRDNNNr      c                    t          j        |          }t          j        ddt          j        |           dz  z  | z            }t          j        |d          t          j        |d          z  }t          j        |          }t          j        |d d dd df                   |d d d | dz  f<   t          j        |d d dd df                   |d d | dz  d f<   |S )Ni'  rO   r"   r   )r(   arangepowerexpand_dims
zeros_likesincos)r   num_positions	positions
timescalesposition_enctables         r   r   r     s    	-((I%binn&9!:S!@AAJ>)Q//".Q2O2OOLM,''E6,qqq!$Q$w"788E!!!ZsaxZ-6,qqq!$Q$w"788E!!!SAXZZ-Lr   c                  T   t          j        t           j                  } |                     ddd           |                     dddd	           t	          j        |            |                                 }t          |j        |j	                  }|
                    |           d S )
N)formatter_classz--model_pathTzPath to the model .npz file.)requiredhelpz--vocab_paths+z'List of paths to the YAML vocabularies.)r   nargsr   )argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsr   r   r   convert_from_args)parserargs	converters      r   mainr   '  s    $ >  F ,J     6	     '''D1ABBI%%%%%r   __main__)F)r   N)N)r   )'r   rd   typingr   numpyr(   rK   ctranslate2.convertersr    ctranslate2.converters.converterr   ctranslate2.specsr   r   
ActivationGELUSigmoidRELUSWISHr/   r2   r   r*   r-   r8   rv   rw   r|   r~   r   r   r   r   r   r   r   r   r   r   rC   rH   r   r   <module>r      s:    				            ( ( ( ( ( ( 6 6 6 6 6 6 ; ; ; ; ; ; ; ; "."'#)   211 V V V V Vi V V Vr  +L +L +L\> > >
V V V  C C C(  L L LC C C C"3 3 3- - -D D D D# # # #* * *S S S S   & & && zDFFFFF r   