
    C1i*                        S SK r S SKrS SKJr  S SKrS SKrS SKJr  S SK	J
r
  S SKJrJr  \R                  R                  \R                  R                   \R                  R"                  S.r1 Skr " S S	\
5      rS
 rS rS rS rS rS rS rS rSS jrS rS rSS jr SS jr!S r"SS jr#SS jr$S r%\&S:X  a  \%" 5         gg)     N)List)utils)	Converter)common_spectransformer_spec)gelureluswish>    dnndc                   6    \ rS rSrSrS\S\\   4S jrS rSr	g)	MarianConverter   z$Converts models trained with Marian.
model_pathvocab_pathsc                     Xl         X l        g)zInitializes the Marian converter.

Arguments:
  model_path: Path to the Marian model (.npz file).
  vocab_paths: Paths to the vocabularies (.yml files).
N_model_path_vocab_paths)selfr   r   s      W/home/james-whalen/.local/lib/python3.13/site-packages/ctranslate2/converters/marian.py__init__MarianConverter.__init__   s     &'    c           
      d   [         R                  " U R                  5      n[        U5      n[	        [        [        U R                  5      5      nUS   nSUS   ;   nUS   n[        R                  " 5       nU" US   S:H  S5        U" US   S	:H  S
5        U" US   (       + S5        U" U[        ;   SU< SSR                  [        R                  5       5      < S35        U" U[        ;   SU< SSR                  [        5      < S35        U(       a;  U" US   S:H  =(       a$    US   S:H  =(       a    UR                  SS5      S:H  S5        O:U" US   S:H  =(       a$    US   S:H  =(       a    UR                  SS5      S:H  S5        UR                  5         US   nUS:X  a  SO[!        U5      S-
  nSU;   n	["        R$                  R'                  US   US   4US    U[        U   USU	S!9n
[)        X5        U
R+                  US"   5        U
R-                  US   5        S#U
R.                  l        U
$ )$Nztransformer-ffn-activationr   ztransformer-preprocessztransformer-postprocess-embtypetransformerz#Option --type must be 'transformer'ztransformer-decoder-autoregzself-attentionz=Option --transformer-decoder-autoreg must be 'self-attention'ztransformer-no-projectionz3Option --transformer-no-projection is not supportedz$Option --transformer-ffn-activation z. is not supported (supported activations are: z, )z%Option --transformer-postprocess-emb z) is not supported (supported values are: ztransformer-postprocessdaztransformer-postprocess-topr   zUnsupported pre-norm Transformer architecture, expected the following combination of options: --transformer-preprocess n --transformer-postprocess da --transformer-postprocess-top ndanzUnsupported post-norm Transformer architecture, excepted the following combination of options: --transformer-preprocess '' --transformer-postprocess dan --transformer-postprocess-top ''z"transformer-guided-alignment-layerlast   z	enc-depthz	dec-depthztransformer-heads)pre_norm
activationalignment_layeralignment_headslayernorm_embeddingr   T)nploadr   _get_model_configlistmap
load_vocabr   r   ConfigurationChecker_SUPPORTED_ACTIVATIONSjoinkeys_SUPPORTED_POSTPROCESS_EMBgetvalidateintr   TransformerSpecfrom_configset_transformer_specregister_source_vocabularyregister_target_vocabularyconfigadd_source_eos)r   modelr>   vocabsr'   r&   postprocess_embcheckr(   r*   
model_specs              r   _loadMarianConverter._load#   sn   (()"5)c*d&7&78989
&!9:: !>?**,fVn-/TU015EEK	
 	233A	
 	00 499%;%@%@%BCE	
 	99		*D EG	
 /0C7 I45=IJJ<bASH2		 /0B6 H45>HJJ<bARG3		 	 !EF /6 9"s??SVW?W!_4%55AAK &"56&'-j9+ 3 B 

 	Z/--fQi8--fRj9+/
(r   r   N)
__name__
__module____qualname____firstlineno____doc__strr   r   rE   __static_attributes__ r   r   r   r      s"    .(3 (T#Y (Ir   r   c                 b    U S   nUS S R                  5       n[        R                  " U5      nU$ )Nzspecial:model.ymlr$   )tobytesyaml	safe_load)r@   r>   s     r   r-   r-   o   s4    &'FCR[  "F^^F#FMr   c           	         [        U SS9 n/ nS nS n[        U5       GHK  u  pVUR                  S5      nU(       d  M   UR                  S5      (       a  USS  nOUb  USS  nOUR	                  SS5      u  p4Ub  UR                  S5      (       a`  UR                  S5      (       aJ  [        R                  " S	S
U5      nUSS nUR                  S5      (       a  [        [        USS  SS95      nOCUR                  S5      (       a-  UR                  S5      (       a  USS nUR                  SS5      nUc  GM   [        UR                  5       5      nUR                  XC45        S nS nGMN     S S S 5        [        WS S9 VVs/ s H  u  pUPM	     snn$ ! [         a  n[        SUS-   U4-  5      UeS nAff = f! , (       d  f       NT= fs  snnf )Nzutf-8)encodingz
z?    :r%   "z\\([^x])z\1r$   z\x   )base'z''z"Unexpected format at line %d: '%s'c                     U S   $ )Nr   rN   )items    r   <lambda>load_vocab.<locals>.<lambda>   s    $q'r   )key)open	enumeraterstrip
startswithrsplitendswithresubchrr8   replacestrip
ValueErrorappendsorted)	pathvocabtokenstokenidxilinee_s	            r   r0   r0   v   s   	dW	% 'GA;;v&Dt$$QR"12h![[a0
 ##C((U^^C-@-@FF;u=E!!BKE''.. #Cab	$; <%%c**u~~c/B/B!!BKE!MM$4Eciik*C sl+G (	 
&R #)5I"JK"JhaE"JKK " $<At}L? 
&	%R Ls6   D(F;9FF;G
F8F33F88F;;
G	c                 `    [        U R                  US5        [        U R                  US5        g )Nencoderdecoder)set_transformer_encoderrx   set_transformer_decoderry   )specweightss     r   r;   r;      s"    DLL'9=DLL'9=r   c           	      ~    [        XU5        [        U R                  5       H  u  p4[        XASX#S-   4-  5        M     g )N%s_l%dr%   )set_common_layersra   layerset_transformer_encoder_layerr|   r}   scopers   
layer_specs        r   rz   rz      s7    dU+"4::.%j8uRSen;TU /r   c           	          SU l         [        XU5        [        U R                  5       H  u  p4[	        XASX#S-   4-  5        M     [        U R                  USU-  U R                  R                  S9  g )NTr   r%   z%s_ff_logit_out)reuse_weight)	start_from_zero_embeddingr   ra   r   set_transformer_decoder_layer
set_linear
projection
embeddingsweightr   s        r   r{   r{      sh    %)D"dU+"4::.%j8uRSen;TU / E!__++	r   c                 r   U R                   n[        U[        5      (       d  U/n[        US   X5        [	        U R
                  XS   R                  R                  S   S9  [        U S5      (       a  [        U R                  USU-  SS9  [        U S5      (       a  [        U R                  US	U-  5        g g )
Nr   r%   )dimr*   z%s_embTr&   
layer_normz%s_top)r   
isinstancer.   set_embeddingsset_position_encodingsposition_encodingsr   shapehasattrset_layer_normr*   r   )r|   r}   r   embeddings_specss       r   r   r      s    &--,-#A&7q.A.H.H.N.Nq.Q t*++$$u		
 t\""tE1AB #r   c                 j    [        U R                  USU-  5        [        U R                  USU-  SS9  g )N%s_ffn%s_selfTself_attention)set_ffnffnset_multi_head_attentionr   r|   r}   r   s      r   r   r      s3    DHHgx%/0Wi%&7r   c                     [        U R                  USU-  5        [        U R                  USU-  SS9  [        U R                  USU-  5        g )Nr   r   Tr   z
%s_context)r   r   r   r   	attentionr   s      r   r   r      sJ    DHHgx%/0Wi%&7 T^^WlU6JKr   c                 \   [        S5       Vs/ s H  n[        R                  " 5       PM     nn[        US   XS5        [        US   XS5        [        US   XS5        U(       a%  [        R
                  " U R                  S   U5        OiUS   R                  U R                  S   l        US   R                  U R                  S   l        [        R
                  " U R                  S   USS  5        [        U R                  S   XS	5        [        U R                  US
U-  5        g s  snf )N   r   qr%   krU   vr$   oz%s_Wo)ranger   
LinearSpecr   r   fuse_linearlinearr   biasset_layer_norm_autor   )r|   r}   r   r   rv   split_layerss         r   r   r      s    6;Ah?hK**,hL?|A4|A4|A4$++a.,7 ,Q 6 6A*1o22A$++a.,qr*:;t{{24'E/B @s   D)c                     [        U R                  USU-  5        [        U R                  XS5        [        U R                  XS5        g )Nr   12)r   r   r   linear_0linear_1r   s      r   r   r      s7    (U2BCt}}gc2t}}gc2r   c                 T     [        XUSS9  g ! [         a    [        XU5         g f = f)NTr   )r   KeyErrorr   s      r   r   r      s,    -ted; -te,-s    ''c                     U(       a  SOSnX< SU< 3   R                  5       U l        X< SU< 3   R                  5       U l        g )N_prer   	_ln_scale_ln_bias)squeezegammabeta)r|   r}   r   r&   suffixs        r   r   r      s?    VRFE6:;CCEDJ%89AACDIr   c                     UR                  U< SU< 35      nUc  UR                  U< SU< 3U5      nOUR                  5       nXPl        UR                  U< SU< 35      nUb  UR                  5       U l        g g )N_W_Wt_b)r6   	transposer   r   r   )r|   r}   r   r   r   r   r   s          r   r   r     sm    [[UF34F~%8,G!!#K;;5&12DLLN	 r   c                 ~    UR                  SU-  5      U l        U R                  c  UR                  S5      U l        g g )Nz%s_WembWemb)r6   r   r   s      r   r   r     s6    ++i%/0DK{{kk&) r   c                 D    UR                  S[        U5      5      U l        g )NWpos)r6   #_make_sinusoidal_position_encodings	encodings)r|   r}   r   s      r   r   r     s    [[)LS)QRDNr   c                    [         R                  " U5      n[         R                  " SS[         R                  " U 5      S-  -  U -  5      n[         R                  " US5      [         R                  " US5      -  n[         R                  " U5      n[         R
                  " US S 2SS S24   5      US S 2S U S-  24'   [         R                  " US S 2SS S24   5      US S 2U S-  S 24'   U$ )Ni'  rU   r%   r   )r+   arangepowerexpand_dims
zeros_likesincos)r   num_positions	positions
timescalesposition_enctables         r   r   r     s    		-(I%biin&9!:S!@AJ>>)Q/"..Q2OOLMM,'E66,q!$Q$w"78E!ZsaxZ-66,q!$Q$w"78E!SAXZ-Lr   c                  <   [         R                  " [         R                  S9n U R                  SSSS9  U R                  SSSSS	9  [        R
                  " U 5        U R                  5       n[        UR                  UR                  5      nUR                  U5        g )
N)formatter_classz--model_pathTzPath to the model .npz file.)requiredhelpz--vocab_paths+z'List of paths to the YAML vocabularies.)r   nargsr   )argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsr   r   r   convert_from_args)parserargs	converters      r   mainr   '  s    $$ >>F ,J   6	   'D1A1ABI%r   __main__)F)r   N)N)i   )'r   rf   typingr   numpyr+   rQ   ctranslate2.convertersr    ctranslate2.converters.converterr   ctranslate2.specsr   r   
ActivationGELUSigmoidRELUSWISHr2   r5   r   r-   r0   r;   rz   r{   r   r   r   r   r   r   r   r   r   r   r   r   rG   rN   r   r   <module>r      s     	    ( 6 ; ""..""''##))  2 Vi Vr+L\>
VC(LC"3-D#*S&& zF r   