
    h                     j   S SK r S SKJr  S SKJr  S SKrS SKJr  SSKJ	r	  SSK
Jr  SS	KJrJrJrJrJrJrJr  \R(                  " S
SSS.S9\" SSSSSS9\" SSS9\" SSSSS9\" SSSSS9\" SSSSS9\" SSSS S9\" SS!S"S#S94S$\R*                  S%\S&\S'\\   S(\\   S)\\   S*\S+\4S, jj5       rS- rg).    N)Path)Optional)msg   )pretrain)load_config   )ArgOptappimport_codeparse_config_overrides	setup_gpushow_validation_errorr   T)allow_extra_argsignore_unknown_options)context_settings.zPath to config fileF)helpexistsdir_okay
allow_dashz+Directory to write weights to on each epoch)r   z--codez-czNPath to Python file with additional code (registered functions) to be importedz--resume-pathz-rz;Path to pretrained weights from which to resume pretrainingz--epoch-resumez-erzuThe epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files.z--gpu-idz-gzGPU ID or -1 for CPUz--skip-lastz-LzSkip saving model-last.binctxconfig_path
output_dir	code_pathresume_pathepoch_resumeuse_gpu	skip_lastc           
         [        U R                  5      n[        U5        [        XXE5        [	        U5        [
        R                  " SU 35        [        U5         [        XSS9n	SSS5        W	R                  5       n
U
R                  S5      (       d  [
        R                  " SSS9  UR                  5       (       d(  UR                  S	S
9  [
        R                  " SU 35        U	R                  US-  5        [
        R                  " S5        [!        U
UUUUSUS9  [
        R                  " S5        g! , (       d  f       N= f)a  
Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components,
using an approximate language-modelling objective. Two objective types
are available, vector-based and character-based.

In the vector-based objective, we load word vectors that have been trained
using a word2vec-style distributional similarity algorithm, and train a
component like a CNN, BiLSTM, etc to predict vectors which match the
pretrained ones. The weights are saved to a directory after each epoch. You
can then pass a path to one of these pretrained weights files to the
'spacy train' command.

This technique may be especially helpful if you have little labelled data.
However, it's still quite experimental, so your mileage may vary.

To load the weights back in during 'spacy train', you need to ensure
all settings are the same between pretraining and training. Ideally,
this is done by using the same config file for both commands.

DOCS: https://spacy.io/api/cli#pretrain
zLoading config from: F)	overridesinterpolateNpretrainingz/The [pretraining] block in your config is emptyr	   exitsT)parentszCreated output directory: z
config.cfgz)Saved config file in the output directory)r   r   r   silentr    zSuccessfully finished pretrain)r   argsr   verify_cli_argsr   r   infor   r   r#   getfailr   mkdirgoodto_diskr   )r   r   r   r   r   r   r   r    config_overrides
raw_configconfigs              L/home/james-whalen/.local/lib/python3.13/site-packages/spacy/cli/pretrain.pypretrain_clir5      s   J .chh7	K[GgHH$[M23	{	+ 

 
, ##%F::m$$B!L&-j\:;zL01HH89! HH-.1 
,	+s   D77
Ec                    U (       a$  [        U 5      S:w  a+  U R                  5       (       d  [        R                  " SU SS9  UR                  5       (       a[  UR	                  5        Vs/ s H  oDPM     sn(       a6  U(       a  [        R
                  " SS5        O[        R
                  " SS5        Ub  UR                  5       (       a  [        R                  " S	S
S9  [        R                  " S[        U5      5      nU(       d  U(       d  [        R                  " SS
S9  g U(       d!  US:  a  [        R                  " SU S3S
S9  g g g g s  snf )N-zConfig file not foundr	   r%   zOutput directory is not empty.z}If you're resuming a run in this directory, the old weights for the consecutive epochs will be overwritten with the new ones.zOutput directory is not empty. zuIt is better to use an empty directory or refer to a new output path, then the new directory will be created for you.zI--resume-path should be a weights file, but {resume_path} is a directory.Tzmodel\d+\.binz]You have to use the --epoch-resume setting when using a renamed weight file for --resume-pathr   z=The argument --epoch-resume has to be greater or equal to 0. z is invalid)	strr   r   r-   iterdirwarnis_dirresearch)r   r   r   r   p
model_names         r4   r*   r*   [   s   3{+s2;;M;M;O;O(+Q?:+=+=+?@+?a+?@HH0T HH1B
  HH[ YY/[1AB
,HHo q 0HHOP\~]hi !1   As   )E)r<   pathlibr   typingr   typerwasabir   training.pretrainr   utilr   _utilr
   r   r   r   r   r   r   commandContextintboolr5   r*        r4   <module>rM      s6   	     (    *.$O C&;DSXeij3%RS #D(D  @P  !Q"%dOT  IF  #G"%d,<e  KB  #Cr:t2HI%;WX?/	?/ ?/ 	?/
 ~?/ $?/ 3-?/ ?/ ?/	?/D"rL   