
    h                     (   S SK r S SKJr  S SKJr  S SKrS SKrS SKJr  SSK	J
r
  SSKJr  SSKJrJr  S	S
KJrJrJrJrJrJrJr  \R0                  " S5      \" SSS9\" SSSS9\" SSS9\" SSSSS9\" S SSSS9\" SSSSS9\" SSSS S9\" S!S"S#S$S%S9\" SS&S'S(SS)9\" S*S+S,S-S94
S.\S/\S0\S1\S2\S3\S4\\   S5\S6\\   S7\4S8 jj5       rS9\S6\S:S4S; jr\R0                  " S9SSS<.SS=9\" SS>SSS?9\" SS@S9\" SSASBSCS9\" S!S"S#S$S%S9\" SSDSESFS94SG\R<                  SH\SI\SJ\\   S5\SK\4SL jj5       r\R0                  " SMSSS<.SN9\" SS>SSS?9\" SSOS9\" SSASBSCS9\" S!S"S#S$S%S9\" SSDSESFS94SG\R<                  SH\SI\SJ\\   S5\SK\4SP jj5       r SQ r!g)R    N)Path)Optional)msg   )util)Language)convert_vectorsinit_nlp   )ArgOptimport_codeinit_cliparse_config_overrides	setup_gpushow_validation_errorvectors.z(The language of the nlp object to create)helpzVectors file in Word2Vec formatT)r   existszPipeline output directoryz--prunez-pz&Optional number of vectors to prune toz
--truncatez-tzFOptional number of vectors to truncate to when reading in vectors filedefaultz--modez-mzVectors mode: default or floretz--namez-nz?Optional name for the word vectors, e.g. en_core_web_lg.vectorsFz	--verbosez-Vz-VVz/Display more information for debugging purposesz--lexemes-jsonlz-jz+Location of JSONL-formatted attributes file)r   hiddenORTHz--attrz-az?Optional token attribute to use for vectors, e.g. LOWER or NORMlangvectors_loc
output_dirprunetruncatemodenameverbose	jsonl_locattrc
           
         U(       a-  [         R                  R                  [        R                  5        [
        R                  " SU  S35        [         R                  " U 5      " 5       n
Ub  [        X5        [        U
UUUUUU	S9  [
        R                  " S[        U
R                  R                  5       S35        U
R                  U5        [
        R                  " SUR                  5       5        g)zConvert word vectors for use with spaCy. Will export an nlp object that
you can use in the [initialize] block of your config to initialize
a model with vectors.
z(Creating blank nlp object for language ''N)r   r   r    r   r#   zSuccessfully converted z vectorszSaved nlp object with vectors to output directory. You can now use the path to it in your config as the 'vectors' setting in [initialize].)r   loggersetLevelloggingDEBUGr   infoget_lang_classupdate_lexemesr	   goodlenvocabr   to_diskresolve)r   r   r   r   r   r   r    r!   r"   r#   nlps              Q/home/james-whalen/.local/lib/python3.13/site-packages/spacy/cli/init_pipeline.pyinit_vectors_clir4      s    & W]]+HH7vQ?@


d
#
%Cs& HH&s399+<+<'=&>hGHKK
HH	N    r2   returnc                     [         R                  " U5      nU H/  nSU;   a  M  U R                  US      nUR                  " S0 UD6  M1     g )Nsettingsorth )srsly
read_jsonlr/   	set_attrs)r2   r"   	lex_attrsattrslexemes        r3   r,   r,   B   sJ      +I5=)!5!	 r5   )allow_extra_argsignore_unknown_options)context_settingsr   zPath to config file)r   r   
allow_dashz&Output directory for the prepared dataz--codez-czNPath to Python file with additional code (registered functions) to be importedz--gpu-idz-gzGPU ID or -1 for CPUctxconfig_pathoutput_path	code_pathuse_gpuc                    U(       a-  [         R                  R                  [        R                  5        [        U R                  5      n[        U5        [        U5        [        U5         [         R                  " XS9nS S S 5        [        SS9   [        WUS9nS S S 5        WR                  U5        [        R                  " SU 35        g ! , (       d  f       NU= f! , (       d  f       NJ= f)N	overridesF	hint_fillrI   zSaved initialized pipeline to )r   r&   r'   r(   r)   r   argsr   r   r   load_configr
   r0   r   r-   	rE   rF   rG   rH   r!   rI   rL   configr2   s	            r3   init_pipeline_clirT   L   s     W]]+&sxx0I	g	{	+!!+C 
,		/vw/ 
0KKHH-k];< 
,	+	/	/s   +CC 
C 
C.labels)rC   zOutput directory for the labelsc                    U(       a-  [         R                  R                  [        R                  5        UR                  5       (       d  UR                  SS9  [        U R                  5      n[        U5        [        U5        [        U5         [         R                  " XS9nSSS5        [        SS9   [        WUS9nSSS5        [        WU5        g! , (       d  f       N7= f! , (       d  f       N,= f)zGenerate JSON files for the labels in the data. This helps speed up the
training process, since spaCy won't have to preprocess the data to
extract the labels.T)parentsrK   NFrM   rO   )r   r&   r'   r(   r)   r   mkdirr   rP   r   r   r   rQ   r
   _init_labelsrR   s	            r3   init_labels_clirZ   h   s    " W]]+$'&sxx0I	g	{	+!!+C 
,		/vw/ 
0k"	 
,	+	/	/s   C5C&
C#&
C4c                    U R                    Hr  u  p#[        USS 5      bE  X S3-  n[        R                  " XCR                  5        [
        R                  " SU SU 35        MX  [
        R                  " SU S35        Mt     g )N
label_dataz.jsonz!Saving label data for component 'z' to z#No label data found for component 'r%   )pipelinegetattrr;   
write_jsonr\   r   r-   r*   )r2   rG   r    	componentoutput_files        r3   rY   rY      ss    <<9lD1=%&6K[*>*>?HH8eK=QRHH:4&BC (r5   )"r(   pathlibr   typingr   r;   typerwasabir    r   languager   training.initializer	   r
   _utilr   r   r   r   r   r   r   commandstrintboolr4   r,   ContextrT   rZ   rY   r:   r5   r3   <module>ro      s           ;   
) CHIC&GPTU3%@ARD/WX<4|}Ix4UVdHd9z{{D%>op #D*;THu  C  !DFHd1rs'
' ' 	'
 ' ' ' 3-' ' ~' ' 'T" "T "d " 
	*.$O C&;DUYZC&NO #D(D  @P  !Q{D%>opr:t2HI=	= = 	=
 ~= = =
=. 
*.$O C&;DUYZC&GH #D(D  @P  !Q{D%>opr:t2HI#	# # 	#
 ~# # #	#6Dr5   