
    h$                        % S SK r S SKrS SKrS SKJr  S SKJr  S SKJrJ	r	J
r
JrJrJr  S SKrS SKJr  SSKJrJr  SSKJr  SS	KJrJrJrJr  S
SKJrJrJrJr  \\\\\\S.r \\!\	S\
\   4   4   \"S'   Sr#Sr$ " S S\!\5      r%\RL                  " S5      \" SSSS9\" SSSSS9\" SSSSS9\" S
SS S!S9\" S"S#S$S%S9\" SS&S'S(S)S9\" S"S*S+S,S9\" S"S-S.S/S9\" \#S0S1S2\'" \ RQ                  5       5       3S9\" SS3S4S5SS9\" SS6S7S8S9\" SS9S:S;S94S<\!S=\S>\%S?\)S@\*SA\\!   SB\*SC\*SD\!SE\\   SF\\!   SG\*4SH jj5       r+SIS
S"SS"S"SSS"SSSJ.S<\S=\\!\4   S>\!S?\)S@\*SA\\!   SB\*SC\*SD\!SE\\   SF\\!   SG\*SK\*SL\\   SMS4SN jjr,SO\SP\!SMS4SQ jr-SO\SR\SP\!SMS4SS jr.ST\!SM\\!   4SU jr/SL\S<\S=\\!\4   S>\!SD\!SE\\   4SV jr0S<\4SW jr1g)X    N)Enum)Path)AnyCallableIterableMappingOptionalUnion)Printer   )DocDocBin)docs_to_json)conll_ner_to_docsconllu_to_docsiob_to_docsjson_to_docs   )ArgOptappwalk_directory)	conllubioconlluconllneriobjson.
CONVERTERSauto)r   c                       \ rS rSrSrSrSrg)	FileTypes*   r   spacy N)__name__
__module____qualname____firstlineno__r   r$   __static_attributes__r%       K/home/james-whalen/.local/lib/python3.13/site-packages/spacy/cli/convert.pyr"   r"   *   s    DEr+   r"   convertzInput file or directoryT)helpexists-z!Output directory. '-' for stdout.)r.   
allow_dashr/   r$   z--file-typez-tzType of data to produce)r.   z	--n-sentsz-nz*Number of sentences per doc (0 to disable)Fz--seg-sentsz-szSegment sentences (for -c ner)z--modelz--basez-bzQTrained spaCy pipeline for sentence segmentation to use as base (for --seg-sents)z--morphologyz-mz#Enable appending morphology to tagsz--merge-subtokensz-TzMerge CoNLL-U subtokensz--converterz-czConverter: z	--ner-mapz-nmz6NER tag mapping (as JSON-encoded dict of entity types)z--langz-lz Language (if tokenizer required)z--concatenatez-Cz#Concatenate output to a single file
input_path
output_dir	file_typen_sents	seg_sentsmodel
morphologymerge_subtokens	converterner_maplangconcatenatec                     [        U 5      n U[        S5      :X  a  SOUnUS:H  n[        US9n[        XU 5      n[        XXR                  X5        [        U UUR                  UUUUUUU	U
UUUS9  g)a  
Convert files into json or DocBin format for training. The resulting .spacy
file can be used with the train command and other experiment management
functions.

If no output_dir is specified and the output format is JSON, the data
is written to stdout, so you can pipe them forward to a JSON file:
$ spacy convert some_file.conllu --file-type json > some_file.json

DOCS: https://spacy.io/api/cli#convert
r0   no_print)r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   silentmsgN)r   r   _get_converterverify_cli_argsvaluer-   )r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   rA   rB   s                 r,   convert_clirF   /   s    8 j!J*4S	*A3zJ3F
6
"Csz:ICZ)U//'r+   r   )r4   r5   r6   r7   r8   r9   r;   r<   r=   rA   rB   rA   rB   returnc                   [        U 5      n U(       d	  [        US9nU	b  [        R                  " U	5      OS n	/ n[	        X5       HV  nUR                  SSS9 nUR                  5       nS S S 5        [        U   nU" WUUUUU
UUU	S9	nUR                  UU45        MX     U(       a:  [        R                  R                  U VVs/ s H  u  nnUPM
     snn5      nU U4/nU H  u  nnUS:X  a  [        U5      /n[        U5      nO%[        USS9n[        U5      nUR                  5       nUS	:X  a  [!        UU5        M]  X:w  a2  UR#                  U 5      n[        U5      UR%                  S
U 35      -  nO/[        U5      UR&                  S   -  nUR%                  S
U 35      n[)        UUU5        UR+                  SU SU 35        M     g ! , (       d  f       GNl= fs  snnf )Nr?   rzutf-8encoding)r5   r6   append_morphologyr9   r<   r7   r@   r;   r   T)docsstore_user_datar0   .zGenerated output file (z documents): )r   r   srsly	read_jsonr   openreadr   append	itertoolschainfrom_iterabler   lenr   to_bytes_print_docs_to_stdoutrelative_towith_suffixparts_write_docs_to_filegood)r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   rA   rB   	doc_files	input_locinfile
input_datafuncrM   _all_docsdatalen_docsdbsubpathoutput_files                              r,   r-   r-   c   s   " j!Jv&*1*=eoog&4GI#J:	^^C'^2fJ 3 )$(+

 	)T*+! ;" ??00i1Pi71d$i1PQ (+,	$	4 &'D4yHT48B2wH;;=D!$	2&#//
;":.1D1Dq_1UU":.1DD)55)oFk9=HH.xjk]ST% %' 32" 2Qs   GG

G	rh   output_typec                     US:X  a  [         R                  " SU 5        g [        R                  R                  R                  U 5        g )Nr   r0   )rQ   
write_jsonsysstdoutbufferwrite)rh   rm   s     r,   r[   r[      s2    fd#

%r+   rl   c                 &   UR                   R                  5       (       d  UR                   R                  SS9  US:X  a  [        R                  " X5        g UR                  S5       nUR                  U 5        S S S 5        g ! , (       d  f       g = f)NT)parentsr   wb)parentr/   mkdirrQ   ro   rS   rs   )rh   rl   rm   file_s       r,   r_   r_      sl    $$&&   .f+d#uKK $##s   'B
Brd   c                    U R                  S5      S S nSSS.n[        R                  " S5      n[        R                  " S5      nU H[  nUR                  5       nUR	                  U5      (       a  US==   S-  ss'   UR	                  U5      (       d  MN  US	==   S-  ss'   M]     US   S:X  a
  US	   S:  a  g	US	   S:X  a
  US   S:  a  gg )
N
   r   )r   r   z\S+\|(O|[IB]-\S+)z\S+\s+(O|[IB]-\S+)$r   r   r   )splitrecompilestripsearch)rd   linesformat_guessesiob_rener_relines         r,   autodetect_ner_formatr      s    T"3B'Eq)NZZ,-FZZ./Fzz|==5!Q&!==5!Q&!  e!nU&;a&?e!nU&;a&?r+   c                 "   U[         ;  a  US:X  a  U R                  SU S3SS9  UR                  5       (       d  U R                  SUSS9  US:w  a/  [        U5      R                  5       (       d  U R                  SUSS9  Ub/  [        U5      R                  5       (       d  U R                  SUSS9  UR	                  5       (       a+  [        X5      n[        U5      S	:X  a  U R                  S
USS9  U[        ;  a  U R                  SU 3SS9  g g )Nr0   zCan't write .z4 data to stdout. Please specify an output directory.r   exitszInput file not foundzOutput directory not foundzNER map not foundr   zNo input files in directoryzCan't find converter for )FILE_TYPES_STDOUTfailr/   r   is_dirr   rY   r   )rB   r2   r3   r4   r:   r;   
input_locss          r,   rD   rD      s    ))jC.?I;&Z[ 	 	
 '1=Sj!1!8!8!:!:-zC4=#7#7#9#9$gQ7#J:
z?aHH2JaHH
",YK8B #r+   c           
         UR                  5       (       a  U[        :X  ar  [        US S9n[        [	        U Vs/ s H  oDR
                  SS  PM     sn5      5      n[        U5      S:  a"  SR                  U5      nU R                  SUSS9  US   nO[        X!S9S   nU[        :X  a  UR
                  SS  nUS:X  d  US	:X  az  UR                  S
S9 nUR                  5       nS S S 5        [        W5      n	U	S:X  a  U R                  S5        U	nU$ U	S	:X  a  U R                  S5        U	nU$ U R                  S5        U$ s  snf ! , (       d  f       Ng= f)N)suffixr   r   ,z!All input files must be same typer   r   r   r   utf8rJ   z'Auto-detected token-per-line NER formatz*Auto-detected sentence-per-line NER formatzgCan't automatically detect NER format. Conversion may not succeed. See https://spacy.io/api/cli#convert)r   AUTOr   listsetr   rY   joinr   rS   rT   r   infowarn)
rB   r:   r2   r   loc
file_typesfile_types_strry   rd   converter_autodetects
             r,   rC   rC      sU   '
4@JcZ"HZc::ab>Z"HIJJ:!#!$*!5<nTUV#AJ'
EaHJD%%ab)	EY%/__f_-J .4Z@5(HH>?,I  "U*HHAB,I  HH7
 3 #I .-s   D?E
E)2rV   r~   rp   enumr   pathlibr   typingr   r   r   r   r	   r
   rQ   wasabir   tokensr   r   trainingr   training.convertersr   r   r   r   _utilr   r   r   r   r   str__annotations__r   r   r"   commandtuplekeysintboolrF   r-   r[   r_   r   rD   rC   r%   r+   r,   <module>r      s    	 
   D D     #  1 0  :
GC#x}"4556    T 
 Y #$=dK3%HUYbfgwtB[\q+t2^_%;[\tY$  FY  Z5.$=bc':DG`a}d;uZ__M^G_F`9ab!$UAy  CG  HdHd9[\D/4>cd00 0 	0
 0 0 C=0 0 0 0 d^0 3-0 0 0n !"!<U<Uc4i <U 	<U
 <U <U C=<U <U <U <U d^<U 3-<U <U <U 
'	<U  
!<U~& &# &$ &c  3 4 c hsm &C	CC c4i C 	C
 C d^C6t r+   