
    h                     0   S SK Jr  S SKJr  S SKJrJrJrJrJ	r	  S SK
r
S SKrS SKJr  SSKJrJr  SSKJrJr  SS	KJr  S
SKJrJrJrJrJrJr  SrSrSrSr Sr!\\\"   \\   4   r#S\S\S\\   4S jr$S\S\"S\\"   4S jr%S\\   S\\"   4S jr&\RN                  " S5      \" SSS9\" S\SS9\" S\SS 9\" SS!S"\S9\" S#S$S%S&S9\" SS'S(S)S9\" S*S+S,S-S9\" S
S.S/S0S9\" S
S1S2S3S94	S4\"S5\S6\S7\\   S8\"S9\(S:\)S;\)S<\)4S= jj5       r*S5\S6\S4\"S>\"S;\)S<\)4S? jr+g)@    )chain)Path)IterableListOptionalUnioncastN)msg   )DocDocBin)ensure_path
load_model)Vocab   )ArgOptappimport_code	setup_gpuwalk_directorya  Location of the documents to predict on.
Can be a single file in .spacy format or a .jsonl file.
Files with other extensions are treated as single plain text documents.
If a directory is provided it is traversed recursively to grab
all files to be processed.
The files can be a mixture of .spacy, .jsonl and text files.
If .jsonl is provided the specified field is going
to be grabbed ("text" by default).z&Path to save the resulting .spacy filezNPath to Python file with additional code (registered functions) to be importedz3Use gold preprocessing provided in the .spacy fileszjThe provided output file already exists. To force overwriting the output file, set the --force or -F flag.pathvocabreturnc              #   v   #    [        5       R                  U 5      nUR                  U5       H  nUv   M	     g7f)z!
Stream Doc objects from DocBin.
N)r   	from_diskget_docs)r   r   docbindocs       I/home/james-whalen/.local/lib/python3.13/site-packages/spacy/cli/apply.py_stream_docbinr!   %   s1      X%Fu%	 &s   79fieldc              #      #    [         R                  " U 5       H+  nX;  a  [        R                  " U  SU S3SS9  M%  X!   v   M-     g7f)zS
Stream "text" field from JSONL. If the field "text" is
not found it raises error.
z  does not contain the required 'z' field.r   exitsN)srsly
read_jsonlr
   fail)r   r"   entrys      r    _stream_jsonlr*   .   sG     
 !!$'HHv=eWHMUVW,	 (s   AA	pathsc              #      #    U  H,  n[        US5       nUR                  5       nUv   SSS5        M.     g! , (       d  f       M@  = f7f)z*
Yields strings from text files in paths.
rN)openread)r+   r   fintexts       r    _stream_textsr2   :   s8      $_88:DJ _ _s   A6A
A	 Aapply.zModel name or path)helpT)r4   existsF)r4   dir_okayz--codez-cr1   z
--text-keyz-tkz$Key containing text string for JSONLz--forcez-Fz!Force overwriting the output filez--gpu-idz-gzGPU ID or -1 for CPU.z--batch-sizez-bzBatch size.z--n-processz-nznumber of processors to use.model	data_pathoutput_file	code_pathtext_keyforce_overwriteuse_gpu
batch_size	n_processc	                 P   [        U5      n[        U5      n[        U5      nUR                  5       (       a   U(       d  [        R                  " [        SS9  UR                  5       (       d  [        R                  " SU 3SS9  [        U5        [        U5        [        XXXx5        g)a  
Apply a trained pipeline to documents to get predictions.
Expects a loadable spaCy pipeline and path to the data, which
can be a directory or a file.
The data files can be provided in multiple formats:
    1. .spacy files
    2. .jsonl files with a specified "field" to read the text from.
    3. Files with any other extension are assumed to be containing
       a single document.
DOCS: https://spacy.io/api/cli#apply
r   r$   zCouldn't find data path: N)r   r5   r
   r(   	force_msgr   r   r3   )	r8   r9   r:   r;   r<   r=   r>   r?   r@   s	            r    	apply_clirC   D   s    0 I&Ik*KI&IO!$,YK8B	g	)%:I    
json_fieldc                 F   [        SS9n[        U 5      n[        U5      S:X  a,  UR                  U5        [        R
                  " SU  S35        g [        U5      n[        R                  " SU 35        UR                  n	/ n
/ nU Hl  nUR                  S:X  a  U
R                  [        X5      5        M/  UR                  S:X  a  U
R                  [        X5      5        M[  UR                  U5        Mn     [        U5      S:  a  U
R                  [        U5      5        [        [        [!        U
6 5      n["        R"                  " UR%                  XUS	9S S
9 H  nUR'                  U5        M     UR                  S:X  a  UR)                  S5      nUR                  U5        g )NT)store_user_datar   zDid not find data to process, z  seems to be an empty directory.zLoaded model z.spacyz.jsonl)r?   r@   )disable )r   r   lento_diskr
   warnr   goodr   suffixappendr!   r*   r2   r	   DocOrStrStreamr   tqdmpipeaddwith_suffix)r9   r:   r8   rE   r?   r@   r   r+   nlpr   streams
text_filesr   datagenr   s                  r    r3   r3   h   sa    D)F9%E
5zQ{#{:<	
 	
U
CHH}UG$%IIE$&GJ;;("NN>$67[[H$NN=:;d#  :}Z01>5'?3Gyy9Et 	

3 R!--h7
NN;rD   ),	itertoolsr   pathlibr   typingr   r   r   r   r	   r&   rQ   wasabir
   tokensr   r   utilr   r   r   r   _utilr   r   r   r   r   r   	path_helpout_help	code_help	gold_helprB   strrP   r!   r*   r2   commandboolintrC   r3    rD   r    <module>ri      s     8 8      *  H H&	 4W 
 B	H 
 x}hsm34 e  	 	S 	Xc] 	$ HSM  W S34#Id;Ch? #D(Dy Ie:`ay$=`ar:t2IJ!^TFM46TU J J  J 	 J
 ~ J  J  J  J  J  J  JF& & &  &  	& 
 &  & rD   