
    h                     `   S SK r S SKrS SKrS SKJrJr  S SKrSSKJr  SSK	J
r
  SSKJr  SSKJr  SSKJr  \R"                  " S	/ S
QSS9S\S\4S j5       r\R"                  " S/ SQSS9S\4S j5       r\R"                  " SS/SS9SS\S\S\4S jj5       r " S S5      r " S S5      rS rg)    N)AnyDict   )util)Warnings)Language)Matcher)Docmerge_noun_chunks)	token.depz	token.tagz	token.posT)requiresretokenizesdocreturnc                 *   U R                  S5      (       d  U $ U R                  5        nU R                   H?  nUR                  R                  UR                  R
                  S.nUR                  X#S9  MA     SSS5        U $ ! , (       d  f       U $ = f)zMerge noun chunks into a single token.

doc (Doc): The Doc object.
RETURNS (Doc): The Doc object with merged noun chunks.

DOCS: https://spacy.io/api/pipeline-functions#merge_noun_chunks
DEP)tagdepattrsN)has_annotation
retokenizenoun_chunksrootr   r   merge)r   retokenizernpr   s       R/home/james-whalen/.local/lib/python3.13/site-packages/spacy/pipeline/functions.pyr   r      s{     e$$
		[//BGGKK<Eb. " 
 J	 
	 Js   AB
Bmerge_entities)zdoc.entsztoken.ent_iobztoken.ent_typec                    U R                  5        nU R                   HJ  nUR                  R                  UR                  R                  UR
                  S.nUR                  X#S9  ML     SSS5        U $ ! , (       d  f       U $ = f)zMerge entities into a single token.

doc (Doc): The Doc object.
RETURNS (Doc): The Doc object with merged entities.

DOCS: https://spacy.io/api/pipeline-functions#merge_entities
)r   r   ent_typer   N)r   entsr   r   r   labelr   )r   r   entr   s       r   r   r   %   sk     
	[88CHHLL399UEc/  
 J	 
	 Js   AA66
Bmerge_subtokensr   r#   c                 j   [        U R                  5      nUR                  SUSS.//5        U" U 5      n[        R                  " U VVVs/ s H  u  pEo`XVS-    PM     snnn5      nU R                  5        nU H  n	UR                  U	5        M     SSS5        U $ s  snnnf ! , (       d  f       U $ = f)zMerge subtokens into a single token.

doc (Doc): The Doc object.
label (str): The subtoken dependency label.
RETURNS (Doc): The Doc object with merged subtokens.

DOCS: https://spacy.io/api/pipeline-functions#merge_subtokens
SUBTOK+)r   op   N)r	   vocabaddr   filter_spansr   r   )
r   r#   mergermatches_startendspansr   spans
             r   r%   r%   9   s     SYYF
JJx54567SkGQ#573QRE		[Dd#  
 J	 R		 Js   B7B##
B2c                       \ rS rSrSS\S\4S jjrS\S\4S jrS\\	\
4   4S jr0 4S	\\	\
4   SS
4S jjrS rS rS rS rSrg
)TokenSplitterN   
min_lengthsplit_lengthc                     Xl         X l        g Nr8   r9   )selfr8   r9   s      r   __init__TokenSplitter.__init__O   s    $(    r   r   c           	          U R                   S:  a  U R                  S:  a  UR                  5        nU H  n[        UR                  5      U R                   :  d  M(  / n/ n0 n[        S[        UR                  5      U R                  5       HL  nUR                  UR                  XwU R                  -    5        UR                  X7U R                  -  45        MN     UR                  X4XV5        M     S S S 5        U$ U$ ! , (       d  f       U$ = f)Nr   )r8   r9   r   lentextrangeappendsplit)r=   r   r   torthsheadsr   is           r   __call__TokenSplitter.__call__S   s    ??Q4#4#4q#8![A166{doo5 " " "!&q#aff+t7H7H!IA!LL8I8I4I)JK!LL!1B1B-B)CD "J $))!EA  " 
s
 "! 
s   'C>BC>>
Dc                 4    U R                   U R                  S.$ )Nr<   r<   r=   s    r   _get_configTokenSplitter._get_configa   s    // --
 	
r@   configNc                 `    UR                  SS5      U l        UR                  SS5      U l        g )Nr8   r   r9   )getr8   r9   )r=   rQ   s     r   _set_configTokenSplitter._set_configg   s'     **\15"JJ~q9r@   c                 B   ^  SU 4S j0n[         R                  " U/ 5      $ )Ncfgc                  L   > [         R                  " T R                  5       5      $ r;   )srsly
json_dumpsrO   rN   s   r   <lambda>(TokenSplitter.to_bytes.<locals>.<lambda>m   s    5++D,<,<,>?r@   r   to_bytesr=   kwargsserializerss   `  r   r^   TokenSplitter.to_bytesk   s#    ?
 }}["--r@   c                 F   ^  SU 4S j0n[         R                  " X/ 5        T $ )NrW   c                 N   > TR                  [        R                  " U 5      5      $ r;   )rT   rY   
json_loadsbr=   s    r   r[   *TokenSplitter.from_bytes.<locals>.<lambda>s   s    T--e.>.>q.ABr@   r   
from_bytesr=   datar`   deserializerss   `   r   rj   TokenSplitter.from_bytesq   s%    B
 	R0r@   c                 n   ^  [         R                  " U5      nSU 4S j0n[         R                  " X/ 5      $ )NrW   c                 N   > [         R                  " U TR                  5       5      $ r;   )rY   
write_jsonrO   pr=   s    r   r[   'TokenSplitter.to_disk.<locals>.<lambda>{   s    U--a1A1A1CDr@   r   ensure_pathto_diskr=   pathr`   ra   s   `   r   rw   TokenSplitter.to_diskx   s3    %D
 ||Dr22r@   c                 p   ^  [         R                  " U5      nSU 4S j0n[         R                  " X/ 5        g )NrW   c                 N   > TR                  [        R                  " U 5      5      $ r;   )rT   rY   	read_jsonrr   s    r   r[   )TokenSplitter.from_disk.<locals>.<lambda>   s    T--eooa.@Ar@   r   rv   	from_diskrx   s   `   r   r   TokenSplitter.from_disk   s0    %A
 	t"-r@   r<   )r   r   )__name__
__module____qualname____firstlineno__intr>   r
   rK   r   strr   rO   rT   r^   rj   rw   r   __static_attributes__ r@   r   r6   r6   N   sp    )3 )# )C C 
T#s(^ 
 46 :$sCx. :$ :.3.r@   r6   c                   b    \ rS rSrSS.S\\\4   S\4S jjrS\	S\	4S	 jr
S
 rS rS rS rSrg)
DocCleaner   T)silentr   r   c                *    [        U5      US.U l        g )N)r   r   )dictrW   )r=   r   r   s      r   r>   DocCleaner.__init__   s    -1%[F#Kr@   r   r   c                 6   U R                   S   nU R                   S   nUR                  5        H  u  pEUnUR                  S5      nSnUS S  H\  n	[        Xi5      (       a  [	        Xi5      nM   SnU(       a  M+  [
        R                  " [        R                  R                  US95        M^     U(       a  M  [        XgS   5      (       a  [        XgS   U5        M  U(       a  M  [
        R                  " [        R                  R                  US95        M     U$ )Nr   r   .FT)attr)rW   itemsrF   hasattrgetattrwarningswarnr   W116formatsetattr)
r=   r   r   r   r   valueobjpartsskipparts
             r   rK   DocCleaner.__call__   s    hhw'xx) ;;=KDCJJsOEDcr
3%%!#,CD!6 hmm&:&:&:&EF # 43b	**CrE2!6 hmm&:&:&:&EF! )" 
r@   c                 B   ^  SU 4S j0n[         R                  " U/ 5      $ )NrW   c                  D   > [         R                  " T R                  5      $ r;   )rY   rZ   rW   rN   s   r   r[   %DocCleaner.to_bytes.<locals>.<lambda>   s    5++DHH5r@   r]   r_   s   `  r   r^   DocCleaner.to_bytes   s#    5
 }}["--r@   c                 F   ^  SU 4S j0n[         R                  " X/ 5        T $ )NrW   c                 b   > TR                   R                  [        R                  " U 5      5      $ r;   )rW   updaterY   re   rf   s    r   r[   'DocCleaner.from_bytes.<locals>.<lambda>   s    TXX__U-=-=a-@Ar@   ri   rk   s   `   r   rj   DocCleaner.from_bytes   s%    A
 	R0r@   c                 n   ^  [         R                  " U5      nSU 4S j0n[         R                  " X/ 5      $ )NrW   c                 F   > [         R                  " U TR                  5      $ r;   )rY   rq   rW   rr   s    r   r[   $DocCleaner.to_disk.<locals>.<lambda>   s    U--a:r@   ru   rx   s   `   r   rw   DocCleaner.to_disk   s3    %:
 ||Dr22r@   c                 p   ^  [         R                  " U5      nSU 4S j0n[         R                  " X/ 5        g )NrW   c                 b   > TR                   R                  [        R                  " U 5      5      $ r;   )rW   r   rY   r}   rr   s    r   r[   &DocCleaner.from_disk.<locals>.<lambda>   s    TXX__U__Q-?@r@   r   rx   s   `   r   r   DocCleaner.from_disk   s0    %@
 	t"-r@   )rW   N)r   r   r   r   r   r   r   boolr>   r
   rK   r^   rj   rw   r   r   r   r@   r   r   r      sJ    @D Ld38n L LC C ,.3.r@   r   c                     U S:X  a"  [         R                  " S5      nUR                  $ U S:X  a"  [         R                  " S5      nUR                  $ [	        S[
         SU  35      e)Nmake_doc_cleanerzspacy.pipeline.factoriesmake_token_splitterzmodule z has no attribute )	importlibimport_moduler   r   AttributeErrorr   )namemodules     r   __getattr__r      sg    !!(()CD&&&	&	&(()CD)))
78*,>tfE
FFr@   )subtok)r   sysr   typingr   r   rY    r   errorsr   languager   matcherr	   tokensr
   	componentr   r   r   r%   r6   r   r   r   r@   r   <module>r      s     
         
4
3 3 
" 
<
 
 
%4P S   Q(6. 6.r3. 3.nGr@   