
    h4	                     p    S SK Jr  SSKJr  SSKJrJr  SSKJrJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
 jrS rg)    )Printer   )Errors)DocSpan)iob_to_biluotags_to_entities)	minibatch)Vocab   )n_sents_infoc              /      #    [        5       n[        US9nUS:  a  [        Xa5        [        U R	                  S5      XQ5       Sh  vN   g N7f)a  
Convert IOB files with one sentence per line and tags separated with '|'
into Doc objects so they can be saved. IOB and IOB2 are accepted.

Sample formats:

I|O like|O London|I-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|O
I|O like|O London|B-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|O
I|PRP|O like|VBP|O London|NNP|I-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|O
I|PRP|O like|VBP|O London|NNP|B-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|O
)no_printr   
N)r   r   r   read_iobsplit)
input_datan_sentsr   argskwargsvocabmsgs          _/home/james-whalen/.local/lib/python3.13/site-packages/spacy/training/converters/iob_to_docs.pyiob_to_docsr      sB      GE
8
$C{S"
((.???s   AAAAc              #     #    [        XS9 GH  n/ n/ n/ n/ n/ nU GH&  n	U	R                  5       (       d  M  U	R                  5        V
s/ s H  oR                  S5      PM     nn
[        US   5      S:X  a  [	        U6 u  pnOE[        US   5      S:X  a  [	        U6 u  pS/[        U5      -  nO[        [        R                  5      eUR                  U5        UR                  U5        UR                  U5        UR                  U5        UR                  S5        UR                  USS   Vs/ s H  nS	PM     sn5        GM)     [        XS
9n[        U5       H  u  nnUUU   l        M     [        U5       H  u  nnUUU   l        M     [        U5      n[        U5      nU VVVs/ s H  u  nnn[!        UUUS-   US9PM     snnnUl        Uv   GM     g s  sn
f s  snf s  snnnf 7f)N)size|r   r      -Tr   F)words)startendlabel)r
   stripr   lenzip
ValueErrorr   E902extendappendr   	enumeratetag_is_sent_startr   r	   r   ents)	raw_sentsr   r   grouptokensr    tagsiobsent_startslinetsent_tokens
sent_words	sent_tagssent_iob_docitag
sent_startbiluoentitiesLses                            r   r   r      s    93D::<<15>A773<K>;q>"a'25{2C/
x[^$)'*K'8$
 EC
O3	 --LL$KK	"JJx MM+&t$z!"~>~!~>?! " %%oFAsCFK &&{3MAz#-CF  4S!#E*LTUHy1aDA1q5:HU	? 4 ?  ? Vs,   AG,	G#CG,/G ;A/G,*G%
%G,N)
   F)wasabir   errorsr   r1   r   r   trainingr   r	   utilr
   r   r   conll_ner_to_docsr   r   r        r   <module>rM      s%       6   +@& rL   