
    h|                         S SK r S SKrS SKJrJrJrJrJr  S SKJ	r	  \R                  R                  S5      S 5       rS rS rS rS	 rS
 rS r\R                  R'                  SSS/S/5      S 5       rS rS rg)    N)DEPMORPHORTHPOSSHAPE)Doci  c                 (   / SQn/ SQn/ SQnU Vs/ s H  o@R                   R                  U5      PM     nnU Vs/ s H  o`R                   R                  U5      PM     nn[        XS9nUR                  S[        R
                  " USS95        UR                  S[        R
                  " USS95        U V	s/ s H  oR                  PM     sn	U:X  d   eU V	s/ s H  oR                  PM     sn	U:X  d   eUR                  SS/5      n
[        UR                  US9R                  SS/U
5      nU V	s/ s H  oR                  PM     sn	U:X  d   eU V	s/ s H  oR                  PM     sn	U:X  d   eg	s  snf s  snf s  sn	f s  sn	f s  sn	f s  sn	f )
z5Test that lemmas are set correctly in doc.from_array.)Iz'llsurvive)PRPMDVB)z-PRON-willr   wordsTAGuint64)dtypeLEMMAN)
stringsaddr   
from_arraynumpyarraytag_lemma_to_arrayvocab)en_vocabr   tagslemmastagtag_idslemma	lemma_idsdoct	doc_arraynew_docs               T/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/doc/test_array.pytest_issue2203r+      s^    $ED*F489DS##C(DG9:@A&!!%%e,&IA
h
$CNN5%++gX>?NN7EKK	BC CqFFC D(((!"cHHc"f,,,eW-.I#))5)44eW5EyQG#$GqFFG$,,,%&gHHg&&000 :A
 !" %&s"   $E6$E;8F F5F
Fc                     [        U / SQS9nUR                  S   nUR                  UR                  :w  d   eUR	                  [
        [        45      nUS   S   US   S   :w  d   eUS   S   US   S   :w  d   eg )NAnexamplesentencer   r/   r      r   r   orthshaper   r   r   r   r&   r/   feats_arrays       r*   test_doc_array_attr_of_tokenr7      s    
h;
<Cii	"G<<7==(((,,e}-Kq>!Aq 1111q>!Aq 1111    c                    [        U / SQS9nUR                  S   nUR                  UR                  :w  d   eUR	                  [
        [        45      nUR	                  S5      nUS   S   US   S   :X  d   eUS   S   US   S   :X  d   eg )Nr-   r   r/   )r   r   r   r1   r2   )r   r&   r/   r6   feats_array_stringys        r*   $test_doc_stringy_array_attr_of_tokenr;   &   s    
h;
<Cii	"G<<7==(((,,e}-K,,'89q!!$Aq(9999q!!$Aq(9999r8   c                     [        U / SQS9nUR                  S   nUR                  UR                  :w  d   eUR	                  [
        5      nUR                  S:X  d   eg )Nr-   r   r/   )   )r   r   r3   r4   r   r   r5   s       r*   test_doc_scalar_attr_of_tokenr>   0   sU    
h;
<Cii	"G<<7==(((,,t$K$$$r8   c                    / SQn/ SQn[        XUS9nUS   R                  US   R                  s=:w  a(  US   R                  s=:w  a  US   R                  :w  d   e   eUR                  [        [        45      nUS   S   US   R                  :X  d   eUS   S   US   R                  :X  d   eUS   S   US   R                  :X  d   eUS   S   US   R                  :X  d   eg )NAnicer0   .)DETADJNOUNPUNCT)r   posr   r1      r=   )r   rH   r   r   r   )r   r   rH   r&   r6   s        r*   test_doc_array_tagrJ   8   s    *E
)C
h
-Cq6::Q?s1vzz?SVZZ?????,,c{+Kq>!A

***q>!A

***q>!A

***q>!A

***r8   c                    / SQn/ SQn[        XUS9nUS   [        US   R                  5      :X  d   eUS   [        US   R                  5      :X  d   eUS   [        US   R                  5      :X  d   eUR                  [        [
        45      nUS   S   US   R                  R                  :X  d   eUS   S   US   R                  R                  :X  d   eUS   S   US   R                  R                  :X  d   eg )N)Eatblueham)zFeat=VzFeat=JzFeat=N)r   morphsr   r1   rI   )r   strmorphr   r   r   key)r   r   rQ   r&   r6   s        r*   test_doc_array_morphrS   D   s    "E*E
hE
2C8s3q6<<((((8s3q6<<((((8s3q6<<((((,,e}-Kq>!A 0 0000q>!A 0 0000q>!A 0 0000r8   c                 6   / SQn/ SQn[        XUS9nUR                  [        [        45      nUS   S   US   R                  :X  d   eUS   S   US   R                  :X  d   eUS   S   US   R                  :X  d   eUS   S   US   R                  :X  d   eg )Nr@   )detamodROOTpunct)r   depsr   r1   rI   r=   )r   r   r   r   dep)r   r   rY   r&   r6   s        r*   test_doc_array_depr[   R   s    *E+D
h$
/C,,c{+Kq>!A

***q>!A

***q>!A

***q>!A

***r8   attrsr   r   IS_ALPHAc                 l    / SQn[        XS9n[        XS9R                  XR                  U5      5        g)ztTest that both Doc.to_array and Doc.from_array accept string attrs,
as well as single attrs and sequences of attrs.
r-   r   N)r   r   r   )r   r\   r   r&   s       r*   #test_doc_array_to_from_string_attrsr_   ]   s/    
 *E
h
$C))%e1DEr8   c                 ~    / SQn[        XS9R                  S5      nUS   S:X  d   eUS   S:X  d   eUS   S:X  d   eg	)
z7Test that Doc.to_array can retrieve token start indicesr-   r   IDXr   r1   r=   rI      N)r   r   )r   r   offsetss      r*   test_doc_array_idxrd   g   sN    )E((11%8G1:??1:??1:r8   c                     / SQn[        XS9nU H  nUS   Ul        M     UR                  S/5      n[        XS9nUR                  S/U5        UR                  S/5      n[        R
                  " S5      R                  [        R                  5      US'   [        XS9n[        R                  " [        5         UR                  S/U5        SSS5        UR                  S/5      n[        R
                  " S5      R                  [        R                  5      US'   [        XS9n[        R                  " [        5         UR                  S/U5        SSS5        g! , (       d  f       N= f! , (       d  f       g= f)zBTest that Doc.from_array doesn't set heads that are out of bounds.)Thisisar0   rC   r   r   HEADN   )r   headr   r   r   int32astyper   pytestraises
ValueError)r   r   r&   tokenarrdoc_from_arrays         r*   #test_doc_from_array_heads_in_boundsru   p   s'   0E
h
$CV
  ,,x
 C/Nvh, ,,x
 C[[_##ELL1CF/N	z	"!!6(C0 
# ,,x
 C[[^""5<<0CF/N	z	"!!6(C0 
#	" 
#	" 
#	"s   :EE/
E,/
E=)r   ro   spacy.attrsr   r   r   r   r   spacy.tokensr   markissuer+   r7   r;   r>   rJ   rS   r[   parametrizer_   rd   ru    r8   r*   <module>r|      s      4 4  41 1(2:%	+1+ FG#4j"ABF CF1r8   