
    h                         S SK Jr  S SKJrJrJrJr  S SKrS SKJ	r	  S SK
Jr  S SKJrJr  S SKJr  S SKJrJrJrJr  S S	KJr  S
SKJr  S rS rS rS rg)    )Path)AnyCallableDictIterableN)zeros)Config)Errorsutil)InMemoryLookupKB)SimpleFrozenListensure_pathload_model_from_configregistry)Vocab   )make_tempdirc                 t   [        U 5      n[        U5        [        5        n[        U5      nUR	                  5       (       d  UR                  5         US-  nUR                  [        U5      5        [        U SS9nUR                  [        U5      5        S S S 5        [        W5        g ! , (       d  f       N= f)Nkb   )vocabentity_vector_length)
_get_dummy_kb	_check_kbr   r   existsmkdirto_diskstrr   	from_disk)en_vocabkb1ddir_path	file_pathkb2s         a/home/james-whalen/.local/lib/python3.13/site-packages/spacy/tests/serialize/test_serialize_kb.pytest_serialize_kb_diskr'      s    

!CcN 
1q>  NNtO	C	N#XAFc)n% 
 cN 
s   A4B))
B7c                 ,   [        U SS9nUR                  SS/ SQS9  UR                  SS/ S	QS9  UR                  S
S/ SQS9  UR                  SS/ SQS9  UR                  SSS
/SS/S9  UR                  S/ SQ/ SQS9  UR                  SS
/S/S9  U$ )Nr   )r   Q53!   )r      r   )entityfreqentity_vectorQ17r         r   Q007r1   r   r   r1   Q44V  )   r7   r7   double07皙?g?)aliasentitiesprobabilitiesguy)r)   r3   r/   r5   )333333?r>   g?r9   randomg      ?)r   
add_entity	add_alias)r   r   s     r&   r   r   #   s    	%a	8BMMRyMAMMQiM@MMayMAMMS	MBLLzUFOCQT:LVLL.*  
 LLx6(3%LHI    c                    U R                  5       S:X  d   eS H  nXR                  5       ;   a  M   e   S H  nXR                  5       ;  a  M   e   U R                  5       S:X  d   eS H  nX R                  5       ;   a  M   e   S H  nX R                  5       ;  a  M   e   [	        U R                  S5      S S	9n[        U5      S
:X  d   eUS   R                  S:X  d   eSUS   R                  s=:  a  S:  d   e   eUS   R                  / SQ:X  d   eUS   R                  S:X  d   eSUS   R                  s=:  a  S:  d   e   eUS   R                  S:X  d   eSUS   R                  s=:  a  S:  d   e   eUS   R                  / SQ:X  d   eUS   R                  S:X  d   eSUS   R                  s=:  a  S:  d   e   eg )Nr7   )r)   r/   r3   r5   ) Q0r   )r8   r=   r?   )nothingnessrD   randomnoiser8   c                     U R                   $ N)entity_)xs    r&   <lambda>_check_kb.<locals>.<lambda>E   s    199rB   )keyr   r   r3   g"@g
ףp=
@r4   gS㥛?gE?r2   r/   gףp=
?gGz @r0   gMbX?gB`"۹?)get_size_entitiesget_entity_stringsget_size_aliasesget_alias_stringssortedget_alias_candidateslenrJ   entity_freqr.   alias_
prior_prob)r   entity_stringalias_string
candidatess       r&   r   r   5   s   !Q&&&6 5 5 7777 7#$9$9$;;;; $  A%%%5335555 6:#7#7#9999 ; //
;ATUJz?aa=  F***:a=,,3t33333a=&&)333a=:---:a=++3e33333a=  E)))*Q-++2d22222a=&&)333a=:---:a=++3e33333rB   c            	        ^	 Sn  " S S[         5      m	[        R                  " S5      S[        [        [
        /T	4   4U	4S jj5       n[        R                  " S5      S[
        S	[
        S[        [        /T	4   4U	4S
 jj5       n[        5       R                  U 5      n[        USS9nUR                  5         UR                  S5      n[        UR                  5      T	:X  d   eUR                  R                  S:X  d   eUR                  R                  S:X  d   e[        5        nUR!                  U5        ["        R$                  " U5      nUR                  S5      n[        UR                  5      T	:X  d   eUR                  R                  S:X  d   eUR                  R                  S:X  d   e SSS5        g! , (       d  f       g= f)z>Check that IO of a custom KB works fine as part of an EL pipe.a  
    [nlp]
    lang = "en"
    pipeline = ["entity_linker"]

    [components]

    [components.entity_linker]
    factory = "entity_linker"
    
    [components.entity_linker.generate_empty_kb]
    @misc = "kb_test.CustomEmptyKB.v1"
    
    [initialize]

    [initialize.components]

    [initialize.components.entity_linker]

    [initialize.components.entity_linker.kb_loader]
    @misc = "kb_test.CustomKB.v1"
    entity_vector_length = 342
    custom_field = 666
    c                   p   ^  \ rS rSrU 4S jr\" 5       4S\\   4S jjr\" 5       4S\\   4S jjr	Sr
U =r$ )9test_serialize_subclassed_kb.<locals>.SubInMemoryLookupKBq   c                 0   > [         TU ]  X5        X0l        g rI   )super__init__custom_field)selfr   r   rc   	__class__s       r&   rb   Btest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.__init__r   s    GU9 ,rB   excludec                 N  ^ ^ [        U5      nUR                  5       (       d  UR                  SS9  UR                  5       (       d&  [	        [
        R                  R                  US95      eS[        SS4U 4S jjmU 4S jU 4S	 jU4S
 jS.n[        R                  " XU5        g)z[We overwrite InMemoryLookupKB.to_disk() to ensure that self.custom_field is stored as well.T)parentslocr$   returnNc                 L   > [         R                  " U STR                  05        g Nrc   )srsly
write_jsonrc   r$   rd   s    r&   serialize_custom_fieldsbtest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.to_disk.<locals>.serialize_custom_fields~   s      ^T=N=N,OPrB   c                 &   > TR                  U 5      $ rI   )write_contentsprd   s    r&   rL   Stest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.to_disk.<locals>.<lambda>   s    d&9&9!&<rB   c                 N   > TR                   R                  R                  U 5      $ rI   )r   stringsr   rv   s    r&   rL   rx      s    $***<*<*D*DQ*GrB   c                    > T" U 5      $ rI    )rw   rr   s    r&   rL   rx      s    +B1+ErB   contentszstrings.jsoncustom_fields)r   r   r   is_dir
ValueErrorr
   E928formatr   r   r   )rd   pathrg   	serializerr   s   `   @r&   r   Atest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.to_diskv   s    t$D;;==

4
(;;== !3!3!3!=>>Q4 QD Q = G!EI
 LL'2rB   c                 |  ^ ^ [        U5      nUR                  5       (       d&  [        [        R                  R                  US95      eUR                  5       (       d&  [        [        R                  R                  US95      eS[        SS4U 4S jjmU 4S jU 4S jU4S jS	.n[        R                  " XU5        g)
z]We overwrite InMemoryLookupKB.from_disk() to ensure that self.custom_field is loaded as well.rj   r$   rl   Nc                 B   > [         R                  " U 5      S   Tl        g rn   )ro   	read_jsonrc   rq   s    r&   deserialize_custom_fieldsftest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.from_disk.<locals>.deserialize_custom_fields   s    $)OOI$>~$N!rB   c                 &   > TR                  U 5      $ rI   )read_contentsrv   s    r&   rL   Utest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.from_disk.<locals>.<lambda>   s    d&8&8&;rB   c                 N   > TR                   R                  R                  U 5      $ rI   )r   rz   r   rv   s    r&   rL   r      s    $***<*<*F*Fq*IrB   c                    > T" U 5      $ rI   r|   )rw   r   s    r&   rL   r      s    +DQ+GrB   r}   )r   r   r   r
   E929r   r   r   r   r   r   )rd   r   rg   deserializer   s   `   @r&   r   Ctest_serialize_subclassed_kb.<locals>.SubInMemoryLookupKB.from_disk   s    t$D;;== !3!3!3!=>>;;== !3!3!3!=>>OT Od O < I!G<K
 NN4g6rB   )rc   )__name__
__module____qualname____firstlineno__rb   r   r   r   r   r   __static_attributes____classcell__)re   s   @r&   SubInMemoryLookupKBr^   q   s?    	- :J9K 	3# 	3$ <L;M 	78C= 	7 	7rB   r   zkb_test.CustomEmptyKB.v1rl   c                  0   > S[         S[        4U4S jjn U $ )Nr   r   c                    > T" U USS9$ )Nr   r   r   rc   r|   )r   r   r   s     r&   empty_kb_factoryOtest_serialize_subclassed_kb.<locals>.empty_custom_kb.<locals>.empty_kb_factory   s    &%9 rB   )r   int)r   r   s    r&   empty_custom_kb5test_serialize_subclassed_kb.<locals>.empty_custom_kb   s    	E 	 	  rB   zkb_test.CustomKB.v1r   rc   c                    >^ ^ UUU 4S jnU$ )Nc                 P   > T" U TTS9nUR                  SS[        T5      5        U$ )Nr   random_entityg        )r@   r   )r   r   r   rc   r   s     r&   custom_kb_factoryJtest_serialize_subclassed_kb.<locals>.custom_kb.<locals>.custom_kb_factory   s3    $%9)B
 MM/36J0KLIrB   r|   )r   rc   r   r   s   `` r&   	custom_kb/test_serialize_subclassed_kb.<locals>.custom_kb   s    	 ! rB   T)	auto_fillentity_linkerr6   i  N)r   r   miscr   r   r   r	   from_strr   
initializeget_pipetyper   r   rc   r   r   r   load_model_from_path)
config_stringr   r   confignlpr   tmp_dirnlp2entity_linker2r   s
            @r&   test_serialize_subclassed_kbr   U   s   M2'7. '7R ]]-. Xucl4G&GH   /  ]]()!!!14!	5'..	/! *! X}-F
 4
8CNNLL1M  !%888800C777((C/// 
7G((17N%%&*====  55<<<  --444 
s   'BF==
G)pathlibr   typingr   r   r   r   ro   numpyr   	thinc.apir	   spacyr
   r   spacy.kb.kb_in_memoryr   
spacy.utilr   r   r   r   spacy.vocabr   r   r'   r   r   r   r|   rB   r&   <module>r      s?     0 0     2 V V  &$4@p5rB   