
    h                     B   S SK r S SKrS SKJr  S SKJrJrJrJrJ	r	J
r
  S SKJrJr  S SKJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJrJr  SSKJr  SSKJr  SSKJ r   Sr!\" 5       RE                  \!5      S   r#Sr$Sr%S\\   S\\&\4   4S jr'S r( " S S\ 5      r)S r*g)    N)islice)AnyCallableDictIterableListOptional)ConfigModel)Floats2d   )Errors)Language)Scorer)Doc)Examplevalidate_get_examples)registry)Vocab   )TextCategorizeraX  
[model]
@architectures = "spacy.TextCatEnsemble.v2"

[model.tok2vec]
@architectures = "spacy.Tok2Vec.v2"

[model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v2"
width = 64
rows = [2000, 2000, 500, 1000, 500]
attrs = ["NORM", "LOWER", "PREFIX", "SUFFIX", "SHAPE"]
include_static_vectors = false

[model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = ${model.tok2vec.embed.width}
window_size = 1
maxout_pieces = 3
depth = 2

[model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
length = 262144
ngram_size = 1
no_output_layer = false
modelzq
[model]
@architectures = "spacy.TextCatBOW.v3"
exclusive_classes = false
ngram_size = 1
no_output_layer = false
aa  
[model]
@architectures = "spacy.TextCatReduce.v1"
exclusive_classes = false
use_reduce_first = false
use_reduce_last = false
use_reduce_max = false
use_reduce_mean = true

[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v2"
pretrained_vectors = null
width = 96
depth = 4
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true
examplesreturnc                 6    [         R                  " U S4SS0UD6$ )Ncatsmulti_labelT)r   
score_cats)r   kwargss     [/home/james-whalen/.local/lib/python3.13/site-packages/spacy/pipeline/textcat_multilabel.pytextcat_multilabel_scorer!   M   s/      	     c                      [         $ )N)r!    r"   r    make_textcat_multilabel_scorerr%   V   s    ##r"   c                       \ rS rSrSr S\S.S\S\S\S\	S\
\   S	S
4S jjjr\S 5       rS
S
S.S\/ \\   4   S\
\   S\
\\      4S jjrS\\   4S jrSrg
)MultiLabel_TextCategorizerZ   zdPipeline component for multi-label text classification.

DOCS: https://spacy.io/api/textcategorizer
)scorervocabr   name	thresholdr)   r   Nc                l    Xl         X l        X0l        SU l        / US.n[	        U5      U l        XPl        g)a  Initialize a text categorizer for multi-label classification.

vocab (Vocab): The shared vocabulary.
model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the
    losses during training.
threshold (float): Cutoff to consider a prediction "positive".
scorer (Optional[Callable]): The scoring method.

DOCS: https://spacy.io/api/textcategorizer#init
N)labelsr,   )r*   r   r+   _rehearsal_modeldictcfgr)   )selfr*   r   r+   r,   r)   r1   s          r    __init__#MultiLabel_TextCategorizer.__init__`   s5    ( 

	 $)49r"   c                     g)NTr$   )r2   s    r    support_missing_values1MultiLabel_TextCategorizer.support_missing_values|   s    r"   )nlpr.   get_examplesr8   r.   c                   [        US5        Uc=  U" 5        H1  nUR                  R                   H  nU R                  U5        M     M3     OU H  nU R                  U5        M     [	        [        U" 5       S5      5      nU R                  U5        U Vs/ s H  oR                  PM     n	nU R                  U5      u  pU R                  5         [        U	5      S:  d,   [        R                  R                  U R                  S95       e[        U
5      S:  d,   [        R                  R                  U R                  S95       eU R                  R!                  XS9  gs  snf )a  Initialize the pipe for training, using a representative set
of data examples.

get_examples (Callable[[], Iterable[Example]]): Function that
    returns a representative sample of gold-standard Example objects.
nlp (Language): The current nlp object the component is part of.
labels: The labels to add to the component, typically generated by the
    `init labels` command. If no labels are provided, the get_examples
    callback is used to extract the labels from the data.

DOCS: https://spacy.io/api/textcategorizer#initialize
z%MultiLabel_TextCategorizer.initializeN
   r   )r+   )XY)r   yr   	add_labellistr   _validate_categories	reference_examples_to_truth_require_labelslenr   E923formatr+   r   
initialize)r2   r9   r8   r.   examplecatlabelsubbatcheg
doc_samplelabel_sample_s               r    rH   %MultiLabel_TextCategorizer.initialize   s    & 	l,ST>'>"99>>CNN3' * *  u%  |~r23!!(+-56XrllX
611(;:"FFKK$6$6DII$6$FF"< 1$Hfkk&8&8dii&8&HH$


; 7s   E r   c                     U Hb  nUR                   R                  R                  5        H7  nUS:X  a  M  US:X  a  M  [        [        R
                  R                  US95      e   Md     g)zThis component allows any type of single- or multi-label annotations.
This method overwrites the more strict one from 'textcat'.g      ?g        )valN)rB   r   values
ValueErrorr   E851rG   )r2   r   exrS   s       r    rA   /MultiLabel_TextCategorizer._validate_categories   sU     B||((//1s
cSj$V[[%7%7C%7%@AA 2 r"   )r/   r1   r   r+   r)   r*   )textcat_multilabel)__name__
__module____qualname____firstlineno____doc__r!   r   r   strfloatr	   r   r3   propertyr6   r   r   r   rH   rA   __static_attributes__r$   r"   r    r'   r'   Z   s     )	 &>  	  " 
8   #'*.#<r8G#445#< h	#<
 #'#<JBXg-> Br"   r'   c                 |    U S:X  a"  [         R                  " S5      nUR                  $ [        S[         SU  35      e)Nmake_multilabel_textcatzspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_modulerd   AttributeErrorrZ   )r+   modules     r    __getattr__ri      sA    (((()CD---
78*,>tfE
FFr"   )+re   sys	itertoolsr   typingr   r   r   r   r   r	   	thinc.apir
   r   thinc.typesr   errorsr   languager   r)   r   tokensr   trainingr   r   utilr   r*   r   textcatr   multi_label_default_configfrom_strDEFAULT_MULTI_TEXTCAT_MODELmulti_label_bow_configmulti_label_cnn_configr_   r!   r%   r'   ri   r$   r"   r    <module>rz      s     
  @ @ #       5   $ 8 %h//0JKGT   *x'8 tCQTH~ $RB RBlGr"   