
    <i|P                     
   S SK r S SKJr  S SKJr  S SKJr  S SKJrJ	r	J
r
Jr  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  S SKJrJr  S SKJr  S SKJr  S SKJ r   S SK!J"r"J#r#  S SK$J%r%   " S S\#5      r& " S S5      r'g)    N)defaultdict)deepcopy)get_all_start_methods)IterableAnyTypeget_args)	BaseModel)BuiltinEmbedder)models)INFERENCE_OBJECT_TYPES)InspectorEmbed)Embedder)NumericVectorNumericVectorStruct)ModelSchemaParser)	FieldPath)FastEmbedMisc)ParallelWorkerPoolWorker)
iter_batchc                   x    \ rS rSrS\S\4S jr\S\S\SS 4S j5       rS\	\
\\4      S\	\
\\4      4S jrS	rg
)ModelEmbedderWorker   
batch_sizekwargsc                 0    [        S0 UD6U l        Xl        g )N )ModelEmbeddermodel_embedderr   )selfr   r   s      \/home/james-whalen/.local/lib/python3.13/site-packages/qdrant_client/embed/model_embedder.py__init__ModelEmbedderWorker.__init__   s    +5f5$    returnc                     U " SSUS.UD6$ )N   )threadsr   r   r   )clsr   r   s      r"   startModelEmbedderWorker.start   s    >1>v>>r%   itemsc           	   #      #    U H5  u  p#U[        U R                  R                  X0R                  S95      4v   M7     g 7f)Ninference_batch_size)listr    embed_models_batchr   )r!   r-   idxbatchs       r"   processModelEmbedderWorker.process   sF     JC''::OO ;    s   =?)r   r    N)__name__
__module____qualname____firstlineno__intr   r#   classmethodr+   r   tupler5   __static_attributes__r   r%   r"   r   r      sl    %3 %# % ?s ?c ?6K ? ?	XeCHo6 	8E#s(O;T 	r%   r   c                      \ rS rSrSr   S S\S-  S\S\S-  S\4S jjr	\
S\S\S-  S	\4S
 j5       r  S!S\\\   -  S\S\S	\\   4S jjr  S"S\\\\4   \-     S\S\S-  S	\\\\4   \-     4S jjr  S!S\\\\4   \-     S\S\S	\\   4S jjr    S#S\\\4   \-  S\\   S-  S\S\S\S-  S	\\\4   \\\4   -  \-  \-  4S jjrS\R0                  S	S4S jr S$S\R0                  S\S\S	\4S jjrS!S\S\S	S4S jjrS\S	\4S jrS\R0                  S	\R0                  4S jr\S	\ \!   4S j5       r"Sr#g)%r   +   @   Nparseris_local_modeserver_versionr   c                     0 U l         0 U l        [        US9U l        U R	                  X#5      U l        [        R                  " 5       (       a  [        S0 UD6U l
        g [        S0 UD6U l
        g )N)rB   r   )_batch_accumulator_embed_storager   _embed_inspector$_check_builtin_embedder_availability_is_builtin_embedder_availabler   is_installedr   r   embedder)r!   rB   rC   rD   r   s        r"   r#   ModelEmbedder.__init__.   sn     LN>@ .f =.2.W.W/
+ #0"<"<">">Hv 	DSD]V\D] 	r%   r&   c                     U (       a  gUc  g UR                  S5      u  p#nUR                  S5      S   n[        U5      [        U5      [        U5      4S:  a  gg! [         a     gf = f)NFT.-r   )r(         )splitr;   	Exception)rC   rD   majorminorpatchs        r"   rI   2ModelEmbedder._check_builtin_embedder_availability?   sw      " 		"0"6"6s";E%KK$Q'EE
CJE
3zA 		s   AA 
A)(A)
raw_modelsis_queryr   c              #      #    U R                   (       d  [        R                  " 5         [        U[        5      (       a  U/n[        X5       H  nU R                  XBUS9 Sh  vN   M     g N	7f)a  Embed raw data fields in models and return models with vectors

    If any of model fields required inference, a deepcopy of a model with computed embeddings is returned,
    otherwise returns original models.
Args:
    raw_models: Iterable[BaseModel] - models which can contain fields with raw data
    is_query: bool - flag to determine which embed method to use. Defaults to False.
    batch_size: int - batch size for inference
Returns:
    list[BaseModel]: models with embedded fields
r/   N)rJ   r   import_fastembed
isinstancer
   r   r2   )r!   rY   rZ   r   raw_models_batchs        r"   embed_modelsModelEmbedder.embed_modelsW   sh     " 22**,j),,$J *: B..  /    !Cs   A A."A,#
A.parallelc              #   d  #    U R                   (       d  [        R                  " 5         Sn[        U[        5      (       a  [        U5      U:  a  Sn[        U R                  [        5      (       d  Ub  US:X  d  U(       a*  [        X5       H  nU R                  XRS9 Sh  vN   M     gSn[        XS9nUS:X  a  [        R                  " 5       nS[        5       ;   a  SOS	nUc   e[        UU R                  5       UU R                  S
9n	U	R!                  XvS9 H  nU Sh  vN   M     g N N7f)a  Embed raw data fields in models and return models with vectors

Requires every input sequences element to contain raw data fields to inference.
Does not accept ready vectors.

Args:
    raw_models: Iterable[BaseModel] - models which contain fields with raw data to inference
    batch_size: int - batch size for inference
    parallel: int - number of parallel processes to use. Defaults to None.

Returns:
    Iterable[Union[dict[str, BaseModel], BaseModel]]: models with embedded fields
FTNr(   r/   )sizer   
forkserverspawn)num_workersworkerstart_methodmax_internal_batch_size)r   )rJ   r   r\   r]   r1   lenrL   r   r   r2   os	cpu_countr   r   _get_worker_classMAX_INTERNAL_BATCH_SIZEordered_map)
r!   rY   r   ra   is_smallr4   multiprocessing_batch_sizeraw_models_batchesrh   pools
             r"   embed_models_strict!ModelEmbedder.embed_models_strictr   s&    & 22**,j$'':+ t}}o661}#J;2252ZZZ < *+&!+J!X1}<<>+7;P;R+R<X_L'''%$--/)(,(D(D	D ))" *  !  # [( !s%   BD0 D,!BD0"D.#
D0.D0r0   c              #      ^ ^^#    T R                   (       d  [        R                  " 5         U H  nT R                  UTSS9  M     T R                  (       d  U Sh  vN   gUUU 4S jU 5        Sh  vN   g N N7f)a  Embed a batch of models with raw data fields and return models with vectors

    If any of model fields required inference, a deepcopy of a model with computed embeddings is returned,
    otherwise returns original models.
Args:
    raw_models: list[Union[dict[str, BaseModel], BaseModel]] - models which can contain fields with raw data
    is_query: bool - flag to determine which embed method to use. Defaults to False.
    inference_batch_size: int - batch size for inference
Returns:
    Iterable[BaseModel]: models with embedded fields
T)rZ   accumulatingNc              3   H   >#    U  H  nTR                  UTS TS9v   M     g7f)FrZ   rw   r0   N)_process_model).0	raw_modelr0   rZ   r!   s     r"   	<genexpr>3ModelEmbedder.embed_models_batch.<locals>.<genexpr>   s8       ",I ##%!&)=	 $  ",s   ")rJ   r   r\   rz   rF   )r!   rY   rZ   r0   r|   s   ` `` r"   r2    ModelEmbedder.embed_models_batch   ss     " 22**,#I	H4P $ &&!!! ",   "s$   AA=A9A=3A;4A=;A=modelpathsrw   c                    [        U[        [        5      5      (       a4  U(       a  U R                  U5        OUc   S5       eU R	                  UUUS9$ Uc  U(       d  [        U5      OUn[        U[        5      (       aI  UR                  5        H3  u  pgU(       a  U R                  XrSS9  M  U R                  UUUSUS9X'   M5     U$ Ub  UOU R                  R                  U5      nU GH  n[        U[        5      (       d  U/OUn	U	 H  n
[        XR                  S5      nUc  M  UR                  (       a  U R                  UUR                  UUUS9  MN  [        U[        5      nU(       a  UOU/nU(       dd  Uc   S5       eU Vs/ s H  nU R	                  XUS9PM     nnU(       a  [        XR                  U5        M  [        XR                  US   5        M  U H  nU R                  U5        M     M     GM     U$ s  snf )	a  Embed model's fields requiring inference

Args:
    model: Qdrant http model containing fields to embed
    paths: Path to fields to embed. E.g. [FieldPath(current="recommend", tail=[FieldPath(current="negative", tail=None)])]
    is_query: Flag to determine which embed method to use. Defaults to False.
    accumulating: Flag to determine if we are accumulating models for batch embedding. Defaults to False.
    inference_batch_size: Optional[int] - batch size for inference

Returns:
    A deepcopy of the method with embedded fields
Nz3inference_batch_size should be passed for inferencerZ   r0   T)rw   Fry   r   )r]   r	   r   _accumulate_drain_accumulatorr   dictr-   rz   rH   inspectr1   getattrcurrenttailsetattr)r!   r   r   rZ   rw   r0   keyvaluepath
list_modelitemcurrent_modelwas_listdata
embeddingss                  r"   rz   ModelEmbedder._process_model   s   * eX&<=>>  ' )4IHI4..%)= /   =+7HUOUEeT""#kkm
''4'H!%!4!4!)%*-A "5 "EJ	 , L*0E0E0M0Me0TD(25$(?(?%UJ" 'llD A (99''%		!)%1-A (   *->H5=MM?M'0<QPQ< )6	& )6 !33 $Nb 4  )6	 # & $#D,,
C#D,,
1F$1D ,,T2 %2? # F &s   	Hr   c                 4   [        U[        5      (       a)  UR                  5        H  nU R                  U5        M     g[        U[        5      (       a:  U H4  n[        U[        [        5      5      (       d    gU R                  U5        M6     [        U[        [        5      5      (       d  gU R                  U5      nUR                  U R                  ;  a  / U R                  UR                  '   U R                  UR                     R                  U5        g)a  Add data to batch accumulator

Args:
    data: models.VectorStruct - any vector struct data, if inference object types instances in `data` - add them
        to the accumulator, otherwise - do nothing. `InferenceObject` instances are converted to proper types.

Returns:
    None
N)r]   r   valuesr   r1   r	   r   _resolve_inference_objectr   rF   append)r!   r   r   s      r"   r   ModelEmbedder._accumulate)  s     dD!!  ' 'dD!!!%2H)IJJ  ' 
 $)? @AA--d3::T44424D##DJJ/

+2248r%   c                 P   [        U[        5      (       a-  UR                  5        H  u  pEU R                  XRUS9X'   M     U$ [        U[        5      (       aJ  [        U5       H9  u  pe[        U[        [        5      5      (       d  Us  $ U R                  XRUS9X'   M;     U$ [        U[        [        5      5      (       d  U$ U R                  (       a+  U R                  R                  UR                  S5      (       d  U R                  X#S9  U R                  UR                  5      $ )a,  Drain accumulator and replaces inference objects with computed embeddings
    It is assumed objects are traversed in the same order as they were added to the accumulator

Args:
    data: models.VectorStruct - any vector struct data, if inference object types instances in `data` - replace
        them with computed embeddings. If embeddings haven't yet been computed - compute them and then replace
        inference objects.
    inference_batch_size: int - batch size for inference

Returns:
    NumericVectorStruct: data with replaced inference objects
r   N)r]   r   r-   r   r1   	enumerater	   r   rG   getr   _embed_accumulator_next_embed)r!   r   rZ   r0   r   r   is          r"   r    ModelEmbedder._drain_accumulatorG  s
    dD!!"jjl
 33CW 4 	 + KdD!!%dO!%2H)IJJK11CW 2 	 , K(12
 
 K""$*=*=*A*A$**d*S*S##X#a

++r%   c           
        ^ ^ S[         [           S[        S[        S[         [           4UU 4S jjnT R
                   H  n[        T R                  R                  U5      T R                  R                  U5      T R                  R                  U5      T R                  R                  U5      T R                  R                  U5      45      (       a  M  [        T R                  [        5      (       a  [        U S35      e[        U S35      e   T R
                  R!                  5        H  u  pEU" XTUS9T R"                  U'   M     T R
                  R%                  5         g	)
zEmbed all accumulated objects for all models

Args:
    is_query: bool - flag to determine which embed method to use. Defaults to False.
    inference_batch_size: int - batch size for inference
Returns:
    None
objects
model_namer   r&   c                   > / n/ n/ n[        [        5      n[        U 5       GH  u  px[        U[        R
                  5      n	[        [        X45      5       Hd  u  n
u  pXR                  :X  d  M  X:X  d  M   Xj   R                  U5        XZ   R                  U	(       a  UR                  OUR                  5          M     U/U[        U5      '   UR                  UR                  5        UR                  U	5        UR                  U	(       a  UR                  OUR                  /5        GM	     / n[        [        X45      5       He  u  nu  pUR                  TR                  R                  UU	(       a  XW   OSU	(       d  XW   OSTU=(       d    0 US9 Vs/ s H  nUPM     sn5        Mg     [        U5      n/ /[        U 5      -  nUR!                  5        H  nU H  n[#        U5      UU'   M     M     U$ s  snf )zl
Assemble batches by options and data type based groups, embeds and return embeddings in the original order
N)r   textsimagesrZ   optionsr   )r   r1   r   r]   r   Documentzipr   r   textimagerj   extendrL   embediterr   next)r   r   r   unique_optionsunique_options_is_textbatchesgroup_indicesr   objis_textjr   options_is_textr   	embeddingiter_embeddingsordered_embeddingsindicesindexrZ   r!   s                      r"   r   /ModelEmbedder._embed_accumulator.<locals>.embed{  s    46N13"!#G2=d2CM#G,$S&//:5>?61A1 ++-'2L%(//2
))g#((399M6 ;<M#n"56"))#++6*11':NNCHHSYY#GH -  J)23~3^)_%%G!! *.)<)<'107'*T5<7:$%-$+Mr'1 *= *
*I "*
 *` #:.O=?D3w<<O(//1$E04_0E&u- % 2 &%%
s   G6
zw is not among supported models. Have you forgotten to set `cloud_inference` or install `fastembed` for local inference? is not among supported models)r   r   r   N)r1   r   strr;   r   rF   anyrL   is_supported_text_modelis_supported_sparse_model(is_supported_late_interaction_text_modelis_supported_image_model.is_supported_late_interaction_multimodal_modelr]   r   
ValueErrorr-   rG   clear)r!   rZ   r0   r   r   r   s   ``    r"   r    ModelEmbedder._embed_accumulatorq  sJ   0	&010	&?B0	&PS0	&- 0	& 0	&d ,,EMM99%@MM;;EBMMJJ5QMM::5AMMPPQVW  dmm_==$ ' "r s 
 %w.L%MNN! -$  2288:KE).;O*D& ; 	%%'r%   r   c                 >    U R                   U   R                  S5      $ )zGet next computed embedding from embedded batch

Args:
    model_name: str - retrieve embedding from the storage by this model name

Returns:
    NumericVector: computed embedding
r   )rG   pop)r!   r   s     r"   r   ModelEmbedder._next_embed  s      "":.22155r%   c                 T   [        U[        R                  5      (       d  U$ UR                  nUR                  nUR
                  n[        U R                  R                  U5      U R                  R                  U5      U R                  R                  U5      45      (       a  [        R                  " X#US9$ U R                  R                  U5      (       a  [        R                  " X#US9$ U R                  R                  U5      (       a  [        U S35      e[        U S35      e)zResolve inference object into a model

Args:
    data: models.VectorStruct - data to resolve, if it's an inference object, convert it to a proper type,
        otherwise - keep unchanged

Returns:
    models.VectorStruct: resolved data
)r   r   r   )r   r   r   z- does not support `InferenceObject` interfacer   )r]   r   InferenceObjectr   objectr   r   rL   r   r   r   r   r   Imager   r   )r!   r   r   r   r   s        r"   r   'ModelEmbedder._resolve_inference_object  s     $ 6 677KZZ
,,55jA77
CFFzR
 
 ??QQ==11*==<<jwOO==GG
SS
|+XYZZJ<'EFGGr%   c                     [         $ )N)r   )r*   s    r"   rm   ModelEmbedder._get_worker_class  s    ""r%   )rF   rH   rG   rJ   rL   )NFN)F   )r   N)NFFN)r   )$r7   r8   r9   r:   rn   r   boolr   r   r#   staticmethodrI   r
   r   r;   r_   r   rt   r1   r2   r   r   rz   r   VectorStructr   r   r   r   r   r   r<   r   r   rm   r>   r   r%   r"   r   r   +   s     ,0#%)	
!D(
 
 d
	

 
" -04Z	 4 	 33  	
 
)	< #	7!T#y.1I=>7! 7! *	7!
 
$sI~&2	37!x $%	"c9n-	9:" " "	"
 
)	"N )-"+/XCN#i/X I%X 	X
 X "DjX 
c9n	S-%7 8	89	D}	TXt 3 3  > VW(,''(,37(,OR(,	(,TR(4 R(s R([_ R(h	6c 	6m 	6Hf.A.A HfFYFY H@ #$':"; # #r%   r   )(rk   collectionsr   copyr   multiprocessingr   typingr   r   r   r	   pydanticr
   $qdrant_client.embed.builtin_embedderr   qdrant_client.httpr   qdrant_client.embed.commonr   #qdrant_client.embed.embed_inspectorr   qdrant_client.embed.embedderr   qdrant_client.embed.modelsr   r   !qdrant_client.embed.schema_parserr   qdrant_client.embed.utilsr   qdrant_client.fastembed_commonr    qdrant_client.parallel_processorr   r   qdrant_client.uploader.uploaderr   r   r   r   r%   r"   <module>r      sS    	 #  1 0 0  @ % = > 1 I ? / 8 G 6& *G# G#r%   