
    h&                         S SK r S SKrS SKJr  S SKJrJrJrJrJ	r	  S SK
r
SSKJrJrJrJr  SSKJr  SSKJr  SSKJrJr  SS	KJrJrJrJrJr  S
SKJr  S
SKJ r    " S S\ 5      r!S\S\"4S jr#S\S\"SSS\4S jr$g)    N)BytesIO)AnyCallableDictOptionalcast   )CupyOpscontext_poolsget_current_opsset_gpu_allocator)torch)	Optimizer)
ArgsKwargsFloatsXd)convert_recursiveget_torch_default_deviceiterate_recursivetorch2xpxp2torch   )PyTorchGradScaler)Shimc                     ^  \ rS rSrSr       SS\S\S\S\\   S\S   S	\\	\/\
4      S
\\	\\
S/\4      4U 4S jjjrS r\S 5       rS\S\4S jrS\4S jrS\4S jr\R*                  S 5       rS\S\4S jrS rS rSrU =r$ )PyTorchShim   aX  Interface between a PyTorch model and a Thinc Model. This container is
*not* a Thinc Model subclass itself.

mixed_precision:
    Enable mixed-precision. This changes whitelisted ops to run
    in half precision for better performance and lower memory use.
grad_scaler:
    The gradient scaler to use for mixed-precision training. If this
    argument is set to "None" and mixed precision is enabled, a gradient
    scaler with the default configuration is used.
device:
    The PyTorch device to run the model on. When this argument is
    set to "None", the default device for the currently active Thinc
    ops is used.
serialize_model:
    Callback that receives the wrapped PyTorch model as its argument and
    returns a "bytes" representation of the same. The representation should
    contain all the necessary information to fully deserialize the model.
deserialize_model:
    Callback that receives the default PyTorch model (passed to the constructor), the
    serialized "bytes" representation and a PyTorch device. It should return a
    fully deserialized model on the target device as its result.
model	optimizermixed_precisiongrad_scalerdevicetorch.deviceserialize_modeldeserialize_modelc	                   > [         TU ]  XU5        Uc
  [        5       nUb  UR                  U5        Uc  [	        U5      nUR                  U5        XPl        X@l        Ub  UO[        U l	        Ub  UO[        U l        [        R                  ba  [        [        5       [        5      (       aC  [         R"                  " 5       n	SU	;  a'  SSKJn
  [)        S5        U
" 5       R+                  5         g g g g )Npytorchr   )get_default_memory_pool)super__init__r   tor   to__grad_scaler_mixed_precisiondefault_serialize_torch_model_serialize_modeldefault_deserialize_torch_model_deserialize_modelr
   xp
isinstancer   r   getcupyr'   r   free_all_blocks)selfr   configr   r   r    r!   r#   r$   poolsr'   	__class__s              M/home/james-whalen/.local/lib/python3.13/site-packages/thinc/shims/pytorch.pyr)   PyTorchShim.__init__0   s     		2>-/FHHV+O<K' / * . 	 !, 0 	 ::!j1BG&L&L!%%'E%8!),')99;	 & 'M!    c                 Z    U(       a  U R                  U5      $ U R                  U5      S 4$ )Nc                     g)N. )as    r;   <lambda>&PyTorchShim.__call__.<locals>.<lambda>a   s    3r=   )begin_updatepredict)r7   inputsis_trains      r;   __call__PyTorchShim.__call__]   s*    $$V,,<<'66r=   c                 |    [        U R                  R                  5       S 5      nUc
  [        5       $ UR                  $ N)next_model
parametersr   r!   )r7   ps     r;   r!   PyTorchShim.devicec   s2    '')409+--88Or=   rF   returnc                    U R                   R                  5         [        R                  " 5          [        R                  " SU R
                  S9   U R                   " UR                  0 UR                  D6nSSS5        SSS5        U R                   R                  5         W$ ! , (       d  f       N2= f! , (       d  f       N;= f)zPass inputs through to the underlying PyTorch model, and return the
output. No conversions are performed. The PyTorch model is set into
evaluation mode.
cuda)device_typeenabledN)	rM   evalr   no_gradautocastr-   argskwargstrain)r7   rF   outputss      r;   rE   PyTorchShim.predictk   s    
 	]]_
 FD<Q<QR++v{{DfmmD S  	 SR _s#    B4'B#7B4#
B1	-B44
Cc                   ^ ^ T R                   R                  5         [        R                  " ST R                  S9   T R                   " TR
                  0 TR                  D6nSSS5        UU 4S jnWU4$ ! , (       d  f       N= f)a  Pass the inputs through to the underlying PyTorch model, keeping
track of which items in the input are tensors requiring gradients.
If the model returns a single value, it is converted into a one-element tuple.
Return the outputs and a callback to backpropagate.
rS   )rU   Nc                   > TR                   R                  U R                  S   SS9U R                  S'   [        R                  R
                  " U R                  0 U R                  D6  / n[        R                  " TR                  R                  5       [        S T5      5       H-  nUR                  c  M  UR                  UR                  5        M/     TR                   R                  U5      nU(       a  S nOS n[        S UT5      $ )Ngrad_tensorsT)inplacec                     [        U S5      $ Ngradhasattrxs    r;   rB   <PyTorchShim.begin_update.<locals>.backprop.<locals>.<lambda>   s    GAv,>r=   c                 h    U R                   b  U R                   R                  5       $ U R                   $ rK   )rd   zero_rg   s    r;   rB   ri      s#    qvv7IQVV\\^%Uqvv%Ur=   c                     U R                   $ rK   )rd   rg   s    r;   rB   ri      s    QVVr=   c                     [        U S5      $ rc   re   rg   s    r;   rB   ri      s    wq&/Ar=   )r,   scalerZ   r   autogradbackwardrY   	itertoolschainrM   rN   r   rd   appendunscaler   )gradsr`   
torch_data	found_infgrad_getrF   r7   s        r;   backprop*PyTorchShim.begin_update.<locals>.backprop   s     ,0+<+<+B+B^,d ,C ,ELL( NN##UZZ@5<<@ L'oo&&(!">G
 ??. ''
8 ))11,?I U+$%A8VTTr=   )rM   r[   r   rX   r-   rY   rZ   )r7   rF   outputry   s   ``  r;   rD   PyTorchShim.begin_update{   sj     	 ^^FD,A,AB[[&++??F C	U> xE CBs   'A66
Bc                    U R                   R                  5        H  u  p#UR                  c  M  U R                  R                  (       dt  U" U R
                  U4[        [        [        UR                  5      5      [        [        [        UR                  5      5      5      u  pE[        USUR                  S9Ul	        UR                  R                  5         M     U R                  R                  5         g )NT)requires_gradr!   )rM   named_parametersrd   r,   rw   idr   r   r   datar   r!   rk   update)r7   r   namerv   paramrd   s         r;   finish_updatePyTorchShim.finish_update   s     $ < < >D*))33"+$Xx
'@AXx
'@A#KE
 '/T*:K:K'JO %%' !? 	  "r=   c              #   .  #    SU R                    S3n0 nUR                  5        HU  u  pE[        US5      (       d  M  UR                  U5      (       d  M0  [	        XPR
                  S9X4R                  US5      '   MW     U(       a  U R                  R                  5       R                  5        VVs0 s H  u  pEXER                  5       _M     nnnU R                  R                  U5        S v   U R                  R                  U5        g S v   g s  snnf 7f)Npytorch__
startswith)r!    )r   itemsrf   r   r   r!   replacerM   
state_dictcloneload_state_dict)r7   params
key_prefixr   kvbackups          r;   
use_paramsPyTorchShim.use_params   s     y*

LLNDAq,''ALL,D,D8@;;8W
99Z45 # /3{{/E/E/G/M/M/OP/Otqal/OFPKK''
3KK''/ Qs   7DDAD0DA	DrT   	device_idc                     US:X  a  U R                   R                  5         g US:X  a  U R                   R                  U5        g SU S3n[        U5      e)NcpugpuzInvalid device_type: z. Try 'cpu' or 'gpu')rM   r   rS   
ValueError)r7   rT   r   msgs       r;   	to_devicePyTorchShim.to_device   sK    %KKOOE!KKY')+6JKCS/!r=   c                     U R                  U R                  5      nU R                  US.n[        R                  " U5      $ )N)r8   state)r/   rM   cfgsrslymsgpack_dumps)r7   model_bytesr   s      r;   to_bytesPyTorchShim.to_bytes   s5    ++DKK8K8""3''r=   c                     [        5       n[        R                  " U5      nUS   U l        U R	                  U R
                  US   U5      U l        U R                  R                  U5        U $ )Nr8   r   )r   r   msgpack_loadsr   r1   rM   r,   r+   )r7   
bytes_datar!   r   s       r;   
from_bytesPyTorchShim.from_bytes   s\    )+!!*-x=--dkk3w<Pf%r=   )r1   r,   r-   rM   r/   r   )NNFNNNN)__name__
__module____qualname____firstlineno____doc__r   boolr   r   r   bytesr)   rH   propertyr!   r   rE   rD   r   r   
contextlibcontextmanagerr   strintr   r   r   __static_attributes____classcell__)r:   s   @r;   r   r      s    6  %37+/<@SW+<+< 	+<
 +< /0+< (+< "(C5%<"89+< $Hc5.-I3-N$OP+< +<Z7  j S  / : / b#y #$  "S "S "(
 r=   r   r   rQ   c                     [        5       n[        R                  " U R                  5       U5        UR	                  S5        UR                  5       $ )zSerializes the parameters of the wrapped PyTorch model to bytes.

model:
    Wrapped PyTorch model.

Returns:
    A `bytes` object that encapsulates the serialized model parameters.
r   )r   r   saver   seekgetvalue)r   filelikes     r;   r.   r.      s=     yH	JJu!8,MM!r=   state_bytesr!   r"   c                     [        U5      nUR                  S5        U R                  [        R                  " X2S95        U R                  U5        U $ )a  Deserializes the parameters of the wrapped PyTorch model and
moves it to the specified device.

model:
    Wrapped PyTorch model.
state_bytes:
    Serialized parameters as a byte stream.
device:
    PyTorch device to which the model is bound.

Returns:
    The deserialized model.
r   )map_location)r   r   r   r   loadr*   )r   r   r!   r   s       r;   r0   r0      sB      {#HMM!	%**XCD	HHVLr=   )%r   rq   ior   typingr   r   r   r   r   r   backendsr
   r   r   r   compatr   
optimizersr   typesr   r   utilr   r   r   r   r   pytorch_grad_scalerr   shimr   r   r   r.   r0   r@   r=   r;   <module>r      s       6 6  Q Q  " (  3 J$ JZ  ",:r=   