
    h                     P    S SK JrJrJrJrJr  SSKJrJr  SSK	J
r
   " S S5      rg)    )DictIterableListUnioncast   )has_torch_amptorch)is_torch_arrayc                       \ rS rSrSr     SS\S\S\S\S\4
S jjrS	 r	 SS
\
S\S   4   S\
S\S   4   4S jjrSSS\S   S\4S jrS r\S 5       rS rS rSrg)PyTorchGradScaler   a  
Gradient scaler for the PyTorch shim.

Gradients with small magnitudes are not representable in half-precision and
will underflow to zero. A gradient scaler counters this issue by scaling
up the loss before backpropagation, increasing the gradients by the same
magnitude. A large enough scale will avoid that the gradients underflow.
The gradients are unscaled in single precision after backpropagation, to
provide the unscaled gradients to the optimizer.
enabled
init_scalebackoff_factorgrowth_factorgrowth_intervalc                     Xl         X@l        X0l        XPl        [        R
                  " SS[        R                  S9U l        [        R
                  " SU5      U l        SU l	        g)a  
Construct a gradient scaler for the PyTorch shim.

enabled (bool):
    Sets whether the gradient scalar is enabled. If it is disabled, the
    methods of the grad scaler are no-ops.

init_scale (float):
    The initial scale used to increase the gradient magnitude.

backoff_factor (float):
    The scale will be multiplied by this factor if any of the gradients
    overflows.

growth_factor (float):
    The scale will be multiplied by this factor when none of the gradients
    overflowed for "growth_interval" steps.

growth_interval (int):
    When no overflows were found for this number of steps, the scale will
    be multiplied by "growth_factor".
   r   )dtypeFN)
_enabled_growth_factor_backoff_factor_growth_intervalr
   fullint_growth_tracker_scale
_found_inf)selfr   r   r   r   r   s         Y/home/james-whalen/.local/lib/python3.13/site-packages/thinc/shims/pytorch_grad_scaler.py__init__PyTorchGradScaler.__init__   sM    <  +- /$zz$Cjjz2    c                     U R                   R                  U5      U l         U R                  R                  U5      U l        g N)r   tor   )r!   devices     r"   to_PyTorchGradScaler.to_:   s/    #3366v>kknnV,r%   tensorstorch.Tensorreturnc                 p   U R                   (       d  [        SU5      $ [        S5      n[        5       n[	        U5      (       a  [        SU5      nU R                  XTU5      $ [        U[        5      (       a@  / nU H6  n[	        U5      (       d  UeUR                  U R                  XTU5      5        M8     U$ Ue)z)Scale up the values in the given tensors.r-   z>Input to gradient scaling must be a Tensor or Iterable[Tensor])	r   r   
ValueErrordictr   _scale_tensor
isinstancer   append)r!   r,   inplaceincorrect_typescale_per_devicetensorscaled_tensorss          r"   scalePyTorchGradScaler.scale>   s     }}00#L

 BF'"".'2F%%fHH**N!%f--((%%&&vI	 " "!r%   r8   r7   )ztorch.devicer-   r5   c                    [         (       d  [        S5      eUR                  (       d  Sn[        U5      eUR                  nXR;  a  U R                  R                  US9X%'   X%   nU(       a  UR                  U5      $ X-  $ )NzHGradient scaling is not supported, requires capable GPU and torch>=1.9.0zGradient scaling is only supported for CUDA tensors. If you are using PyTorch models, you can avoid this error by disabling mixed-precision support.r)   )r	   r0   is_cudar)   r   r(   mul_)r!   r8   r7   r5   msgr)   r:   s          r"   r2   PyTorchGradScaler._scale_tensor^   s     }Z  ~~> 
 S/!)'+{{~~V~'D$ (;;u%%>!r%   c                     [        5       nU H0  nUR                  UR                  / 5      nUR                  U5        M2     U$ r'   )r1   
setdefaultr)   r4   )r!   r,   tensors_per_devicer8   device_tensorss        r"   _tensors_per_device%PyTorchGradScaler._tensors_per_device|   s@    !VF/::6=="MN!!&)  "!r%   c                     U R                   $ r'   )r    )r!   s    r"   	found_infPyTorchGradScaler.found_inf   s    r%   c                    U R                   (       d  gU R                  R                  5       R                  5       R	                  5       nU R                  U5      nUR                  5        H]  u  pE[        R                  " SSUS9nUR                  US9n[        R                  " XVU5        [        US:g  5      (       d  MV  SU l        M_     U R                  $ )zNUnscale the given tensors. Returns True if any of the gradients were infinite.Fr           r=   r   T)r   r   double
reciprocalfloatrF   itemsr
   r   r(   *_amp_foreach_non_finite_check_and_unscale_boolr    )r!   r,   	inv_scalerD   r)   rE   found_inf_deviceinv_scale_devices           r"   unscalePyTorchGradScaler.unscale   s    }} KK&&(335;;=	 "55g>&8&>&>&@"F$zz$FC(||6|:<<2B $)**"& 'A r%   c                 J   U R                   (       d  g[        R                  " SU R                  (       a  SOSU R                  R
                  S9n[        R                  " U R                  U R                  UU R                  U R                  U R                  5        SU l        g)z}
Update the scale factor and clear information about infinities.

This method should be called after each optimization step.
Nr   g      ?rL   r=   F)r   r
   r   r    r   r)   _amp_update_scale_r   r   r   r   )r!   rT   s     r"   updatePyTorchGradScaler.update   s}     }} ::#c$++:L:L
 	  KK    !!	
  r%   )r   r   r    r   r   r   r   N)Fg      @g      ?g       @i  )F)__name__
__module____qualname____firstlineno____doc__rR   rO   r   r#   r*   r   r   r   r:   r   r2   rF   propertyrI   rV   rZ   __static_attributes__ r%   r"   r   r      s    	 # #"#% %  %  	% 
 %  % N-
 QV^Xn-EEF	~tN33	4@"" =>" 	"<"  . r%   r   N)typingr   r   r   r   r   compatr	   r
   utilr   r   rc   r%   r"   <module>rg      s    4 4 ) !n  n r%   