
    oi                         S /r SSKrSSKJrJr  SSKJr  SSKJr  SSKrSSK	J
r  SSK	JrJr  SS	KJrJrJrJrJr  SS
KJrJrJrJrJr  \   S S\S\\   S\\   S\\   S\S\4S jj5       r \      S!S\S\S\\   S\\   S\S\\   S\S\S\4S jj5       r  S r!  SSK"r#\$" \#RJ                  RL                  RN                  S5      (       d  \(" \#RJ                  RL                  RN                  5      r)\) H  r*\*RW                  S5      (       a  M  \," \#RJ                  RL                  RN                  \*5      r*\$" \*S5      (       d  MR  \$" \*RZ                  S5      (       a  Ml  \" \*RZ                  5      \*l-        M     S\#RJ                  RL                  RN                  l.        g! \/ a,  r0\(       a  \Rb                  " S\" \05       35         Sr0C0gSr0C0ff = f)"patch_torch_functions    N   )torch_compileUNSLOTH_ENABLE_LOGGING)logger)Tensor)
functional)
_reductiongrad)handle_torch_functionhas_torch_functionhas_torch_function_variadic	normalizenp)CallableListOptionalTupleUnioninputnormalized_shapeweightbiasepsreturnc           
      (   [        XU5      (       a.  [        [        XU4U UUUUS9R                  U R                  5      $ [
        R                  " XX#U[
        R                  R                  R                  5      R                  U R                  5      $ )zpApply Layer Normalization for last certain number of dimensions.

See :class:`~torch.nn.LayerNorm` for details.
)r   r   r   )	r   r   
layer_normtodtypetorchbackendscudnnenabled)r   r   r   r   r   s        [/home/james-whalen/.local/lib/python3.13/site-packages/unsloth_zoo/patch_torch_functions.pyr   r   '   s     #5$77$D!
 "U[[/	 sENN4H4H4P4Pbo    targetsize_averageignore_indexreduce	reductionlabel_smoothingc                 t   [        XU5      (       a1  [        [        XU4U UUUUUUUS9
R                  U R                  5      $ Uc  Ub  [
        R                  " X55      n[        R                  R                  R                  U UU[
        R                  " U5      UU5      R                  U R                  5      $ )a  Compute the cross entropy loss between input logits and target.

See :class:`~torch.nn.CrossEntropyLoss` for details.

Args:
    input (Tensor) : Predicted unnormalized logits;
        see Shape section below for supported shapes.
    target (Tensor) : Ground truth class indices or class probabilities;
        see Shape section below for supported shapes.
    weight (Tensor, optional): a manual rescaling weight given to each
        class. If given, has to be a Tensor of size `C`
    size_average (bool, optional): Deprecated (see :attr:`reduction`). By default,
        the losses are averaged over each loss element in the batch. Note that for
        some losses, there multiple elements per sample. If the field :attr:`size_average`
        is set to ``False``, the losses are instead summed for each minibatch. Ignored
        when reduce is ``False``. Default: ``True``
    ignore_index (int, optional): Specifies a target value that is ignored
        and does not contribute to the input gradient. When :attr:`size_average` is
        ``True``, the loss is averaged over non-ignored targets. Note that
        :attr:`ignore_index` is only applicable when the target contains class indices.
        Default: -100
    reduce (bool, optional): Deprecated (see :attr:`reduction`). By default, the
        losses are averaged or summed over observations for each minibatch depending
        on :attr:`size_average`. When :attr:`reduce` is ``False``, returns a loss per
        batch element instead and ignores :attr:`size_average`. Default: ``True``
    reduction (str, optional): Specifies the reduction to apply to the output:
        ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
        ``'mean'``: the sum of the output will be divided by the number of
        elements in the output, ``'sum'``: the output will be summed. Note: :attr:`size_average`
        and :attr:`reduce` are in the process of being deprecated, and in the meantime,
        specifying either of those two args will override :attr:`reduction`. Default: ``'mean'``
    label_smoothing (float, optional): A float in [0.0, 1.0]. Specifies the amount
        of smoothing when computing the loss, where 0.0 means no smoothing. The targets
        become a mixture of the original ground truth and a uniform distribution as described in
        `Rethinking the Inception Architecture for Computer Vision <https://arxiv.org/abs/1512.00567>`__. Default: :math:`0.0`.

Shape:
    - Input: Shape :math:`(C)`, :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1`
      in the case of `K`-dimensional loss.
    - Target: If containing class indices, shape :math:`()`, :math:`(N)` or :math:`(N, d_1, d_2, ..., d_K)` with
      :math:`K \geq 1` in the case of K-dimensional loss where each value should be between :math:`[0, C)`.
      If containing class probabilities, same shape as the input and each value should be between :math:`[0, 1]`.

    where:

    .. math::
        \begin{aligned}
            C ={} & \text{number of classes} \\
            N ={} & \text{batch size} \\
        \end{aligned}

Examples::

    >>> # Example of target with class indices
    >>> input = torch.randn(3, 5, requires_grad=True)
    >>> target = torch.randint(5, (3,), dtype=torch.int64)
    >>> loss = F.cross_entropy(input, target)
    >>> loss.backward()
    >>>
    >>> # Example of target with class probabilities
    >>> input = torch.randn(3, 5, requires_grad=True)
    >>> target = torch.randn(3, 5).softmax(dim=1)
    >>> loss = F.cross_entropy(input, target)
    >>> loss.backward()
)r   r'   r(   r)   r*   r+   )r   r   cross_entropyr   r   
_Reductionlegacy_get_stringr    _C_nncross_entropy_lossget_enum)r   r&   r   r'   r(   r)   r*   r+   s           r$   r-   r-   C   s    X #5&99$F#%%+
 "U[[/	 6#500F	88<<**I& 	bor%   c                      [        [        R                  R                  S5      (       de  [        R                  R                  R                  [        R                  R                  l        [        [        R                  R                  l        g g )N_uncompiled_layer_norm)hasattrr    nnr	   r   r5    r%   r$   r   r      sT    588&&(@AA5:XX5H5H5S5S2)3& Br%   __UNSLOTH_PATCHED___forwardget_compiler_configTz)TorchAO patching failed with exception = )NNgh㈵>)NNiNmeang        )2__all__ostemporary_patches.commonr   r   logr   r    r   torch.nnr	   Fr
   r.   r   torch.nn.functionalr   r   r   r   r   typingr   r   r   r   r   intfloatr   boolstrr-   r   'torchao.quantization.qat.fake_quantizertorchaor6   quantizationqatfake_quantizerdirqat_classes	qat_class
startswithgetattrr;   r9   	Exceptionewarningr8   r%   r$   <module>rW      sT  &  
 K    $ 3  : 9   $!3i V 6
	
 
  0    $#'! aaa Va 4.	a
 a TNa a a a aD 4 M27''++::<QRR'..22AAB$I##C((( 4 4 8 8 G GSIy),,y002GHH(5i6G6G(HI% % GK  //C   MB3q6(KL Ms&   "B&F' F' &A F' 'G-"GG