ó
    ²—ýhZ  ã                   ó   • S r S\4S jrg)a#  Computes cross-entropy loss using the logits generated by performing
    the matrix multiplication between the embeddings (e) and classifier (c).

    This method saves GPU memory by not materializing the logits into GPU
    main memory.


    Specifically, this computes

    ```python

    loss = F.cross_entropy((e @ c.T).float(), targets)
    ```

    without allocating the intermediary (e @ c.T).float() matrix.

    :param e: Embedding of the inputs used to compute the logits. Shape (..., D)
    :param c: Classifier matrix. Shape (NumClasses, D)
    :param targets: The target class for each input. Values must be in [0, NumClasses). Shape (...)
    :param ignore_index: If an input as a target of this value, it is ignored in the loss computation.
    :param softcap: The value for logit softcapping.
    :param reduction: The reduction to perform over the loss. Supports "mean", "sum", and "none".
    :param shift: If true, the embedding and targets are assumed to require a shift along the
        temporal axis to perform next token prediction. Specifically, setting this to true
        will efficiently compute

        ```python
        shift_e = e[..., :-1, :].flatten(0, -2)
        shift_targets = targets[..., 1:].flatten()

        loss = F.cross_entropy((shift_e @ c.T), targets)
        ```
Údocstrc                  ó   ^ • U 4S jnU$ )Nc                 ól   >• SR                  T5      U R                  b  U R                  OS-   U l        U $ )NÚ )ÚjoinÚ__doc__)Úfnr   s    €ÚO/home/james-whalen/.local/lib/python3.13/site-packages/cut_cross_entropy/doc.pyÚadd_docÚadd_doc_start.<locals>.add_doc&   s,   ø€ Ø—W‘W˜V“_°b·j±jÑ6L¨¯
ª
ÐRTÑUˆŒ
àˆ	ó    © )r   r
   s   ` r	   Úadd_doc_startr   %   s   ø€ õð
 €Nr   N)ÚLINEAR_CROSS_ENTROPY_DOCÚstrr   r   r   r	   Ú<module>r      s   ðð Ð ðF˜3õ r   