ó
    ²—ýhJ
  ã                   ó&   • S SK Jr  S\S\S\4S jrg)é    )ÚCallableÚmax_completion_lenÚsoft_punish_cacheÚreturnc                 ó^   ^ ^• S[         [         [              S[         [           4U U4S jjnU$ )a  
Reward function that penalizes overlong completions. It is used to penalize overlong completions, but not to reward
shorter completions. Reference: Eq. (13) from the DAPO paper (https://huggingface.co/papers/2503.14476)

$$
R_{\text{length}}(y) = \begin{cases}
0, & |y| \le L_{\max} - L_{\text{cache}} \\
\dfrac{(L_{\max} - L_{\text{cache}}) - |y|}{L_{\text{cache}}}, & L_{\max} - L_{\text{cache}} < |y| \le L_{\max} \\
-1, & L_{\max} < |y|
\end{cases}
$$

Args:
    max_completion_len (`int`):
        Maximum length of the completion,  \( L_{\max} \).
    soft_punish_cache (`int`):
        Minimum length of the completion,  \( L_{\text{cache}} \). If set to `0`, no minimum length is applied.

Example:
```python
from trl.rewards import get_soft_overlong_punishment

soft_overlong_punishment = get_soft_overlong_punishment(max_completion_len=100, soft_punish_cache=20)
completion_ids = [[1] * 90]  # simulating a completion with 90 tokens. 90 is between 80 and 100.
rewards = soft_overlong_punishment(completion_ids)
print(rewards)  # [-0.5]
```
Úcompletion_idsr   c                 óì   >• / nU  Hj  n[        U5      nUTT-
  ::  a  UR                  S5        M*  TT-
  Us=:  a  T::  a  O  OUR                  TT-
  U-
  T-  5        MY  UR                  S5        Ml     U$ )z4Reward function that penalizes overlong completions.g        g      ð¿)ÚlenÚappend)r   ÚkwargsÚrewardsÚidsÚcompletion_lengthr   r   s        €€ÚS/home/james-whalen/.local/lib/python3.13/site-packages/trl/rewards/other_rewards.pyÚsoft_overlong_punishment_rewardÚEget_soft_overlong_punishment.<locals>.soft_overlong_punishment_reward2   s~   ø€ àˆÛ!ˆCÜ # C£ÐØ Ð$6Ð9JÑ$JÓJØ—‘˜sÖ#Ø#Ð&7Ñ7Ð:KÕaÐOaÖaØ—‘Ð 2Ð5FÑ FÐIZÑ ZÐ^oÑoÖpà—‘˜tÖ$ñ "ð ˆó    )ÚlistÚintÚfloat)r   r   r   s   `` r   Úget_soft_overlong_punishmentr      s2   ù€ ð>¼¼TÄ#¹Y¹ð ÔVZÔ[`ÑVa÷ ð ð +Ð*r   N)Úcollections.abcr   r   r   © r   r   Ú<module>r      s%   ðõ  %ð,+°Sð ,+ÈSð ,+ÐU]õ ,+r   