
    h                     :    S SK r S\\\\\4         S\\   4S jrg)    Ncompletionsreturnc           	         SnU  Vs/ s H
  o3S   S   PM     nnU Vs/ s H9  n[         R                  " X%[         R                  [         R                  -  5      PM;     nnU Vs/ s H  ow(       a  SOSPM     sn$ s  snf s  snf s  snf )aF  
Reward function that checks if the reasoning process is enclosed within `"<think>"` and `"</think>"` tags. The
function returns a reward of 1.0 if the format is correct, otherwise 0.0.

Args:
    completions (`list[list[dict[str, str]]]`):
        List of completions to be evaluated. Each completion must be a list of one message, i.e. a dictionary
        containing the key `"content"` with the value being the text of the completion.
    **kwargs:
        Additional keyword arguments. This function does not use them, but they are required in the function
        signature to ensure compatibility with trainers like [`GRPOTrainer`].

Returns:
    `list[float]`:
        A list of rewards, where each reward is 1.0 if the completion matches the expected format, otherwise 0.0.

Example:
```python
>>> from trl.rewards import think_format_reward

>>> completions = [
...     [{"content": "<think>\nThis is my reasoning.\n</think>\nThis is my answer."}],
...     [{"content": "<think>\nThis is my reasoning.\nThis is my answer."}],
... ]
>>> think_format_reward(completions)
[1.0, 0.0]
```
z%^<think>(?!.*<think>)(.*?)</think>.*$r   contentg      ?g        )rematchDOTALL	MULTILINE)r   kwargspattern
completioncompletion_contentsr   matchesr   s           T/home/james-whalen/.local/lib/python3.13/site-packages/trl/rewards/format_rewards.pythink_format_rewardr      s{    : 7GFQRk
a=3kRSfgSfrxx"))bll*BCSfGg/67we5Cc!w77 Sg7s   A;A B $B)r   listdictstrfloatr        r   <module>r      s4    
 8T$tCH~*>%?  8dSXk  8r   