
    h              
          S SK r S SKJrJrJr  S SKrS SKJrJr  S SKJ	r	  S SK
Jr  \	R                  " \R                  5      \	R                  " S5      :  a,  S SKJr  \" \" SSS	9\" SSS	9S
.5      \" SSS	9\" SSS	9S.S.rO \" SSS	9\" SSS	9S
./\" SSS	9\" SSS	9S.S.r SS\S\S   S\\   4S jjrS\4S jr SS\S\S\\   S\\   4S jjrg)    N)CallableLiteralOptional)DatasetValue)version)AutoTokenizerz4.0.0)Liststring)dtypeid)contentrole)
completionprompt)chatmlinstruction	tokenizermessages_field)messagesconversationstoolsc                    ^ ^^ UU U4S jnU$ )z
return a callable function that takes in a "messages" dataset and returns a formatted dataset, based on the
tokenizer apply chat template to the dataset along with the schema of the list of functions in the tools list.
c           	         > [        U T   S   [        5      (       aH  / n[        [        U T   5      5       H)  nUR	                  TR                  U T   U   STS95        M+     U$ TR                  U T   STS9$ )Nr   F)tokenizer   
isinstancelistrangelenappendapply_chat_template)examplesoutput_textsir   r   r   s      W/home/james-whalen/.local/lib/python3.13/site-packages/trl/extras/dataset_formatting.pyformat_dataset9conversations_formatting_function.<locals>.format_dataset.   s    h~.q1488L3x789##11(>2J12MX]ej1k :  00.1ITYaf0gg     )r   r   r   r'   s   ``` r&   !conversations_formatting_functionr+   &   s    	h r)   c                    ^  U 4S jnU$ )z
return a callable function that takes in an "instructions" dataset and returns a formatted dataset, based on the
tokenizer apply chat template to the dataset
c                 "  > [        U S   [        5      (       aW  / n[        [        U S   5      5       H8  nSU S   U   S.SU S   U   S./nUR	                  TR                  USS95        M:     U$ SU S   S.SU S   S./nTR                  USS9$ )Nr   user)r   r   	assistantr   F)r   r   )r#   r$   r%   converted_sampler   s       r&   r'   8instructions_formatting_function.<locals>.format_datasetB   s    hx($//L3x123#0B10EF(Xl5KA5NO$  ##I$A$ABR]b$A$cd 4    HX,>?$,1GH  001AE0RRr)   r*   )r   r'   s   ` r&    instructions_formatting_functionr2   <   s    S" r)   datasetreturnc                    [        U [        5      (       a  SU R                  ;   a=  U R                  S   [        S   :X  a#  [        R
                  " S5        [        USU5      $ SU R                  ;   a?  U R                  S   [        S   :X  a#  [        R
                  " S5        [        USU5      $  gU R                  [        S   :X  a!  [        R
                  " S5        [        U5      $ g)aY  
Finds the correct formatting function based on the dataset structure. Currently supported datasets are:
- `ChatML` with [{"role": str, "content": str}]
- `instruction` with [{"prompt": str, "completion": str}]

Args:
    dataset (Dataset): User dataset
    tokenizer (AutoTokenizer): Tokenizer used for formatting
    tools (list, *optional*): List of tools (callable functions) that will be accessible to the model.
        If the template does not support function calling, this argument will have no effect.

Returns:
    Callable: Formatting function if the dataset format is supported else None
r   r   z%Formatting dataset with chatml formatr   r   z*Formatting dataset with instruction formatN)r   r   featuresFORMAT_MAPPINGlogginginfor+   r2   )r3   r   r   s      r&    get_formatting_func_from_datasetr:   V   s    " '7##)))
+~h/GGDE8JPUVVg...0N84LLDE8OUZ[[ M 	 !>>LLEF3I>>r)   )N)r8   typingr   r   r   datasetsr   r   	packagingr   transformersr	   parse__version__r
   r7   r   r+   r2   r:   r*   r)   r&   <module>rA      s.    . .  #  & ==%%&'--*@@ 5xD#A5W_dhKijk&+(t&DPU\dimPnoN  %8=uS[`dGefg&+(t&DPU\dimPnoN mq.56Q.R[cdh[i, 6 IM!.7?~hr)   