
    h                        S SK JrJr  S SKJr  S SKJr  S SKJr  S SK	J
r
  \ " S S5      5       rS rS	 r\" S
5      r\S:X  a  \
" \5      r\R#                  5       S    r\" SSS9r\R)                  \/ SQ\R*                  S9r\R-                  \5      r\R/                  SSS9r\R0                  (       a6  \R1                  \R2                  5        \R1                  \R2                  SS9  ggg)    )	dataclassfield)Optional)load_dataset)	ModelCard)HfArgumentParserc                   t    \ rS rSr% Sr\" SSS0S9r\\S'   \" SSS	0S9r	\
\S
'   \" SSS0S9r\\   \S'   Srg)ScriptArguments   a  
Arguments for the script.

Args:
    push_to_hub (`bool`, *optional*, defaults to `False`):
        Whether to push the dataset to the Hugging Face Hub.
    repo_id (`str`, *optional*, defaults to `"trl-lib/ultrafeedback-prompt"`):
        Hugging Face repository ID to push the dataset to.
    dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
        Number of workers to use for dataset processing.
Fhelpz4Whether to push the dataset to the Hugging Face Hub.)defaultmetadatapush_to_hubztrl-lib/ultrafeedback-promptz2Hugging Face repository ID to push the dataset to.repo_idNz0Number of workers to use for dataset processing.dataset_num_proc )__name__
__module____qualname____firstlineno____doc__r   r   bool__annotations__r   strr   r   int__static_attributes__r       `/home/james-whalen/.local/lib/python3.13/site-packages/examples/datasets/ultrafeedback-prompt.pyr
   r
      si    
 PQK  .NOGS  ',LM'hsm r   r
   c                     SU S   S./nSU0$ )Nuserinstruction)rolecontentpromptr   )exampler$   s     r   to_unpaired_preferencer&   3   s    '-*@ABFfr   c                 6    [        U S   S   S   5      S:  a  gg)Nr$   r   r#   i   FT)len)r%   s    r   drop_long_promptr)   8   s$    
78Q	*+c1r   a  
---
tags: [trl]
---

# UltraFeedback - Prompts Dataset

## Summary

The UltraFeedback - Prompts dataset is a processed version of the [UltraFeedback](https://huggingface.co/datasets/openbmb/UltraFeedback) dataset for model evaluation on specific aspects like helpfulness, honesty, and instruction-following.

## Data Structure

- **Format**: [Conversational](https://huggingface.co/docs/trl/main/dataset_formats#conversational)
- **Type**: [Prompt-only](https://huggingface.co/docs/trl/main/dataset_formats#prompt-only)

Column:
- `"prompt"`: The input question or instruction provided to the model.

## Generation script

The script used to generate this dataset can be found [here](https://github.com/huggingface/trl/blob/main/examples/datasets/ultrafeedback-prompt.py).
__main__zopenbmb/UltraFeedbacktrain)split)sourcer!   modelscompletionscorrect_answersincorrect_answers)remove_columnsnum_procg?*   )	test_sizeseeddataset)	repo_typeN)dataclassesr   r   typingr   datasetsr   huggingface_hubr   transformersr   r
   r&   r)   
model_cardr   parserparse_args_into_dataclassesscript_argsr7   mapr   filtertrain_test_splitr   r   r   r   r   <module>rE      s   )  ! % )   6
   
0 zo.F446q9K2'BGkkq--  G
 nn-.G&&B&?GK//0{22iH  r   