
    h                        S SK JrJr  S SKJr  S SKJr  S SKJr  S SK	J
r
  \ " S S5      5       rS rS	 r\" S
5      r\S:X  a  \
" \5      r\R#                  5       S    rSSS.r\" S\S9r\R+                  \SS/ SQ\R,                  S9r\R.                  (       a6  \R/                  \R0                  5        \R/                  \R0                  SS9  ggg)    )	dataclassfield)Optional)load_dataset)	ModelCard)HfArgumentParserc                   t    \ rS rSr% Sr\" SSS0S9r\\S'   \" SSS	0S9r	\
\S
'   \" SSS0S9r\\   \S'   Srg)ScriptArguments   a  
Arguments for the script.

Args:
    push_to_hub (`bool`, *optional*, defaults to `False`):
        Whether to push the dataset to the Hugging Face Hub.
    repo_id (`str`, *optional*, defaults to `"trl-lib/prm800k"`):
        Hugging Face repository ID to push the dataset to.
    dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
        Number of workers to use for dataset processing.
Fhelpz4Whether to push the dataset to the Hugging Face Hub.)defaultmetadatapush_to_hubztrl-lib/prm800kz2Hugging Face repository ID to push the dataset to.repo_idNz0Number of workers to use for dataset processing.dataset_num_proc )__name__
__module____qualname____firstlineno____doc__r   r   bool__annotations__r   strr   r   int__static_attributes__r       S/home/james-whalen/.local/lib/python3.13/site-packages/examples/datasets/prm800k.pyr
   r
      si    
 PQK  !NOGS  ',LM'hsm r   r
   c                    / nU S   S   n/ n/ nU S   S    H  nUS   c  US   c  US   c    O[        US   5       HB  u  pgXeS   :w  d  M  US   nUS S  U/-   n	US	   S
:H  n
US S  U
/-   nUR                  X)US.5        MD     US   b  US   US      nUS	   S
:H  n
OUS   b  US   nSn
O  O+US   nUR                  U5        UR                  U
5        M     UR                  X#US.5        U$ )Nquestionproblemlabelstepscompletionshuman_completionchosen_completiontextrating   )promptr$   labelsT)	enumerateappend)exampleoutputsr*   previous_completionsprevious_labelsstepcompletion_idx
completioncontentr$   r"   r+   r&   s                r   process_exampler6   3   s_   GZ +F O )&40B+C+KPTUhPiPq*3D4G*H&N&9!::$V,215	A"8,1(+ug5&X^_` +I #$0 $] 3D9L4M N%h/14E$%1 $%7 8E#F+##G,u%3 *8 NNfUdefNr   c           
      B   / n[        U S   5      n[        U5       HD  nU R                  5        VVs0 s H
  u  pEXEU   _M     nnnUR                  [	        U5      5        MF     US    VVs0 s H  oDU Vs/ s H  oUU   PM	     sn_M     nnnU$ s  snnf s  snf s  snnf )Nr"   r   )lenrangeitemsextendr6   )examplesr/   
batch_sizeidxkvr.   s          r   process_batchrA   Z   s    GXg&'JZ )1)9:)91f9)9:w/0 ! 4;1:>:a)AQ4)):G>N	 ; *>s   B,
B6BBBa  
---
tags: [trl]
---

# PRM800K Dataset

## Summary

The PRM800K dataset is a processed version of [OpenAI's PRM800K](https://github.com/openai/prm800k), designed to train models using the [TRL library](https://github.com/huggingface/trl) for stepwise supervision tasks. It contains 800,000 step-level correctness labels for model-generated solutions to problems from the MATH dataset. This dataset enables models to learn and verify each step of a solution, enhancing their reasoning capabilities.

## Data Structure

- **Format**: [Standard](https://huggingface.co/docs/trl/main/dataset_formats#standard)
- **Type**: [Stepwise supervision](https://huggingface.co/docs/trl/main/dataset_formats#stepwise-supervision)

Columns:
- `"prompt"`: The problem statement.
- `"completions"`: A list of reasoning steps generated to solve the problem.
- `"labels"`: A list of booleans or floats indicating the correctness of each corresponding reasoning step.

This structure allows models to learn the correctness of each step in a solution, facilitating improved reasoning and problem-solving abilities.

## Generation script

The script used to generate this dataset can be found [here](https://github.com/huggingface/trl/blob/main/examples/datasets/prm800k.py).
__main__zUhttps://github.com/openai/prm800k/raw/refs/heads/main/prm800k/data/phase1_train.jsonlzThttps://github.com/openai/prm800k/raw/refs/heads/main/prm800k/data/phase1_test.jsonl)traintestjson)
data_filesT
   )labeler	timestamp
generationis_quality_control_questionis_initial_screening_questionr    r"   )batchedr=   remove_columnsnum_procdataset)	repo_typeN)dataclassesr   r   typingr   datasetsr   huggingface_hubr   transformersr   r
   r6   rA   
model_cardr   parserparse_args_into_dataclassesscript_argsrF   rP   mapr   r   r   r   r   r   <module>r\      s    )  ! % )   6$N   
8 zo.F446q9K ifJ 6j9Gkk
 --  G  K//0{22iH 5 r   