
    h                         S SK JrJr  S SKJr  S SKJr  \ " S S5      5       rS r\	S:X  aD  \" \5      r
\
R                  5       S    r\" \R                  \R                  \R                  5        gg)	    )	dataclassfield)Dataset)HfArgumentParserc                   n    \ rS rSr% Sr\" SSS0S9r\\S'   \" SSS	0S9r	\
\S
'   \" SSS0S9r\\S'   Srg)ScriptArguments   a  
Arguments for the script.

Args:
    test_size (`float`, *optional*, defaults to `0.1`):
        Fraction of the dataset to include in the test split.
    push_to_hub (`bool`, *optional*, defaults to `False`):
        Whether to push the dataset to the Hugging Face Hub.
    repo_id (`str`, *optional*, defaults to `"trl-internal-testing/zen"`):
        Hugging Face repository ID to push the dataset to.
g?helpz5Fraction of the dataset to include in the test split.)defaultmetadata	test_sizeFz4Whether to push the dataset to the Hugging Face Hub.push_to_hubztrl-internal-testing/zenz2Hugging Face repository ID to push the dataset to.repo_id N)__name__
__module____qualname____firstlineno____doc__r   r   float__annotations__r   boolr   str__static_attributes__r       V/home/james-whalen/.local/lib/python3.13/site-packages/scripts/generate_zen_dataset.pyr   r      sd    
 QRIu  PQK  *NOGS r   r   c                 v   [         R                  " S/ SQ05      nUR                  U SS9nU(       a  UR                  USS9  [         R                  " S/ SQ05      nUR                  U SS9nU(       a  UR                  US	S9  [         R                  " / SQ/ S
QS.5      nUR                  U SS9nU(       a  UR                  USS9  [         R                  " / SQ/ S
Q/ SQS.5      nUR                  U SS9nU(       a  UR                  USS9  [         R                  " / SQ/ SQS.5      nUR                  U SS9nU(       a  UR                  USS9  [         R                  " / SQ/ SQ/ SQS.5      nUR                  U SS9nU(       a  UR                  USS9  [         R                  " / SQSS// SQSS/SS/SS /S!/S"/S#S$/S%S&/S'S(/S)// S*QS+S,/S-/S./S//S0/S1S2/S3//SS4// S5QSS4/S4S4/S4S/S4/S/S4S/SS/SS/S4// S6QS4S4/S/S4/S/S/S4S4/S//S7.5      n	U	R                  U SS9n	U(       a  U	R                  US8S9  [         R                  " S9S:S;S<.S=S>S<./S:S?S<.S=S@S<./S:SAS<.S=SBS<./S:SCS<.S=SDS<./S:SES<.S=SFS<./S:SGS<.S=SHS<./S:SIS<.S=SJS<./S:SKS<.S=SLS<./S:SMS<.S=SNS<./S:SOS<.S=SPS<./S:SQS<.S=SRS<./S:SSS<.S=STS<./S:SUS<.S=SVS<./S:SWS<.S=SXS<./S:SYS<.S=SZS<./S:S[S<.S=S\S<./S:S]S<.S=S^S<./S:S_S<.S=S`S<./S:SaS<.S=SbS<.//05      n
U
R                  U SS9n
U(       a  U
R                  UScS9  [         R                  " SS:S;S<./S:S?S<./S:SAS<./S:SCS<./S:SES<./S:SGS<./S:SIS<./S:SKS<./S:SMS<./S:SOS<./S:SQS<./S:SSS<./S:SUS<./S:SWS<./S:SYS<./S:S[S<./S:S]S<./S:S_S<./S:SaS<.//05      nUR                  U SS9nU(       a  UR                  USdS9  [         R                  " S:S;S<./S:S?S<./S:SAS<./S:SCS<./S:SES<./S:SGS<./S:SIS<./S:SKS<./S:SMS<./S:SOS<./S:SQS<./S:SSS<./S:SUS<./S:SWS<./S:SYS<./S:S[S<./S:S]S<./S:S_S<./S:SaS<.//S=S>S<./S=S@S<./S=SBS<./S=SDS<./S=SFS<./S=SHS<./S=SJS<./S=SLS<./S=SNS<./S=SPS<./S=SRS<./S=STS<./S=SVS<./S=SXS<./S=SZS<./S=S\S<./S=S^S<./S=S`S<./S=SbS<.//S.5      nUR                  U SS9nU(       a  UR                  USeS9  [         R                  " S:S;S<./S:S?S<./S:SAS<./S:SCS<./S:SES<./S:SGS<./S:SIS<./S:SKS<./S:SMS<./S:SOS<./S:SQS<./S:SSS<./S:SUS<./S:SWS<./S:SYS<./S:S[S<./S:S]S<./S:S_S<./S:SaS<.//S=S>S<./S=S@S<./S=SBS<./S=SDS<./S=SFS<./S=SHS<./S=SJS<./S=SLS<./S=SNS<./S=SPS<./S=SRS<./S=STS<./S=SVS<./S=SXS<./S=SZS<./S=S\S<./S=S^S<./S=S`S<./S=SbS<.//S=SfS<./S=SgS<./S=ShS<./S=SiS<./S=SjS<./S=SkS<./S=SlS<./S=SmS<./S=SnS<./S=SoS<./S=SpS<./S=SqS<./S=SrS<./S=SsS<./S=StS<./S=SuS<./S=SvS<./S=S^S<./S=SwS<.//S.5      nUR                  U SS9nU(       a  UR                  USxS9  [         R                  " S:S;S<.S=S>S<./S:S?S<.S=S@S<./S:SAS<.S=SBS<./S:SCS<.S=SDS<./S:SES<.S=SFS<./S:SGS<.S=SHS<./S:SIS<.S=SJS<./S:SKS<.S=SLS<./S:SMS<.S=SNS<./S:SOS<.S=SPS<./S:SQS<.S=SRS<./S:SSS<.S=STS<./S:SUS<.S=SVS<./S:SWS<.S=SXS<./S:SYS<.S=SZS<./S:S[S<.S=S\S<./S:S]S<.S=S^S<./S:S_S<.S=S`S<./S:SaS<.S=SbS<.//S:S;S<.S=SfS<./S:S?S<.S=SgS<./S:SAS<.S=ShS<./S:SCS<.S=SiS<./S:SES<.S=SjS<./S:SGS<.S=SkS<./S:SIS<.S=SlS<./S:SKS<.S=SmS<./S:SMS<.S=SnS<./S:SOS<.S=SoS<./S:SQS<.S=SpS<./S:SSS<.S=SqS<./S:SUS<.S=SrS<./S:SWS<.S=SsS<./S:SYS<.S=StS<./S:S[S<.S=SuS<./S:S]S<.S=SvS<./S:S_S<.S=S^S<./S:SaS<.S=SwS<.//S.5      nUR                  U SS9nU(       a  UR                  USyS9  [         R                  " S:S;S<./S:S?S<./S:SAS<./S:SCS<./S:SES<./S:SGS<./S:SIS<./S:SKS<./S:SMS<./S:SOS<./S:SQS<./S:SSS<./S:SUS<./S:SWS<./S:SYS<./S:S[S<./S:S]S<./S:S_S<./S:SaS<.//S=S>S<./S=S@S<./S=SBS<./S=SiS<./S=SFS<./S=SHS<./S=SJS<./S=SmS<./S=SNS<./S=SoS<./S=SRS<./S=SqS<./S=SVS<./S=SsS<./S=StS<./S=S\S<./S=S^S<./S=S`S<./S=SbS<./// SzQS.5      nUR                  U SS9nU(       a  UR                  US{S9  g g )|Ntext)zBeautiful is better than ugly.z!Explicit is better than implicit.zSimple is better than complex.z#Complex is better than complicated.zFlat is better than nested.zSparse is better than dense.zReadability counts.z7Special cases aren't special enough to break the rules.z#Although practicality beats purity.z"Errors should never pass silently.zUnless explicitly silenced.z9In the face of ambiguity, refuse the temptation to guess.zEThere should be one-- and preferably only one --obvious way to do it.zBAlthough that way may not be obvious at first unless you're Dutch.Now is better than never.z0Although never is often better than *right* now.z:If the implementation is hard to explain, it's a bad idea.z@If the implementation is easy to explain, it may be a good idea.z@Namespaces are one honking great idea -- let's do more of those!F)r   shufflestandard_language_modeling)config_nameprompt)Beautiful is better thanzExplicit iszSimple is betterComplexFlat is better thanzSparse is betterReadabilityzSpecial cases aren't specialAlthough practicality beatszErrors should neverzUnless explicitly In the face of ambiguity, refusez$There should be one-- and preferablyz;Although that way may not be obvious at first unless you'rezNow iszAlthough never is oftenz)If the implementation is hard to explain,zIf the implementation is easyz Namespaces are one honking greatstandard_prompt_only) ugly.z better than implicit.z than complex. is better than complicated. nested.z than dense. counts.z enough to break the rules. purity. pass silently.z
 silenced. the temptation to guess.! only one --obvious way to do it.z Dutch. better than never.z better than *right* now. it's a bad idea.z# to explain, it may be a good idea.z  idea -- let's do more of those!)r#   
completionstandard_prompt_completion)z
 the moon. worse than nothing. than a long vacation.z is always the answer.z chocolate. without any context.z is optional. enough to become unicorns.z	 reality.z pass their driving test. forgotten.z the opportunity to laugh.z two or more confusing methods. a time traveler.z never better. not even a possibility.z it's clearly the best choice. it's probably magic.  watermelon -- let's plant some!)r#   chosenrejectedstandard_preference)z"Beautiful is better than the moon.zExplicit is worse than nothing.z&Simple is better than a long vacation.zComplex is always the answer.zFlat is better than chocolate.z%Sparse is better without any context.zReadability is optional.z7Special cases aren't special enough to become unicorns.z$Although practicality beats reality.z,Errors should never pass their driving test.zUnless explicitly forgotten.z:In the face of ambiguity, refuse the opportunity to laugh.zCThere should be one-- and preferably two or more confusing methods.zLAlthough that way may not be obvious at first unless you're a time traveler.zNow is never better.z/Although never is often not even a possibility.zGIf the implementation is hard to explain, it's clearly the best choice.z2If the implementation is easy it's probably magic.z@Namespaces are one honking great watermelon -- let's plant some!)r@   rA   #standard_implicit_prompt_preference)r+   r7   r8   r,   r-   r9   r.   r:   r/   r0   r;   r1   r2   r<   r3   r=   r4   r>   r?   )TFFTTFTFTTFTTFTFTFF)r#   r5   labelstandard_unpaired_preference)r$   zExplicit is better thanzSimple is better thanzComplex is better thanr&   zSparse is better thanzReadability countsz#Special cases aren't special enoughr(   zErrors should never passr)   z0There should be one-- and preferably only one --zAlthough that way may not bezNow is better thanzNever is often better thanz.If the implementation is hard to explain, it'sz,If the implementation is easy to explain, itzNamespaces are onez-Although practicality sometimes beats purity,z, let me think...r+   )z, of course,z
 implicit.z because clarity matters.z... let's keep it basic,z	 complex.z when needed,z complicated.z in terms of structure,r-   z... especially for readability.z  especially when others read it.z, unless...z they follow the rules.z some theoretical elegance,r/   z
 silently,z unless explicitly silenced.r1   )z way to do it,z  but sometimes it's not obvious.z3 especially when there's more than one possibility.z clear at first,z it will eventually emerge.z later.z problematic fixes.z% likely because it's too complicated.z might be a good design.z of those great ideas,z that solve many problems.z' the code should still aim for balance.T)FTF)TTF)r#   completionslabelsstandard_stepwise_supervisionmessagesuserzWhat is better than ugly?)rolecontent	assistantz
Beautiful.zWhat is better than implicit?z	Explicit.zWhat is better than complex?zSimple.z What is better than complicated?zComplex.zWhat is better than nested?zFlat.zWhat is better than dense?zSparse.zWhat counts?zReadability.z,Are special cases enough to break the rules?z;No, special cases aren't special enough to break the rules.zWhat beats purity?zPracticality.z What should never pass silently?zErrors.zWhen can errors pass silently?zWhen explicitly silenced.z,What should you do in the face of ambiguity?zRefuse the temptation to guess.z'How many ways should there be to do it?zOne, and preferably only one.z-For whom may the way not be obvious at first?zDutch.zWhat is better than never?r   z!Is never better than *right* now?zYes, often.z;What does it mean if the implementation is hard to explain?zIt means it's a bad idea.z;What does it mean if the implementation is easy to explain?zIt means it may be a good idea.zAny great ideas?z&Namespaces are one honking great idea. conversational_language_modelingconversational_prompt_only conversational_prompt_completionzAcceptable.z
Explained.zVery complex.zVery complicated.z	Circular.zHeavy.zLooking complicated.z9Yes, special cases are special enough to break the rules.zNothing.z	Warnings.zNever.zGive up.zAs many as possible.zFrench.z	Some day.z
No, never.zIt means it's a good idea.z
Recursion.conversational_preference)conversational_implicit_prompt_preference)TTTFTTTFTFTFTFFTTTT"conversational_unpaired_preference)r   	from_dicttrain_test_splitr   )r   r   r   "standard_language_modeling_datasetstandard_prompt_only_dataset"standard_prompt_completion_datasetstandard_preference_dataset+standard_implicit_prompt_preference_dataset$standard_unpaired_preference_dataset%standard_stepwise_supervision_dataset(conversational_language_modeling_dataset"conversational_prompt_only_dataset(conversational_prompt_completion_dataset!conversational_preference_dataset1conversational_implicit_prompt_preference_dataset*conversational_unpaired_preference_datasets                   r   mainrc   1   sA   )0):): 
< *&. *L)\)\gpz)\  *A&*66wLh6i#*#4#4 
6 $ . $@#P#P[dns#P#t $00F\0])0):):
*
-+< +*&X *L)\)\gpz)\  *A&*66wLh6i")"3"3
*
*
W@5 @#B #>"N"NYblq"N"r#//EZ/[292C2C
*
-+E +3/X 3^2n2n  zC  MR2n  3S/3??Uz?{+2+<+<
*
* MW,> ,,(Z ,P+`+`kt  D+`  ,E(,88Nl8m,3,=,=
, !(+G'5o.&
3.//056*J79:()x!>?K"#45'(%'CD67'
, DM DM4L5MFG5MENENF4LGFUGG4LG%
W?? ?-)@ -R,b,bmv  AF,b  -G)-99'On9o/6/@/@)DEP[htGuw)HIT_lwKxy)GHS^ktJuv)KLWboyNz{)FGR]jqIrs)EFQ\irHst8;[i:jk)WXcn  |y  [z  {)=>ap@qr)KLWboxNyz)IJU`  nI  MJ  K)WXcn  |]  [^  _)RS^i  wV  VW  X)XYdo  }E  \F  G)EFQ\  jE  IF  G)LMXcp}O~)fgr}  Kf  jg  h)fgr}  Kl  jm  n);<{  `H  ?I  J'
B 0,. 0X/h/hs|  GL/h  0M,0<<WRt<u)0):):)DEF)HIJ)GHI)KLM)FGH)EFG89)WXY)=>?)KLM)IJK)WXY)RST)XYZ)EFG)LMN)fgh)fgh);<='
< *&. *L)\)\gpz)\  *A&*66wLh6i/6/@/@)DEF)HIJ)GHI)KLM)FGH)EFG89)WXY)=>?)KLM)IJK)WXY)RST)XYZ)EFG)LMN)fgh)fgh);<='
, "l;<!k:;!i89!j9:!g67!i89!n=>!.klm!o>?!i89!.IJK!.OPQ!.MNO!h78!.IJK!m<=!.IJK!.OPQ!.VWX'
-+B +0,X 0X/h/hs|  GL/h  0M,0<<WRt<u(/(9(9)DEF)HIJ)GHI)KLM)FGH)EFG89)WXY)=>?)KLM)IJK)WXY)RST)XYZ)EFG)LMN)fgh)fgh);<='
, "l;<!k:;!i89!j9:!g67!i89!n=>!.klm!o>?!i89!.IJK!.OPQ!.MNO!h78!.IJK!m<=!.IJK!.OPQ!.VWX'
, "m<=!l;<!o>?!.ABC!k:;!h78!.DEF!.ijk!j9:!k:;!h78!j9:!.DEF!i89!k:;!l;<!.JKL!.IJK!l;<'
W@; @)%B )J(Z(Zenx}(Z(~%)55gKf5g8?8I8I)DEP[htGuv)HIT_lwKxy)GHS^ktJuv)KLWboyNz{)FGR]jqIrs)EFQ\irHst8;[i:jk)WXcn  |y  [z  {)=>ap@qr)KLWboxNyz)IJU`  nI  MJ  K)WXcn  |]  [^  _)RS^i  wV  VW  X)XYdo  }E  \F  G)EFQ\  jE  IF  G)LMXcp}O~)fgr}  Kf  jg  h)fgr}  Kl  jm  n);<{  `H  ?I  J'
, )DEP[huGvw)HIT_lxKyz)GHS^kzJ{|)KLWb  pC  OD  E)FGR]juIvw)EFQ\iqHrs8;[q:rs)WXcn  |w  [x  y)=>ak@lm)KLWbozN{|)IJU`muLvw)WXcn  |F  [G  H)RS^i  wM  VN  O)XYdo  }F  \G  H)EFQ\itHuv)LMXcp|O}~)fgr}  Kg  jh  i)fgr}  Kf  jg  h);<{_k>lm'
-+K +95X 9j8z8z  FO  Y^8z  9_59EEg  \GE  	H181B1B)DEF)HIJ)GHI)KLM)FGH)EFG89)WXY)=>?)KLM)IJK)WXY)RST)XYZ)EFG)LMN)fgh)fgh);<='
, "l;<!k:;!i89!.ABC!g67!i89!n=>!.ijk!o>?!k:;!.IJK!j9:!.MNO!i89!k:;!m<=!.IJK!.OPQ!.VWX'
* JW,D ,2.Z 2\1l1l  xA  KP1l  2Q.2>>wTx>y r   __main__N)dataclassesr   r   datasetsr   transformersr   r   rc   r   parserparse_args_into_dataclassesscript_argsr   r   r   r   r   r   <module>rk      s{    )  )   6\	z@ zo.F446q9K		 7 79L9LM r   