
    ho8                        S SK r S SKJr  S SKJrJr  S SKrS SKJrJ	r	  S SK
Jr  S SKrS SKJr  S SKJrJrJrJrJrJrJrJrJrJr  SS\R4                  SSS	SSSSS	4S
\S\\   S\\   S\\   S\\   S\\\      S\\   S\\   S\4S jjr\S:X  Ga  S SK r \ RB                  " SS9r"\"RG                  S\\" S5      SS9  \"RG                  SS\S/SS9  \"RG                  S \SS!S9  \"RG                  S"S# \R4                  S$S9  \"RG                  S%\SS&S9  \"RG                  S'S(\S)S*9  \"RG                  S+\S,S*9  \"RG                  S-S.S/S09  \"RG                  S1\SS2S9  \"RG                  S3\SS/S4S59  \"RG                  S6\S7S8S9  \"RG                  S9\S:S;S9  \"RG                  S<\S	S=S9  \"RI                  5       r%\" \%RL                  \%RN                  \%RP                  \%RR                  \%RT                  \%RV                  \%RX                  \%RZ                  \%R\                  \%R^                  \%R`                  \%Rb                  \%Rd                  5        gg)>    N)Path)ListOptional)_load_modeldevice_sync)get_tokenizer)prepare_inputs_for_model)
)Float8DynamicActivationFloat8WeightConfigFloat8WeightOnlyConfigFPXWeightOnlyConfigInt4WeightOnlyConfig%Int8DynamicActivationInt8WeightConfigInt8WeightOnlyConfigPerRow	PerTensorUIntXWeightOnlyConfig	quantize_cudaFcheckpoint_pathtaskslimitquantizationsparsitycalibration_taskscalibration_limitcalibration_seq_lengthpad_calibration_inputsc                 
  ^3^4 [        SU  SU SU SU SU S3SU SU S	U S
U SU	 S3-   SU SU S3-   5        [        R                  R                  R	                  5         U R                  5       (       d   U 5       eU R                  S-  nUR                  5       (       d   [        U5      5       e[        S5        [        R                  " 5       n[        U SU5      nUc  UR                  R                  n[        US9  [        S[        R                  " 5       U-
  S S35        [        X5      nU(       Ga  SU;   a  SSKJn  U" U5        SU;   a  [!        U[#        5       5        SU;   a  [!        U[%        5       5        SU;   a  [!        U['        SS5      5        SU;   ab  S U;  a\  S!U;   a  S"nOS#n[)        UR+                  S$5      S%   5      nUS&;   d
   S'U 35       e[!        UR-                  U5      [/        UUS%S(95        S)U;   a  S!U;   a  S"nOS#nUR+                  S$5      n[)        US%   5      n[0        R2                  [0        R4                  [0        R6                  [0        R8                  [0        R:                  [0        R<                  [0        R>                  [0        R@                  S*.nUU   n[)        US   5      n[!        U[C        UUUS+95        S,U;   a  SS-K"J#n  [!        U[/        U" 5       S%S.95        SU;   a  S U;   a  SS/K$J%n  SS0K&J'n  [)        UR+                  S$5      S1   5      nUS&;   d
   S'U 35       eU[0        RP                  :X  d   U S2U 35       eS3U;   d   S45       eU" UU[R        UR                  RT                  USS9RW                  U	U
5      RY                  5       n[        S55        U" UUS69nUR[                  S%US79  UR\                  " U/UQ76   UR-                  U5      nS8U;   a  [!        U[_        5       5        S9U;   aq  [        UR+                  S$5      S:   5      nUS;:X  a  [a        5       nO0US<:X  a  [c        5       nOUS9:X  a  [a        5       nO[e        S=U 35      e[!        U[g        US>95        S?U;   Ga=  SS@K4J5n  SSAK6J7m4  SSBK8J9n   URu                  U R                  5      n!UR+                  S$5      n/ SCQn"[w        U5      S%:  a  US%   OUn#USS nU V$s/ s H  n$[)        U$5      PM     sn$U"[w        U5      S -   u  n%n&nn'n(n)n*n+UR-                  U#5      n[        SDU& SEU SFU% SGU' SHU( SIU) SJU* SKU+ SL35        [0        Rx                  " U#5         UR[                  U'U(S"SM9  SSS5        U%(       a  U44SN jn,OU44SO jn,U " UU!U,SPU(U'U&U)U*U+S%:H  SQ9
  UR-                  U5        UR{                  5         SRU;   a7  SSSK>J?n-  UR-                  U5        [!        UU-" [0        R8                  STSU95        OUR                  SV5      (       a  SSWK$JAn.  SSXKBJCm3JDn/JEn0  UR+                  S$5      S%   n1[)        UR+                  S$5      S   5      n[        [0        U1[0        R@                  5      n1UR-                  U5      nU0" US%SYU1USZ9  U." UR-                  U5      USY[R        US[9R                  S\/S%S]9  U34S^ jn2S!U;   n[!        UU/" U1UUS_9U25        U(       a  [0        R                  " US`S"Sa9n[0        R                  " 5          [        Sb5        SSWK$JAn.  U." UR-                  U5      UU[R        US[9R                  UUS]9  SSS5        gs  sn$f ! , (       d  f       GN= f! , (       d  f       g= f)cz-Runs the evaluation of a model using LM Eval.z
Evaluating model z on tasks: z	, limit: z
, device: z, precision: z, zquantization: z, sparsity: z, compile: z, max_length: z, calibration_tasks: zcalibration_seq_length: z, pad_calibration_inputs: 
ztokenizer.modelzLoading model ...cpuN)devicezTime to load model: z.02fz seconds	spinquantr   )apply_spinquantint8woint8dqfp6      int4wogptqhqqTF-   )    @         z<int4wo groupsize needs to be one of [32,64,128,256] but got )
group_sizeuse_hqqversionuintx)r-   r(   r'                  )r3   marlin)MarlinSparseLayout)layoutr4   )LMEvalInputRecorder)Int4WeightOnlyGPTQQuantizerz( requires precision or bfloat16 but got r   z)int4 gptq quantization only works on cudaz%Obtained inputs, starting calibration)r2   r!   )max_batch_sizemax_seq_lengthfloat8wofloat8dqtensorrowzUnknown granularity )granularity	autoround)AutoTokenizer)TransformerBlock)quantize_model_with_autoround_)F   r0   r:   i   r0   r-   r   z&Quantizing model with autoround(iters=z, groupsize=z, quant_lm_head=z, batch_size=z	, seqlen=z, nsamples=z, gradient_accumulate_steps=z, compile_optimization_process=))rA   rB   trainingc                 4   > [        U T5      =(       d    SU;   $ )Noutput
isinstancemodfqnrK   s     T/home/james-whalen/.local/lib/python3.13/site-packages/torchao/_models/llama/eval.py<lambda> run_evaluation.<locals>.<lambda>   s    Z5E%F &'3&'    c                    > [        U T5      $ NrR   rT   s     rW   rX   rY      s    JsDT4UrZ   r6   )
model	tokenizeris_target_modulebitsseqlen
batch_sizeitersnsamplesgradient_accumulate_stepscompile_optimization_processcodebook)codebook_weight_onlyr/   )dtypescale_block_sizez	awq-uintx)TransformerEvalWrapper)AWQObservedLinear	awq_uintxinsert_awq_observer_r1   )quant_dtyper2   )r]   r^   rB   input_prep_funcr!   wikitext)r   r   c                    > [        U T5      $ r\   rR   )mrV   rl   s     rW   rX   rY   	  s    
1>O0PrZ   )ro   r2   r3   zmax-autotune)mode	fullgraphzRunning evaluation ...)Jprinttorchaor   utils"recommended_inductor_config_setteris_fileparentstrtimer   config
block_sizer   r   torchao.prototype.spinquantr#   r   r   r   r   intsplittor   torchuint1uint2uint3uint4uint5uint6uint7uint8r   torchao.dtypesr<   torchao._models._evalr>   torchao.quantization.GPTQr?   bfloat16r	   
vocab_sizerecord_inputsget_recorded_inputssetup_cachesquantizer   r   r   
ValueErrorr
   transformersrJ   torchao._models.llama.modelrK   )torchao.prototype.autoround.autoround_llmrL   from_pretrainedlenr!   reset_caches'torchao.prototype.quantization.codebookrh   
startswithrk   torchao.prototype.awqrl   rm   rn   getattrrun_evalcompileno_grad)5r   r   r   r!   	precisionr   r   r   
max_lengthr   r   r   r   tokenizer_patht0r]   r^   r#   r3   	groupsize_quant_argsnbits_NBITS_TO_DTYPEri   r2   r<   r>   r?   inputs	quantizerrH   rJ   rL   
_tokenizer_default_quant_args_model_deviexquant_lm_headrc   rb   ra   rd   grad_acc_stepsrf   r_   rh   rk   rm   rn   ro   is_observed_linearrl   rK   s5                                                      @@rW   run_evaluationr   !   s     

o.k%	%PZ[aZbbopyozz|}<.XJk'R`ak`l  mB  CT  BU  UW  X	X$%;$<<VWmVnnp
q	r
 AAC""$$5o5$$++.??N!!##8S%88#	
	B	:E\\,,
v	 r!1$ 7x
@An>I,&CE"|#e134|#eBDEL e0A67|#Fl,B$L..s3A67I 22 NykZ2  $	7TUV l" $&,,S1KA'E;;;;;;;;;;;;;;;;	O $E*E[^,Je25*gVW|#9+3E3GQRS |#,(>AML..s3B78I 22 NykZ2 . . HT. V#P%PP##*,LL++*  %% %$&  9:3yQWXIa@VWu.v.HHV$E%e356%l005b9:Kh&'k%$h*,"++K$';K=%IJJ9kR ,&2D '667M7MNJ ',,S1K"G-0-=-A;q>vL%ab/K "--AQ-0CCDTDV0WW	,HH\*E<UG<PY{ [%%2O=IV\U]]hiqhr s11?0@ A44P3QQRT l+""#-ft #  ,
 ' !
 $V *$!1%!*8-IQ-N HHV %THHV+%++PRS $$[11D  ',,S1!4K\//4Q78J!%ekkBKHHV$E q#;: #hhv&#" 8 h!l   "Q|+G +
G # e.DI	&'@((6"%4	
 (  
 
c . ,+N 
s   1],]"=]4"
]14
^__main__zRun HF Model Evaluation)descriptionz--checkpoint_pathz<../../../checkpoints/meta-llama/Llama-2-7b-chat-hf/model.pthzModel checkpoint path.)typedefaulthelpz--tasks+rq   z?List of lm-eluther tasks to evaluate usage: --tasks task1 task2)nargsr   r   r   z--limitz"Number of eval samples to evaluatez--precisionc                 F    [        [        U R                  S5      S   5      $ )N.rE   )r   r   r   )r   s    rW   rX   rX   <  s    wuaggcl2&67rZ   zdtype precision to usez--devicezDevice to use for evaluationz-qz--quantizationa  Which quantization techniques to apply: int8dq, int8wo, fp6, int4wo-<groupsize>, int4wo-<groupsize>-gptq, autoquant, autoquant-int4, int4wo-<groupsize>-hqq, uintx-<nbits>-<groupsize>, uintx-<nbits>-<groupsize>-hqq, sparse-marlin, spinquant, autoround-<model_device>-<quant_lm_head>-<iters>-<groupsize>-<batch_size>-<seqlen>-<nsamples>-<grad_acc_steps>-<c>, float8wo, float8dq, float8saq)r   r   z
--sparsityz3Which sparsity techniques to apply: semi-structuredz	--compile
store_truezWhether to compile the model.)actionr   z--max_lengthz%Length of text to process at one timez--calibration_tasksz.tasks to do gptq calibration on, if doing gptq)r   r   r   r   z--calibration_limiti  z-number of samples to use for gptq calibrationz--calibration_seq_lengthd   z/length of sequences to use for gptq calibrationz--pad_calibration_inputsz{pads sequences shorter than calibration_seq_length to that length, yielding more calibration inputs but running much slower)3r}   pathlibr   typingr   r   r   generater   r   r^   r   rw   r   r	   torchao.quantizationr
   r   r   r   r   r   r   r   r   r   r   r|   r   boolr   __name__argparseArgumentParserparseradd_argument
parse_argsargsr   r   r   r!   r   r   r   r   r   r   r   r   r    rZ   rW   <module>r      s<     !  $  @  "  nn"&"-1'+,0#(B
B
9B
 C=B
 3-B
 smB
  S	*B
  }B
 %SMB
 !B
J z$$1JKF
ST%	   N   T0T   7%	   f3Q   ,   C  
 L/N   4	   =   <	   ">	   " K	   D



####e rZ   