
    h                         S SK r S SKJrJrJrJrJrJrJrJ	r	  S SK
r
S SKJr  S SKJrJr  S SKJrJrJr  S SKJr  S SKJr  S SKJr  S SKJrJrJr  0 r/ S	Qr " S
 S\
R@                  5      r! " S S\
R                  RD                  5      r# " S S\5      r$ " S S\$5      r% " S S5      r&S&S jr' S'S\\   S\\\\S4      \\(   4   4S jjr)\
RT                  " S5      S4S jr+S(S jr,S r-\
RT                  " S5      4S jr. S)S\RD                  S \\RD                  /\RD                  4   S!\\RD                  \//\04   S"\/S\RD                  4
S# jjr1S\RD                  S\RD                  4S$ jr2S\RD                  S\RD                  4S% jr3g)*    N)AnyCallableDictListOptionalSequenceTupleUnion)tree_flattentree_unflatten)LayoutTensorCoreTiledLayoutto_affine_quantized_intx_static)ZeroPointDomain)	Quantizer)compute_error)get_groupwise_affine_qparams/groupwise_affine_dequantize_tensor_from_qparams-groupwise_affine_quantize_tensor_from_qparams)Int4WeightOnlyGPTQQuantizerMultiTensorInputRecorderMultiTensorGPTQQuantizerStateDictManagerc                      \ rS rSr% SrSrSrSrSrSr	Sr
Sr\\S'   Sr\\S'   Sr\\S'   S	r\\S
'   \S\\R*                  \\R*                     4   S\SS 4S j5       rS\\R*                  \\R*                     4   S\SS4S jrS\4S jrS\R*                  4S jrS\\R*                  \\R*                     4   SS 4S jrS-S jrS.S jr\ SSSS\RB                  " S5      4S\RB                  4S jj5       r"\    S/S\#S\$\%S4   S\$\S4   S\&\'\\4      S\(S\4S jj5       r)\ S \*\$\S4      S\$\*\   \(4   4S! j5       r+\ S" 5       r,\ S# 5       r-\ S0 S4S\#S\$\%S4   S\$\S4   S\'\\4   S\(S\4S$ jj5       r.S\$\*\   \&\   4   4S% jr/\ S&\'\\4   S'\&\   S(\R`                  S)\$\S4   SS 4
S* j5       r1\ S\#S\(4S+ j5       r2S,r3g)0r   .   N
group_size{Gz?percdamp   	blocksize   in_place_thresholdinputkwargsreturnc                     [        U[        [        45      (       a  US   nUR                  SUR                  5      US'   UR                  SUR                  5      n[        R                  R                  " X40 UD6$ )Nr   dtypeshape)

isinstancelisttuplegetr)   popr*   torchTensor_make_wrapper_subclass)clsr%   r&   r*   s       X/home/james-whalen/.local/lib/python3.13/site-packages/torchao/quantization/GPTQ/GPTQ.py__new__MultiTensor.__new__=   sd     edE]++!HE **Wekk:w

7EKK0||223HHH    c                     / U l         [        R                  5       U l        SU l        U R                  U5        SU l        SU l        g )Nr   F)valuesr   get_instancestate_dict_managercountadd_tensorsdebug	gptq_done)selfr%   r&   s      r4   __init__MultiTensor.__init__G   s?     +-"2"?"?"A
 
r7   c                 n    U R                   R                   SU R                   SU R                  S    S3$ )Nz(shape=z
, example=r   ))	__class____name__r*   r9   r@   s    r4   __repr__MultiTensor.__repr__Q   s7    ~~&&'wtzzl*T[[QR^DTTUV	
r7   c                 $    U R                  U5      $ N)r=   )r@   r%   s     r4   appendMultiTensor.appendV   s    &&r7   c                 8   [        U[        [        45      (       a  U H  nU R                  U5        M     U $ [        U[        R
                  5      (       d   S[        U5       35       eU =R                  S-  sl        U R                  R                  U5        U $ )NzMMultiTensor can only use add_tensors for Tensors or lists of tensors but got    )
r+   r-   r,   r=   r0   r1   typer<   r9   rL   )r@   r%   inps      r4   r=   MultiTensor.add_tensorsY   s     eeT]++  %   eU\\22 _`dej`k_lm2 JJ!OJKKu%r7   c                 Z   U R                   U:  a  U(       aL  [        XR                   -
  5       H/  nU R                  U R                  S   R	                  5       5        M1     U $ U R                  U R                  5      R                  U R                  S   /XR                   -
  -  5      $ U $ Nr   )r<   ranger=   r9   clonerE   )r@   lengthpad_in_place_s       r4   pad_to_lengthMultiTensor.pad_to_lengthg   s    ::v

23A $$T[[_%:%:%<= 4  ~~dkk2>>[[_%**)<=  r7   c                 b    [        XR                  5      nU R                  S U U l        Xl        g rK   )minr<   r9   )r@   r<   s     r4   unpadMultiTensor.unpadx   s'    E::&kk&5)
r7   cudadevicec                     Xl         X l        X0l        X@l        XPl        X`l        Ub  UOS U l        Xl        Xl        Xl	        Xl
        g )Nc                     U $ rK    )xs    r4   <lambda>9MultiTensor.configure_quantization_mode.<locals>.<lambda>   s    RSr7   )get_qparams_funcquantize_funcdequantize_funccombine_qparams_list_funcmake_qtensorskip_layer_funcact_fake_quant_funcr   r    r"   ra   )r3   rh   ri   rj   rk   rl   rm   rn   r   r    r"   ra   s               r4   configure_quantization_mode'MultiTensor.configure_quantization_mode}   sQ      0)-(A%'-#6#B 	 $!
r7   rd   Ffunctypes.args	skip_gptqc           
      
   U(       + =(       a    U R                  U5      n[        U S5      (       a6  [        U R                  [        R                  5      (       a  U R                  nOSnU[
        ;  a  SS S.[
        U'   [
        U   S==   S-  ss'   [
        U   S   b  [
        U   S   nO&[
        U   S   U R                  :  d  U(       a  SnOS	nUc  0 OUn[        X445      u  pU(       d	  [        XS
9n	[        X5      u  p[        R                  R                  5          U(       d:  U R                  XXU5      n[
        U   S   (       d  [        X5        UsS S S 5        $ [        XU5      nUS   n[        U[        5      (       a  UR                   S   nUR#                  UR                  5      nU R%                  XR'                  5       U5      u  nnnU R)                  UU5      n[*        R-                  5       nUR/                  US   5      nUR1                  UU5        [3        U5        U R5                  XUS   UR7                  5       /USS  Q7US	S9nUS   R8                  (       d  [        XS9  UsS S S 5        $ US   R8                  (       GaN  US   R                   S   R#                  U5      nUS   b!  US   R                   S   R#                  U5      OUS   nUR                   S   R7                  5       nU R5                  UUUUS   R                   S   U4US	S9R                   S   R7                  5       nU R;                  UU5      R#                  UR<                  5      n[3        S[?        UU5      5        [3        S[?        UUR#                  U5      5      5        [3        S[?        UU5      5        URA                  5       n[        RB                  RD                  RG                  UUU5      R7                  5       n[3        S[?        UU5      5        [3        S[?        UU5      5        U RI                  XR<                  5      nU RK                  UU5      nU R;                  UU5      R#                  UR<                  5      nU R5                  XUUU4US	S9R                   S   R7                  5       n [3        S[?        UU 5      5        [        XS9  UsS S S 5        $  S S S 5        g ! , (       d  f       g = f)Nra   cpur   )r<   is_in_placer<   rO   rw   FTra      )rt   )orig_countsz!SQNR for QDQ (this should be inf)zSQNR for weight (can be low)z)SQNR for output with GPTQ (hopefully 35+)z?SQNR for output from qtensor vs output from DQ (should be high)z.SQNR for DQ vs DQ from qtensor (should be inf)z8SQNR for output without GPTQ (should be less than above))&is_linear_layerhasattrr+   ra   r0   GPTQ_FUNC_LISTr$   r   _tensors_to_device_flat_to_grouped_and_pad_CDisableTorchFunctionSubclass_evaluate_function	_do_unpad_calculate_hessianr   r9   tofaster_quantdetachrl   r   r:   get_name_for_paramupdate_paramprint__torch_function__rv   r>   rj   r)   SQNR
dequantizenn
functionallinearrh   ri   )!r3   rq   rr   rs   r&   rt   quantize_linearra   rw   	flat_argsspecgrouped_argsrz   outHWQDQall_qparamsqtensorr;   original_param_nameactbiasnew_outold_outDQ_afterDQ_from_qtensorqtensor_outqparams2Q2DQ2	old_q_outs!                                    r4   r   MultiTensor.__torch_function__   s   @ (-EC,?,?,E3!!jU\\&J&JZZFF ~%-.t#DN4 tW%*%$.:(.}=KD!'*c.D.DDKK~6 '~6	 *9DI
 %=Y$T!XX224",,6
 &d+M:i5 54 #<v>A QA![))HHQKQXXA!$!1!1!XXZ!HAr; &&q+6G "2!>!>!@"4"G"GQ"P++,?I%& ((d1grvvx;$qr(;Vt ) C 7==)=M 54N Aw}}}1gnnQ'**627;Aw7JtAw~~a(++F3PTUVPW**Q-++-**d1gnnQ/6"& +  VA SU  ..q+>AA!''J7b(9K 2DBEE&M4J ?'*
 #*"4"4"6#hh1188gtLPPRUg. D_-
 //77;&&q(3))"h7::177C**c3%5v +  VA SU  N), )= 54N O 544s   #8S0%DS0IS00
S>groupedc                     [        [        U6 5      nU Vs/ s H@  n[        US   [        R                  5      (       a  U " U5      R                  5       OUS   PMB     nn[        S U 5       5      nXE4$ s  snf )Nr   c              3      ^#    U  H>  m[        TS    [        R                  5      (       a  M'  [        U4S jT 5       5      v   M@     g7f)r   c              3   2   >#    U  H  oTS    :H  v   M     g7f)r   Nrd   ).0re   tups     r4   	<genexpr>8MultiTensor.grouped_to_flat.<locals>.<genexpr>.<genexpr>F  s     )SSVSs   N)r+   r0   r1   all)r   r   s    @r4   r   .MultiTensor.grouped_to_flat.<locals>.<genexpr>E  s:       
 c!fell3 *C)S))) s
   &A	A	)r,   zipr+   r0   r1   rv   r   )r3   r   	flat_tupsr   	flattenednon_tensors_equals         r4   grouped_to_flatMultiTensor.grouped_to_flat<  s     g'	 !
  )Q>>CHLLNCFJ  	 
    
  
 

 ++
s   AA4c                    / nU H  n[        Xu5      n[        X5      u  pU" U	0 U
D6nUR                  [        U[        R
                  5      (       a  UR                  5       OU5        U(       d  Ml  [        Xx[        U   S   S9nU(       a*  [        U   S   c  S[        U   S'   [        SU S35        M  [        U   S   U R                  :  d  M  [        [        U   S   [        5      (       a  M  S[        U   S'   M     U Vs/ s H  n[        U5      S   PM     nn[        US   5      S	   nU R                  U5      u  nnU(       d   S
U S3S-   S-   5       e[        UU5      nU$ s  snf )Nrw   )forceTz#>>GPTQ process identified function z as in-place, continuing...<<r<   Fr   rO   z ERR: found a function in model: z which zScaused an error in GPTQ MultiTensor, the function dispatch only works for functionszSwith Tensor outputs or that have the same non-Tensor output value across all inputs)r~   r   rL   r+   r0   r1   rv   _maybe_copy_new_valuesr}   r   r$   boolr   r   )r3   rq   r   r   rw   ra   outputsrQ   
device_inpcur_args
cur_kwargsr   detected_mutationre   grouped_outputsout_specflat_outputsr   	final_outs                      r4   r   MultiTensor._evaluate_functionL  s   C+C8J $2*#C H/J/CNN
3(E(E37793O
 {$:>$+?+N%! %)=m)L)T:>N4(7=dVC`a
 $D)++,4>"4(75 5 ;@N4(7=  @ 8??w!<?1-w?
+A.*-*=*=o*N''  	
.tfG<cdcd	
 
 #<:	 @s   E,c                 <	   SnU R                   c   U5       eU R                  c   U5       eU R                  c   U5       eU R                  c   U5       eU R                  nU R
                  nU R                  nUR                  nUR                  5       R                  5       nUR                  S   UR                  S   pUR                  nUS:X  a  U
nO[        R                  " Xg-  5      U-  n[        R                  " U5      S:H  nSXU4'   SUSS2U4'   [        R                   " U5      nU[        R"                  " [        R                  " U5      5      -  n[        R$                  " XS9nXU4==   U-  ss'   [        R&                  R)                  U5      n[        R*                  " U5      n[        R&                  R)                  USS9nUnSn/ n[-        SX5       GH  n[/        UU-   U
5      nUSS2UU24   R1                  5       n[        R                   " U5      n[        R                   " U5      nUUU2UU24   n[-        UUU5       GHH  n[/        UU-   U
5      nUU-  S:X  a,  U R                  USS2UU24   U5      nUR3                  U5        [-        UU5       H  nUU-
  nUSS2U4   nUUU4   nU R                  UR5                  S5      U5      R7                  5       nU R                  UR5                  S5      U5      R7                  5       nUUSS2U4'   UU-
  U-  n USS2US24==   U R9                  UR                  5      R5                  S5      R;                  UUUS24   R5                  S5      5      -  ss'   U USS2U4'   M     GMK     UUSS2UU24'   USS2US24==   UR9                  UR                  5      R;                  UUU2US24   5      -  ss'   GM     S	UR<                  ;   a  [        R>                  RA                  5         O0S
UR<                  ;   a  [        RB                  RA                  5         O U/ :X  a  UR3                  U5        U R                  U5      nU R                  UU5      n!U!UR9                  U5      U4$ )a  
GPTQ quantization implementation.

Args:
    H: Hessian matrix approximation
    W: Weight matrix to quantize
    device: accelerator device

Returns:
    Tuple containing:
    - Q: Quantized weights
    - DQ: Dequantized weights
    - all_qparams: Quantization parameters
zItried to do faster quant but configure quantization mode was never calledNr   rO   r   rx   T)upperxpur`   )"rh   ri   rj   rk   r    r"   r   r)   r   floatr*   ra   mathceilr0   diag
zeros_likemeanarangelinalgcholeskycholesky_inverserU   r]   rV   rL   	unsqueezeflattenr   matmulrP   r   synchronizer`   )"r3   r   r   ra   msgr    r"   r   
orig_dtyperY   columnsdeadr   dampr   Hinvcur_qparamsr   block_start	block_endW1DQ1Err1Hinv1group_start	group_endindexiwdqdqerr1r   s"                                     r4   r   MultiTensor.faster_quant{  sc   " X 	 ##/44/  ,1c1,"".33.,,8=#=8<<MM	^^
WW
HHJWWQZ7 J		)"89JFIzz!}!*!T'
a %**UZZ]33||G3	*LL!!!$""1%LL!!!4!0 w
K K)3W=I1k)++,224B""2&C##B'DY.I0EEFE$Y
   j 8'B	+q0"%"6"6![223Z#K  &&{3";	:E+A1a4AadA))!++a.+FNNPA,,Q[[^[IQQSB "C1IFa<Dq!"uI,"1ae 6 6q 9:I
 "&DAJ! ; : ,/Bq+i''(am

 3 : :[*IJ67! M
T FKKII!!#v{{"JJ""$"{+33K@b+."%%
#[00r7   c                     g rK   rd   )r3   rq   rr   rs   r&   rt   s         r4   __torch_dispatch__MultiTensor.__torch_dispatch__  s     	r7   c                     S/S 4$ Nr9   rd   rG   s    r4   __tensor_flatten__MultiTensor.__tensor_flatten__  s    z4r7   tensor_data_dicttensor_attributes
outer_sizeouter_stridec                     U " US   5      $ r   rd   )r3   r   r   r   r   s        r4   __tensor_unflatten__ MultiTensor.__tensor_unflatten__  s     #H-..r7   c                 P    U[         R                  R                  R                  :H  $ rK   )r0   r   r   r   )r3   rq   s     r4   r{   MultiTensor.is_linear_layer  s    uxx**1111r7   )r<   r>   r?   r;   r9   T)rO   )rd   NF)4rF   
__module____qualname____firstlineno__rh   ri   rj   rk   rl   rm   rn   r   int__annotations__r    r   r"   r$   staticmethodr
   r0   r1   r   r   r5   rA   strrH   rL   r=   rZ   r^   classmethodra   ro   r   r	   rP   r   r   r   r   r   r   r   r   r   r   Sizer   r{   __static_attributes__rd   r7   r4   r   r   .   sV   MO $LOJHeIs	   I%,,(>>?IKNI	I I5<<%,,)??@LO	
# 

'ELL '5<<%,,)??@	"
  !$||F3  8 
 !#+/__ T3Y_ CHo	_
 c3h(_ _ 
_ _B ,d5c?&; ,d3iQUo@V , , , ,\ k1 k1Z 
 !#!# T3Y CHo	
 S#X  
  E$s)Xc]*B$C   /sCx./ $C=/ JJ	/
 CHo/ 
/ / 28 2 2 2r7   r   c                      ^  \ rS rSrS\4U 4S jjrS\S\SS 4S jrS\\S4   4S	 jr	S\\\S4   \
\\4   4   4S
 jrSrU =r$ )r   i  Fc                 p   > [         TU ]  5         / U l        S U l        U(       + U l        X l        SU l        g Nr   )superrA   r   r   validatetarget_classr<   )r@   disable_input_validationr  rE   s      r4   rA   !MultiTensorInputRecorder.__init__  s3    	44(
r7   rs   r&   r'   c                   ^  U 4S jnUc  0 OUn[        X45      u  pET R                  cQ  UT l        U Vs/ s H5  n[        U[        R                  5      (       a  T R                  U5      OUPM7     snT l        T $ T R                  (       a  U" XE5        T =R                  S-  sl        [        U5       HD  u  pv[        U[        R                  5      (       d  M&  T R                  U   R                  U5        MF     T $ s  snf )Nc           	        > TR                   c'  UTR                   :X  d   STR                    SU 35       e[        U 5       H  u  p#TR                  U   n[        U[        R
                  5      (       d  X4:X  d   SU SU SU S35       eMK  [        UTR                  5      (       d   S[        U5       SU 35       eUR                  UR                  :X  d%   SUR                   SUR                   SU S	35       eUR                  UR                  :X  a  M   S
UR                   SUR                   SU S35       e   g )Nz:got two different input structures when recording inputs, z is not the same as z)got different values for nontensor input z for flattened input element zD, different inputs to input recorder must have same nontensor valuesz,expected input of type torch.Tensor but got zexpected input of dtype z	 but got z@ different inputs to input recorder must have same tensor dtypeszexpected input of shape z? different inputs to input recorder must have same tensor shape)
r   	enumerater   r+   r0   r1   r  rP   r)   r*   )r   r   r<   re   yr@   s        r4   validate_input8MultiTensorInputRecorder.forward.<locals>.validate_input  s   yy tyy( PQUQZQZP[[optouv( &i0NN5)!!U\\226 CA3FZ[\Z]]z  |A  {B  BF  G6 &a):):;; FtAwiOlmrlst; 77agg- 2177)9QWWIMjkpjq  rr  s- 77agg- 2177)9QWWIMjkpjq  rq  r- 1r7   rO   )r   r   r+   r0   r1   r  r   r  r<   r  rL   )r@   rs   r&   r  r   r   re   r<   s   `       r4   forward MultiTensorInputRecorder.forward  s    	. ~6&~6	99DI #"A )31ell(C(C!!!$J"DN K==9+

a
!),HE!U\\**u%,,Q/ - s   <C>.c                 V    U R                  5       u  p[        U5      S:X  d   S5       eU$ )Nr   zxkwargs is not empty but get_recorded_inputs called on MultiTensorInputRecorder, use get_recorded_args_and_kwargs instead)get_recorded_args_and_kwargslenr@   rs   r&   s      r4   get_recorded_inputs,MultiTensorInputRecorder.get_recorded_inputs:  s8    88:6{a 	
 G	
 r7   c                 t    U R                   c   S5       e[        U R                  U R                   5      u  pX4$ )Nz no inputs have been recorded yet)r   r   r   r  s      r4   r  5MultiTensorInputRecorder.get_recorded_args_and_kwargsA  s6    yy$H&HH$%dnndii@|r7   )r<   r   r   r  r  )rF   r   r  r  r   rA   r   r  r	   r  r   r  r  r	  __classcell__rE   s   @r4   r   r     sh    05K )S )C )4N )VU38_ eE#s(OT#s(^4S.T  r7   r   c                   t   ^  \ rS rSrU 4S jrS rS r\R                  " 5          SS\	4S jj5       r
SrU =r$ )	r   iG  c                    > [         TU ]  5         [        R                  5       U l        S U l        S U l        S U l        S U l        S U l	        S U l
        S U l        S U l        g rK   )r  rA   r   r:   r;   rh   ri   rj   rk   rl   rm   rn   ra   )r@   rE   s    r4   rA   GPTQQuantizer.__init__H  s^    "2"?"?"A $!#)-& ##' r7   c                     U R                   c   S5       eU R                  c   S5       eU R                  c   S5       eU R                  c   S5       eU R                  c   S5       eU R
                  c   S5       eg )Nzget_qparams_func must be setzquantize_func must be setzdequantize_func must be setz%combine_qparams_list_func must be setzmake_qtensor must be setzskip_layer_func must be set)rh   ri   rj   rk   rl   rm   rG   s    r4   _check_functionsGPTQQuantizer._check_functionsT  s    $$0P2PP0!!-J/JJ-##/N1NN/--9 	
3	
9   ,H.HH,##/N1NN/r7   c                     UR                  5        H-  u  p#[        U[        5      (       d  M  UR                  S   X'   M/     U$ r  )itemsr+   r   r9   )r@   
state_dictkeyvalues       r4   covert_multi_tensors_to_tensors-GPTQQuantizer.covert_multi_tensors_to_tensors^  s;    $**,JC%--"',,q/
 - r7   r'   c                 `   Uc  0 n[         R                  U R                  U R                  U R                  U R
                  U R                  U R                  UUUU R                  S9
  U R                  R                  U5        [        R                  " 5          [        U[        S S9  S S S 5        U R                  R                  U5        [        R                  " 5          U" U0 UD6  S S S 5        U R                  R!                  5       nU$ ! , (       d  f       Nk= f! , (       d  f       N;= f)N)
rh   ri   rj   rk   rl   rm   r   r    r"   ra   c                     gNTrd   re   r  s     r4   rf   <GPTQQuantizer._create_quantized_state_dict.<locals>.<lambda>  s    tr7   modelreplacement_fn	filter_fn)r   ro   rh   ri   rj   rk   rl   rm   ra   r;   set_state_dictr0   no_grad)_replace_with_custom_fn_if_matches_filter-_replace_buffers_and_params_with_multitensorsupdate_id_to_nameget_state_dict)r@   r7  rs   r&   r   r"   r    r,  s           r4   _create_quantized_state_dict*GPTQQuantizer._create_quantized_state_dictd  s     >F//!22,, 00&*&D&D** 00!;; 	0 	
 	..u5]]_5L+  	11%8 ]]_4"6" ,,;;=
 _ _s   D!	D
D
D-)	rn   rk   rj   ra   rh   rl   ri   rm   r;   )@   r!   r   )rF   r   r  r  rA   r(  r/  r0   r;  r   r@  r	  r"  r#  s   @r4   r   r   G  sB    
O ]]_ ' 
' 'r7   r   c            
         ^  \ rS rSrSSSSS\R
                  " S5      \" SS94S	\R
                  S
\\   4U 4S jjjr	S\R                  R                  S\\S4   S\\\4   S\R                  R                  4S jrSrU =r$ )r   i  rB  r!   r      Tr`   )inner_k_tilesra   layoutc                 &  >^ ^^^	 [         T
T ]  5         TT l        UT l        UT l        UT l        TT l        UT l        T R                  T l        S T l        UT l	        Sm	ST R                  R                  ;   a+  [        R                  T l        [        R                  T l        O[        R"                  T l        UU	U 4S jT l        UU	U 4S jT l        UU	U 4S jT l        S T l        UU4S jT l        UU 4S jnUT l        T R1                  5         g )	N   r   c                 2   > [        U TTUTR                  S9$ )N)r)   zero_point_domain)r   rJ  )r   	precisionr   n_bitr@   s     r4   rf   6Int4WeightOnlyGPTQQuantizer.__init__.<locals>.<lambda>  s     5Q"446
r7   c           	      @   > [        U US   US   TTTR                  S9$ Nr   rO   )rJ  )r   rJ  )r   qparamsr   rL  r@   s     r4   rf   rM    s+    L

"&"8"8 r7   c           	      @   > [        U US   US   TTTR                  S9$ rO  )r   rJ  )r   rP  r   rL  r@   s     r4   rf   rM    s+    N

"&"8"8 r7   c                 d    [        U 6  Vs/ s H  n[        R                  " USS9PM     sn$ s  snf )NrO   )dim)r   r0   cat)qparams_listre   s     r4   rf   rM    s/    ),l);?
);AEIIaQ);?
 ?
s   -c                 R   > [        U R                  S   T5      =(       d    T(       + $ rT   )_check_linear_int4_kr*   )linear_weightr   padding_alloweds    r4   rf   rM    s"     !4!4R!8*EX6
r7   c                 *  > TR                  X5      nUS   nUS   nTR                  [        R                  :X  a  UR	                  TR
                  5      nST
4n[        R                  nSnSn[        UUUUUUUTR                  TR                  S9	n	U	$ )Nr   rO      )scale
zero_point
block_sizetarget_dtype	quant_min	quant_maxrJ  _layout)
rj   rJ  r   INTr   zeros_precisionr0   int32r   rF  )r   rP  weightr\  r]  r^  r_  r`  ra  quantized_tensorr   r@   s             r4   rl   :Int4WeightOnlyGPTQQuantizer.__init__.<locals>.make_qtensor  s     ))!5FAJE J%%)<)<<']]4+?+?@
 ZJ ;;LII  ?%%)##"&"8"8
  $#r7   )r  rA   r   r"   r    rE  rY  ra   rn   rF  rP   r   rc  rJ  r0   int8rd  FLOATrh   ri   rj   rk   rm   rl   r(  )r@   r   r"   r    rE  rY  ra   rF  rl   rL  rE   s   ``   `   @r4   rA   $Int4WeightOnlyGPTQQuantizer.__init__  s     	$" *.kk#' DKK$$$%4%8%8D"#(::D %4%:%:D"!
 	 	*
& 
	$> )r7   r7  rs   .r&   r'   c                    U R                  UUUU R                  U R                  U R                  5      n[	        U[
        S S9nU Vs/ s H  nSU;   d  M  UPM     nnU H  nXE	 M     UR                  USSS9  U$ s  snf )Nc                     gr3  rd   r4  s     r4   rf   6Int4WeightOnlyGPTQQuantizer.quantize.<locals>.<lambda>  s    4r7   r6  kv_cacheTF)assignstrict)r@  r   r"   r    r<  ,_remove_multitensors_from_buffers_and_paramsload_state_dict)r@   r7  rs   r&   r,  kremoves          r4   quantize$Int4WeightOnlyGPTQQuantizer.quantize  s     66OONNMM

 :G'

 (;Z:?!Z;A  	jeD <s   	
A=A=)rn   r"   rk   rj   ra   rh   r   rE  rF  rl   rY  r    ri   rm   rJ  rd  )rF   r   r  r  r0   ra   r   r   r   rA   r   Moduler	   r   r   r  rv  r	  r"  r#  s   @r4   r   r     s     $||F3#8q#I`  `   `  ` DXX__-238_HLSRUX	 r7   r   c                   L    \ rS rSrSr\S 5       rS rS rS r	S r
S rS	 rS
rg)r   i  Nc                  j    [         R                  c  [        5       [         l        [         R                  $ rK   )r   	_instancerd   r7   r4   r:   StateDictManager.get_instance  s&    %%-)9);&)))r7   c                      0 U l         0 U l        g rK   )r,  
id_to_namerG   s    r4   rA   StateDictManager.__init__  s    r7   c                     UR                  5       U l         UR                  5        VVs0 s H  u  p#[        U5      U_M     snnU l        g s  snnf rK   )r,  named_parametersidr~  r@   r7  rt  vs       r4   r:  StateDictManager.set_state_dict  s@    **,050F0F0HI0H2a5!80HIIs   Ac                 x    UR                  5        VVs0 s H  u  p#[        U5      U_M     snnU l        g s  snnf rK   )r  r  r~  r  s       r4   r>  "StateDictManager.update_id_to_name   s0    050F0F0HI0H2a5!80HIIs   6c                 L    U R                   R                  [        U5      S 5      $ rK   )r~  r.   r  )r@   params     r4   r   #StateDictManager.get_name_for_param#  s    ""2e9d33r7   c                     XR                   ;   aA  [        U[        5      (       a  UR                  S   U R                   U'   g X R                   U'   g [	        SU S35      e)Nr   z
Parameter z not found in state_dict)r,  r+   r   r9   KeyError)r@   name	new_values      r4   r   StateDictManager.update_param&  sX    ??")[11(1(8(8)% )2%Zv-EFGGr7   c                     U R                   $ rK   )r,  rG   s    r4   r?  StateDictManager.get_state_dict1  s    r7   )r~  r,  )rF   r   r  r  r{  r  r:   rA   r:  r>  r   r   r?  r	  rd   r7   r4   r   r     s:    I* *
JJ4	Hr7   r   c                 D    X-  S:H  nUb  XS-  -  S:H  nU=(       a    U$ U$ )z
Check if the dimensions are compatible with int4 quantization.

Args:
    k: The dimension size to check
    group_size: The group size for quantization
    inner_k_tiles: The inner k tiles size

Returns:
    bool: Whether the dimensions are compatible
r      rd   )rt  r   rE  k_divisible_by_group_size%k_divisible_by_16_times_inner_k_tiless        r4   rW  rW  :  s<     !"! 3 01R5G0HA0M-(R-RR$$r7   flatr'   .c                 @   U  Vs/ s H&  n[        U[        5      (       a  UR                  OSPM(     nn[        U5      n[	        [        U  Vs/ s H7  n[        U[        5      (       a  UR                  XAS9R                  OU/U-  PM9     sn6 5      nXS4$ s  snf s  snf )z
Convert flattened arguments to grouped arguments with padding.

Args:
    flat: Flattened arguments
    pad_in_place: Whether to pad in place

Returns:
    Tuple containing grouped arguments and original counts
rO   )rX   )r+   r   r<   maxr,   r   rZ   r9   )r  rX   re   rz   multi_tensor_sizer   s         r4   r   r   M  s     JNNAjK88177a?KNK(
 	 A a--  1MTTS,,- 		
	G  Os   -B>Br`   Fc                    / nU  H  n[        U[        5      (       aU  UR                  S:X  d  U(       a>  UR                  UR	                  UR
                  S   R                  U5      5      5        Mm  UR                  [        U[        R                  5      (       a&  [        U[        5      (       d  UR                  U5      OU5        M     U$ )z
Move tensors to accelerator for faster processing.

Args:
    args: Arguments that may contain tensors
    device: accelerator device
    move_all: Whether to move all tensors or just single count tensors

Returns:
    List with tensors moved to CUDA
rO   r   )	r+   r   r<   rL   rE   r9   r   r0   r1   )rs   ra   move_allnew_argsre   s        r4   r~   r~   j  s     Ha%%177a<8OOAKKv(>?@OOa..z![7Q7Q V	  Or7   c                    Sn[        X5       Hr  u  pE[        U[        R                  5      (       d  M&  U(       d2  XER	                  UR
                  5      :g  R                  5       (       d  M_  UR                  U5        SnMt     U$ )a7  
Copy values from new inputs to original inputs if they've changed.
Used for handling in-place operations.

Args:
    orig_inp: Original inputs
    new_inp: New inputs (potentially modified)
    force: Whether to force copying regardless of differences

Returns:
    bool: Whether any differences were detected
FT)r   r+   r0   r1   r   ra   anycopy_)orig_inpnew_inpr   detected_differencere   new_xs         r4   r   r     sf      *a&&hhqxx005577&*#	 +
 r7   c                     [        X5       H?  u  p#[        U[        5      (       d  M  UR                  U:  d  M.  UR	                  U5        MA     g)z
Unpad MultiTensors to their original counts.

Args:
    args: Arguments that may contain MultiTensors
    orig_counts: Original counts of MultiTensors
N)r   r+   r   r<   r^   )rs   rz   argr<   s       r4   r   r     s9     $,
c;''CII,=IIe -r7   c                    SnSnU  H  nU Vs/ s H5  n[        U[        R                  5      (       a  UR                  U5      OUPM7     nn[	        Xq5      u  pUS   R                  5       nUR                  n
[        U
5      S:X  a  SOU
S   nUR                  SU
S   5      nX4XK-   -  -  nXK-  nSU-  S-  UR                  5       -  nX6R                  UR                  5       5      -  nM     U$ s  snf )z
Calculate the Hessian matrix for GPTQ.

Args:
    grouped_args: Grouped arguments
    spec: Original structure specification
    device: accelerator device

Returns:
    torch.Tensor: Hessian matrix
r   ry   rO   r   g      ?)r+   r0   r1   r   r   r   r*   r  reshapetr   )r   r   ra   r   total_batchesrQ   re   r   r   rY   r*   ns               r4   r   r     s     	
AMRUVRUQjELL&A&Aadd6lqHRU
V %Z6 QKUqAeAhIIb%)$ 	
m/00-U+qssu4	XXacce_% ( H% Ws   <C0r7  r8  r9  cur_fqnc           	          U" XSS 5      (       a  U" U 5      n U R                  5        H(  u  pE[        XQX# U S35      nXeLd  M  [        XU5        M*     U $ )aW  
Replace modules in the model if they match a filter.

Args:
    model: The model to modify
    replacement_fn: Function to apply to matching modules
    filter_fn: Function to determine if a module should be replaced
    cur_fqn: Current fully qualified name (for tracking position in model hierarchy)

Returns:
    nn.Module: Modified model
Nr   .)named_childrenr<  setattr)r7  r8  r9  r  r  child	new_childs          r4   r<  r<    sg    $ %%u%++-=9	$q.A
	 !E+ . Lr7   c           
          U R                  SS9 H  u  p[        X[        U/5      5        M     U R                  SS9 H:  u  p[        X[        R
                  " [        U/5      UR                  5      5        M<     U $ )z
Replace model buffers and parameters with MultiTensors.

Args:
    model: The model to modify

Returns:
    nn.Module: Modified model
Frecurse)named_buffersr  r   r  r   	Parameterrequires_gradr7  r  bufr  s       r4   r=  r=    st     (((7	[#/0 8 --e-<R\\+ug*>@S@STU = Lr7   c           
         U R                  SS9 H5  u  p[        U[        5      (       d  M  [        XUR                  S   5        M7     U R                  SS9 Hb  u  p[        U[        5      (       d  M  [        U U[        R                  " UR                  S   UR                  S   R                  5      5        Md     U $ )z
Convert MultiTensors in model buffers and parameters back to regular tensors.

Args:
    model: The model to modify

Returns:
    nn.Module: Modified model
Fr  r   )	r  r+   r   r  r9   r  r   r  r  r  s       r4   rr  rr    s     (((7	c;''EA/ 8 --e-<e[))U\\!_ell1o.K.KL = Lr7   )rO   Nr   )F) )4r   typingr   r   r   r   r   r   r	   r
   r0   torch.nnr   torch.utils._pytreer   r   torchao.dtypesr   r   r   %torchao.quantization.quant_primitivesr   torchao.quantization.unifiedr   torchao.quantization.utilsr   r   r   r   r   r}   __all__r1   r   rx  r   r   r   r   rW  r  r   ra   r~   r   r   r   r  r   r<  r=  rr  rd   r7   r4   <module>r     s    N N N   < 
 3 <  U2%,, U2p>uxx >BEI EP}- }@# #V%( #' 
s) 
4c3h $s)+, : %*LL$85 2,
 38,,v2F "R 	99bii["))34 C($./ 	
 YY> ryy &		 bii r7   