
    h                         S SK r S SK Jr  SS jrS\S\4S jrS\S\4S jrS\S\4S	 jrS
\S\S\4S jrS\S\4S jrg)    N)Tensorc                    / nX (       a  SOS-
  nSXA-
  -  S-
  n[        U5       H  n[        U (       a  SXd-   U-
  -  S-   OSXd-   U-
  S-   -  S-   5      n[        R                  " SSU5      nUSS USS -   S-  n	USUS-
  * U-   -  U	-  R	                  5       -  nU (       d  M  USUS-
  * U-   -  * U	-  R	                  5       -  nM     US:  aq  [        R                  " SSUS-   5      nUSS USS -   S-  n	USUS-
  * W-   -  U	-  R	                  5       -  nU (       a!  USUS-
  * U-   -  * U	-  R	                  5       -  nUR                  S5        UR                  S	5        [        U5      SU-  :X  d   eUR                  5         U$ )
a  
Creates the dynamic quantiztion map.

The dynamic data type is made up of a dynamic exponent and
fraction. As the exponent increase from 0 to -7 the number
of bits available for the fraction shrinks.

This is a generalization of the dynamic type where a certain
number of the bits and be reserved for the linear quantization
region (the fraction). n determines the maximum number of
exponent bits.

For more details see
(8-Bit Approximations for Parallelism in Deep Learning)[https://arxiv.org/abs/1511.04561]
      g?Ng       @
   r   g      ?)rangeinttorchlinspacetolistappendlensort)
signedmax_exponent_bits
total_bitsdatanon_sign_bitsadditional_itemsifraction_items
boundariesmeanss
             S/home/james-whalen/.local/lib/python3.13/site-packages/torchao/optim/quant_utils.pycreate_dynamic_mapr      s   " D v!15M]>?!C$% !#&7781<q(+<<q@AAE

 ^^CN;
CR:ab>1S8",q01A56%?GGII6r 1A 56:;<uDLLNND & !^^C,<q,@A
CR:ab>1S8",q01A56%?GGIIr 1A 56:;<uDLLNNDKKNKKt9:%%%IIKK    input
block_sizec                     U R                   nU R                  SU5      n U R                  5       R                  S5      R	                  S5      nXR                  SS5      -  n U R                  U5      U4$ )z(Scale tensor so that max(abs(input)) = 1r   g-q=r   )shapeviewabsamaxclip)r   r   r!   scales       r   scale_tensorr'   ?   se    KKE JJr:&EIIKR %%e,EJJr1%%E::ee##r   qmapc                    [         R                  " XS   :  SS5      nU[         R                  " XUS-      :  SS5      -  nU[         R                  " XUS-      :  SS5      -  nU[         R                  " XUS-      :  SS5      -  nU[         R                  " XUS-      :  SS5      -  nU[         R                  " XUS-      :  SS5      -  nU[         R                  " XUS-      :  SS5      -  nU[         R                  " XUS	-      :  S	S5      -  nUS	-   R                  S
S9nX   nX   nX-
  n[         R                  " XeU-
  S-  :  X25      nUR                  [         R                  5      $ )N   r   @                r   r      max      ?r   wherer%   touint8r   r(   codescodes_upval_downval_upresiduals          r   quantize_8bit_with_qmapr>   J   sm    KKc*C3E	U[[urz"22B::E	U[[urz"22B::E	U[[urz"22B::E	U[[uqy/11a88E	U[[uqy/11a88E	U[[uqy/11a88E	U[[uqy/11a88E 	C(H{H^FHKKh%6#$==xOE88EKK  r   c                    [         R                  " XS   :  SS5      nU[         R                  " XUS-      :  SS5      -  nU[         R                  " XUS-      :  SS5      -  nU[         R                  " XUS-      :  SS5      -  nUS-   R                  SS9nX   nX   nX-
  n[         R                  " XeU-
  S-  :  X25      nUR                  [         R                  5      $ )	Nr.   r   r/   r   r      r1   r3   r4   r8   s          r   quantize_4bit_with_qmaprA   `   s     KKa(!Q/E	U[[uqy/11a88E	U[[uqy/11a88E	U[[uqy/11a88E 	B'H{H^FHKKh%6#$==xOE88EKK  r   r9   r&   c                     XR                  5          R                  UR                  S   S5      UR                  SS5      -  nUR                  U R                  5      $ )Nr   r   r   )r
   r"   r!   )r9   r(   r&   outs       r   dequant_with_qmaprD   r   sH    
yy{

 
 Q
4uzz"a7H
HC88EKK  r   x_f32returnc                 \   [         R                  " SSU R                  U R                  [         R                  S9nU R                  [         R                  5      nUS-  nUS-  n[         R                  " X:  US-   U5      nUR                  [         R                  5      R                  5       $ )Nr   i   )devicedtypei  l      )	r   randintr!   rH   int32r"   r5   float32bfloat16)rE   
rand_16bit
x_f32_bits
x_fractionx_bf16_towards_zeros        r   _fp32_to_bf16_srrR   x   s     	7EKKEKKJ EKK(Jf$J$z1
		J ??5==)2244r   )T   r.   )	r   r   r   r
   r'   r>   rA   rD   rR    r   r   <module>rU      sy     
0f$ $C $!6 ! !,!6 ! !$!V !6 !& !5F 5v 5r   