
    ho6                     "   S SK JrJr  S SKrS SKJs  Jr  S SKJ	r	  S SK
Jr  S SKJr  SSKJrJr  SSKJr  SS	KJr   " S
 S\R
                  R*                  5      r " S S\5      r " S S\5      r " S S\R
                  R2                  5      rg)    )AnyOptionalN)TorchAODType)TwoStepQuantizer)get_group_qparams_symmetric   )FakeQuantizeConfigBaseIntxFakeQuantizeConfig)FakeQuantizerBase)_get_qmin_qmaxc                   6  ^  \ rS rSrSr      SS\S\S\\   S\\   S\S	\S
\S\\	   SS4U 4S jjjr
S\R                  S\R                  4S jrS\R                  R                  4S jr\ SS\R                  R                  S\\	   4S jj5       rSrU =r$ )FakeQuantizedEmbedding   a  
General embedding layer with fake quantized weights.

Specific target dtypes, granularity, schemes etc. are specified
through separate configs for weights and activations.

Example usage::

    weight_config = IntxFakeQuantizeConfig(
        dtype=torch.int4,
        group_size=8,
        symmetric=True,
    )
    fq_embedding = FakeQuantizedEmbedding(5, 10, weight_config)
    fq_embedding(torch.LongTensor([3]))
Nnum_embeddingsembedding_dimpadding_idxmax_norm	norm_typescale_grad_by_freqsparseweight_configreturnc	           	         > [         TU ]  " UUUUUUU/U	Q70 U
D6  [        R                  R	                  S5        Ub  [
        R                  " U5      U l        g S U l        g )Nz/torchao.quantization.qat.FakeQuantizedEmbedding)super__init__torch_C_log_api_usage_oncer   from_configweight_fake_quantizer)selfr   r   r   r   r   r   r   r   argskwargs	__class__s              \/home/james-whalen/.local/lib/python3.13/site-packages/torchao/quantization/qat/embedding.pyr   FakeQuantizedEmbedding.__init__,   sq     	
	
 
	
 
	
 	$$%VW$):)F)F})UD&)-D&    xc           	         U R                   b  U R                  U R                  5      nOU R                  n[        R                  " UUU R                  U R
                  U R                  U R                  U R                  5      $ N)	r    weightF	embeddingr   r   r   r   r   )r!   r(   ws      r%   forwardFakeQuantizedEmbedding.forwardJ   si    %%1**4;;7AA{{MMNN##KK
 	
r'   c                    [         R                  R                  U R                  U R                  U R
                  U R                  U R                  U R                  U R                  U R                  R                  U R                  R                  S9	nU R                  R                  [         R                  " S5      :w  a  U R                  Ul
        U$ )N)devicedtypemeta)r   nn	Embeddingr   r   r   r   r   r   r   r+   r2   r3   )r!   new_embeddings     r%   to_embedding#FakeQuantizedEmbedding.to_embeddingY   s    **MMNN##KK;;%%++## + 

 ;;f!55#';;M r'   modc                    [        UR                  UR                  UR                  UR                  UR
                  UR                  UR                  UUR                  R                  UR                  R                  S9
nUR                  R                  [        R                  " S5      :w  a  UR                  Ul        U$ )N)r   r2   r3   r4   )r   r   r   r   r   r   r   r   r+   r2   r3   r   )clsr:   r   r7   s       r%   from_embedding%FakeQuantizedEmbedding.from_embeddingl   s     /OOLLMM""JJ'::$$**""
 ::V 44#&::M r'   )r    )NN       @FFNr*   )__name__
__module____qualname____firstlineno____doc__intr   floatboolr	   r   r   Tensorr/   r5   r6   r8   classmethodr=   __static_attributes____classcell__r$   s   @r%   r   r      s    * &*$(#(:>.. . c]	.
 5/. . !. .   67. 
. .<
 
%,, 
ehh00 &  ;?XX   67 r'   r   c            	         ^  \ rS rSrSrS\R                  \R                  4S\S\R                  S\R                  SS4U 4S	 jjjr
S
\R                  R                  S\S\S\R                  R                  4S jrS
\R                  R                  S\S\S\R                  R                  4S jrS\R                  R                  4S jrSrU =r$ )#Int4WeightOnlyEmbeddingQATQuantizer   zw
Quantizer for performing QAT on a model, where embedding layers have
int4 fake quantized grouped per channel weights.
   
group_sizescale_precisionzero_point_precisionr   Nc                    > [         TU ]  5         [        R                  R	                  S5        SU l        Xl        X l        X0l        g )Nz<torchao.quantization.qat.Int4WeightOnlyEmbeddingQATQuantizer   )	r   r   r   r   r   	bit_widthrQ   rR   rS   )r!   rQ   rR   rS   r$   s       r%   r   ,Int4WeightOnlyEmbeddingQATQuantizer.__init__   s?     	$$J	
 ),;1E!r'   modelr"   r#   c                    ^  SSK Jn  S[        R                  R                  S[
        S[        4S jnS[        R                  R                  S[        R                  R                  4U 4S jjnU" XU5        U$ )z@
Swap `nn.Embedding` modules with `Int4WeightOnlyQATEmbedding`.
r   ))_replace_with_custom_fn_if_matches_filterchildcur_fqnr   c                 J    [        U [        R                  R                  5      $ r*   )
isinstancer   r5   r6   )r[   r\   s     r%   	filter_fn>Int4WeightOnlyEmbeddingQATQuantizer.prepare.<locals>.filter_fn   s    eUXX%7%788r'   c                   > [        U R                  U R                  U R                  U R                  U R
                  U R                  U R                  TR                  TR                  TR                  U R                  R                  U R                  R                  S9nU R                  R                  [        R                  " S5      :w  a  U R                  Ul        U$ )N)r   r   r   r   r   r   r   rQ   rR   rS   r2   r3   r4   )Int4WeightOnlyQATEmbeddingr   r   r   r   r   r   r   rQ   rR   rS   r+   r2   r3   r   )r[   r7   r!   s     r%   replacement_fnCInt4WeightOnlyEmbeddingQATQuantizer.prepare.<locals>.replacement_fn   s    6$33#11!--//#(#;#;||?? $ 4 4%)%>%>||**ll((M& ||""ell6&::',||$  r'   )torchao.quantization.quant_apirZ   r   r5   ModulestrrG   )r!   rX   r"   r#   rZ   r_   rc   s   `      r%   prepare+Int4WeightOnlyEmbeddingQATQuantizer.prepare   s^    	
	9UXX__ 	9s 	9t 	9	!%((// 	!ehhoo 	!0 	2%Sr'   c                 (    U R                  U5        U$ )zO
Swap all `Int4WeightOnlyQATEmbedding` modules with `Int4WeightOnlyEmbedding`.
)_convert_helper)r!   rX   r"   r#   s       r%   convert+Int4WeightOnlyEmbeddingQATQuantizer.convert   s     	U#r'   modulec                    SSK Jn  UR                  5        GH  u  p4[        U[        5      (       Ga  UR
                  R                  R                  nUR
                  R                  R                  nUR
                  R                  R                  n[        UR                  UR                  UR                  UR                  UR                  UR                   UR"                  UUUUR$                  R&                  UR$                  R(                  S9n[+        XU5        [-        U R.                  5      u  p[1        UR$                  U R.                  UUS9u  pUR3                  U5      nU" UR$                  UUU	U
[4        R6                  U5      nXl        UR3                  U5      Ul        UR3                  U5      Ul        GM  U R=                  U5        GM     g)zi
Helper function to recursively swap `Int4WeightOnlyQATEmbedding`
modules with `Int4WeightOnlyEmbedding`
r   )8_quantized_decomposed_quantize_per_channel_group_wrapper)r   r   r   r   r   r   r   rQ   rR   rS   r2   output_dtype)	precisionN)torchao._executorch_opsrp   named_childrenr^   rb   r    configrQ   rR   rS   Int4WeightOnlyEmbeddingr   r   r   r   r   r   r   r+   r2   r3   setattrr   rV   r   tor   int8scale
zero_pointrk   )r!   rn   rp   namer[   rQ   rR   rS   quantized_embeddingqminqmaxszpq_weights                 r%   rk   3Int4WeightOnlyEmbeddingQATQuantizer._convert_helper   s   
	
 "002KD%!;<<"88??JJ
"'"="="D"D"T"T//66KK % '>#(#7#7"'"5"5 % 1 1"^^#oo','?'? <<)$3)= <<..!&!3!3'#  &9:  .dnn=5LLNN-	 UU/0SLLJJ .6*,-DD,A#)137K1L#.$$U+_ 3r'   )rV   rQ   rR   rS   )r@   rA   rB   rC   rD   r   float32int32rE   r3   r   r5   rf   r   rh   rl   rk   rJ   rK   rL   s   @r%   rN   rN      s     ',}},1KK	FF F $kk	F
 
F F'XX__'-0'<?'	'RXX__-0<?	8,ehhoo 8, 8,r'   rN   c                      ^  \ rS rSrSrSSSSSS\R                  \R                  4S\S\S	\	\   S
\	\
   S\
S\S\S\S\R                  S\R                  4U 4S jjjrSS\4S jjrS rSrU =r$ )rb   i  a"  
This module implements a embedding layer with int4 fake quantized
grouped per channel weights.

args:
    group_size: the number of elements in each quantized group for weights
    scale_precision: precision of per group scales
    zero_point_precision: precision of per group zero points
Nr?   F    r   r   r   r   r   r   r   rQ   rR   rS   c           
      t   > [        [        R                  USSU	U
S9n[        TU ]  " UUUUUUUU/UQ70 UD6  g )NT)r3   rQ   is_symmetric
is_dynamicrR   rS   )r
   r   INT4r   r   )r!   r   r   r   r   r   r   r   rQ   rR   rS   r"   r#   r   r$   s                 r%   r   #Int4WeightOnlyQATEmbedding.__init__  sb     /##!+!5
 		
 	
 	
r'   enabledc                 $    XR                   l        g r*   )r    r   )r!   r   s     r%   enable_fake_quant,Int4WeightOnlyQATEmbedding.enable_fake_quant<  s    -4""*r'   c                 &    U R                  S5        g )NF)r   )r!   s    r%   disable_fake_quant-Int4WeightOnlyQATEmbedding.disable_fake_quant?  s    u%r'    )T)r@   rA   rB   rC   rD   r   r   r   rE   r   rF   rG   r3   r   r   r   rJ   rK   rL   s   @r%   rb   rb     s     &*$(#(',}},1KK"
"
 "
 c]	"

 5/"
 "
 !"
 "
 "
 "
 $kk"
 "
H5 5& &r'   rb   c                     ^  \ rS rSrSrSSSSSS\R                  \R                  S\R                  4
S\S\S	\	\   S
\	\
   S\
S\S\S\S\R                  S\R                  S\R                  S\R                  4U 4S jjjrS rSrU =r$ )rv   iC  z[
This module implements a embedding layer with int4 quantized
grouped per channel weights.
Nr?   Fr   r   r   r   r   r   r   r   rQ   rR   rS   r2   rq   c           	        > [         TU ]  5         Xl        X l        X0l        X@l        XPl        X`l        Xpl        SU l	        Xl
        Xl        Xl        Xl        U R                  S[        R                   " X4[        R"                  US95        U R                  S[        R                   " XU-  4U	US95        U R                  S[        R                   " XU-  4U
US95        g )NrU   r+   )r3   r2   rz   r{   )r   r   r   r   r   r   r   r   r   rV   rQ   rR   rS   rq   register_bufferr   emptyry   )r!   r   r   r   r   r   r   r   rQ   rR   rS   r2   rq   r$   s                r%   r    Int4WeightOnlyEmbedding.__init__I  s     	 -*& ""4 $.$8!( 	KK/uzz&	
 	KK*!<=%	
 	KK*!<=*	
r'   c                 v   SSK Jn  [        U R                  5      u  p4U" U R                  SU R
                  /U R                  U R                  [        R                  UUU R                  S9n[        R                  " UUU R                  U R                  U R                  U R                   U R"                  5      $ )Nr   )dequantize_affiner   )rq   )%torchao.quantization.quant_primitivesr   r   rV   r+   rQ   rz   r{   r   ry   rq   r,   r-   r   r   r   r   r   )r!   r(   r   r~   r   w_dqs         r%   r/   Int4WeightOnlyEmbedding.forward  s    	
 $DNN3

 !KK JJOOJJ**	
 {{MMNN##KK
 	
r'   )rV   r   rQ   r   r   r   rq   r   r   rR   r   rS   )r@   rA   rB   rC   rD   r   r   r   rE   r   rF   rG   r3   r2   r   r/   rJ   rK   rL   s   @r%   rv   rv   C  s     &*$(#(',}},1KK#$)MM7
7
 7
 c]	7

 5/7
 7
 !7
 7
 7
 7
 $kk7
 7
 kk7
 7
r
 
r'   rv   )typingr   r   r   torch.nn.functionalr5   
functionalr,   r   r   torchao.quantization.unifiedr   torchao.quantization.utilsr   fake_quantize_configr	   r
   fake_quantizerr   utilsr   r6   r   rN   rb   rf   rv   r   r'   r%   <module>r      sx    !    > 9 B .
iUXX// ib,*: ,D3&!7 3&l[
ehhoo [
r'   