َ
    ®—‎h<E  م                   َ&  • S SK r S SKrS SKJr  S SKJr  S SKJrJrJ	r	J
r
  S SKrS SKJr  S SKJrJr  S SKJr  SS	KJr  SS
KJrJrJrJr  SSKJr  SSKJr   " S S\\5      r\ " S S\5      5       r \" \ 5      S\RB                  RD                  S\ S\RB                  RD                  4S j5       r#\ " S S\5      5       r$ " S S\$5      r%\" \$5      S\RB                  RD                  S\$S\RB                  RD                  4S j5       r&\ " S S\5      5       r' " S S\'5      r(\" \'5      S\RB                  RD                  S\'S\RB                  RD                  4S j5       r) " S  S!\5      r*S"\RB                  RD                  S#\
\S$4   SS4S% jr+g)&é    N)ع	dataclass)عEnum)عAnyعListعOptionalعTuple)عAOBaseConfig)ع_QUANTIZE_CONFIG_HANDLERع register_quantize_module_handler)عTwoStepQuantizeré   )عFakeQuantizedEmbedding)عFakeQuantizeConfigعFakeQuantizeConfigBaseعIntxFakeQuantizeConfigع_infer_fake_quantize_configs)عFakeQuantizedLinear©ع_log_deprecation_warningc                   َ    • \ rS rSrSrSrSrSrg)عQATStepé!   zR
Enum value for the `step` field in :class:`~torchao.quantization.qat.QATConfig`.
عprepareعconvert© N)ع__name__ع
__module__ع__qualname__ع__firstlineno__ع__doc__عPREPAREعCONVERTع__static_attributes__r   َ    عV/home/james-whalen/.local/lib/python3.13/site-packages/torchao/quantization/qat/api.pyr   r   !   s   † ٌً €GطƒGr$   r   c            
       َک   • \ rS rSr% Sr\\   \S'   \\   \S'   \\   \S'   \	\S'      SSS	.S\\   S\\   S\\   S\	4S
 jjjr
S rSrg)ع	QATConfigé*   a>  
Config for applying quantization-aware training (QAT) to a `torch.nn.Module`,
to be used with :func:`~torchao.quantization.quant_api.quantize_`.

This config has two steps, "prepare" and "convert". The prepare step applies
"fake" quantization to the model and should be applied before training, while
the convert step converts the model into an actual quantized model. Fake
quantization here refers to simulating the quantization numerics (e.g. int4)
using high precision arithmetic (e.g. bf16), with the goal of reducing
eventual degradation from quantization.

There are two ways to use this config. The first involves passing a base
post-training quantization (PTQ) config, which we will use to automatically
infer the corresponding fake quantization schemes to use in the prepare phase.
In the convert phase, we will then apply the base PTQ config to the model.
This will be the most common use case.

Example usage::

    from torchao.quantization import (
        quantize_,
        Int8DynamicActivationInt4WeightConfig,
    )
    from torchao.quantization.qat import QATConfig

    base_config = Int8DynamicActivationInt4WeightConfig(group_size=32)
    quantize_(model, QATConfig(base_config, step="prepare"))
    train_loop(model)
    quantize_(model, QATConfig(base_config, step="convert"))

Currently only the following are supported as base configs:

    - :class:`~torchao.quantization.Int8DynamicActivationInt4WeightConfig`
    - :class:`~torchao.quantization.Int4WeightOnlyConfig`

The second way to use this config involves specifying the fake quantization
schemes directly. Users will pass in :class:`~torchao.quantization.qat.FakeQuantizeConfigBase`
for weights and/or activations instead of the base PTQ config. This use case
is mostly for experimentation, e.g. when the corresponding PTQ config does
not exist yet.

Example usage::

    from torchao.quantization import quantize_
    from torchao.quantization.qat import IntxFakeQuantizeConfig

    activation_config = IntxFakeQuantizeConfig(
        torch.int8, "per_token", is_symmetric=False,
    )
    weight_config = IntxFakeQuantizeConfig(
        torch.int4, group_size=32, is_symmetric=True,
    )
    qat_config = QATConfig(
        # must specify one of `base_config` or `weight_config`
        activation_config=act_config,
        weight_config=weight_config,
        step="prepare",
    )
    quantize_(model, qat_config)

Args:
    base_config (Optional[AOBaseConfig]): Base PTQ config to infer the fake
        quantization configs during the prepare phase, and to apply directly
        during the convert phase.
    activation_config (Optional[FakeQuantizeConfigBase]): Custom fake
        quantization config for input activations, always optional.
        Must be None if `base_config` is used.
    weight_config (Optional[FakeQuantizeConfigBase]): Custom fake quantization
        config for weights. Must be None if `base_config` is used.

Keyword args:
    step (str): One of "prepare" or "convert", determines the QAT phase

Raises:
    ValueError: If `base_config` and `activation_config` are both specified
    ValueError: If `base_config` and `weight_config` are both specified
    ValueError: If none of `base_config`, `activation_config`, or
        `weight_config` are specified
    ValueError: If either `activation_config` or `weight_config` is specified
         and `step` is "convert"
    ValueError: If `step` is not one of "prepare" or "convert"
    ValueError: If the config is applied on a module that is not a
        `torch.nn.Linear` or `torch.nn.Embedding`, or it is applied on
        `torch.nn.Embedding` with an activation config
عbase_configعactivation_configعweight_configعstepNr   )r,   c                َT   • Xl         X l        X0l        X@l        U R	                  5         g ©N)r)   r*   r+   r,   ع__post_init__)عselfr)   r*   r+   r,   s        r%   ع__init__عQATConfig.__init__‰   s'   € ً 'شط!2شط*شطŒ	ط×رصr$   c                 َx  • [         R                  R                  S5        U R                  R	                  5       U l        [
         Vs/ s H  oR                  PM     nnU R                  U;  a  [        SU 35      eU R                  b  U R                  b  [        S5      eU R                  b  U R                  b  [        S5      eU R                  [
        R                  :X  a<  [        U R                  U R                  U R                  45      (       d  [        S5      eU R                  [
        R                  :X  a%  U R                  c  U R                  b  [        S5      e[        U R                  [        5      (       a4  U R                  R                   R"                  n[        U SU SU S	35      eg s  snf )
Nz"torchao.quantization.qat.QATConfigz`step` must be one of z9Cannot specify both `base_config` and `activation_config`z5Cannot specify both `base_config` and `weight_config`zWMust specify `base_config`, `activation_config`, or `weight_config` in the prepare stepzICannot specify `weight_config` or `activation_config` in the convert stepz~ was passed as `base_config`. Did you mean to do the following instead?
    qat_config = QATConfig(
        activation_config=z(...),
        weight_config=z$(...),
        step="prepare",
    ))عtorchع_Cع_log_api_usage_oncer,   عlowerr   عvalueع
ValueErrorr)   r*   r+   r!   عanyr"   ع
isinstancer   ع	__class__r   )r0   عsعall_step_valuesعconfig_types       r%   r/   عQATConfig.__post_init__—   s”  € ـڈ‰×$ر$ذ%IشJط—I‘I—O‘Oس%ˆŒ	ف,3س4ھG qں7œ7©Gˆذ4طڈ9‰9کOس+ـذ5°oذ5FذGسHذHط×رر'¨D×,Bر,Bر,NـطKَً ً ×رر'¨D×,>ر,>ر,JـذTسUذUطڈ9‰9œں™س'´ط×رکt×5ر5°t×7Iر7IذJ÷1
ٌ 1
ô طiَً ً ڈ9‰9œں™س'ط×"ر"ر.°$×2Dر2Dر2Pنط[َً ô گd×&ر&ش(>×?ر?ط×*ر*×4ر4×=ر=ˆKـطگ-ً  -à-8¨Mً :)ط)4¨ً 6ًَً ً @ùٍ- 5s   ءF7)r*   r)   r,   r+   )NNN)r   r   r   r   r    r   r	   ع__annotations__r   r   r1   r/   r#   r   r$   r%   r'   r'   *   s‰   ‡ ٌTًl ک,ر'س'طذ 6ر7س7طذ2ر3س3ط
ƒMً /3ط>Bط:>ً	ً "ٍàکlر+ًً $ذ$:ر;ًً  ذ 6ر7ً	ً ِُ"r$   r'   عmoduleعconfigعreturnc                 َھ  • UR                   nUR                  nU[        R                  :X  Ga  Ub  [	        U5      u  pEOUR
                  nUR                  n[        U [        R                  R                  5      (       aV  SSKJnJn  [        XV5      (       a'  Ub  [        XF5      (       d   eUR                  XU5      $ [        R                  " XU5      $ [        U [        R                  R                   5      (       a$  Ub  [#        S5      e[$        R&                  " X5      $ [#        S[)        U 5      -  5      eU[        R*                  :X  d
   SU-  5       eUR
                  b   S5       eUR                  b   S5       e[        U [        [$        45      (       d  U $ 0 nS	n	U R,                  R.                  n[        U[0        5      (       aE  UR2                  (       a4  U R,                  R4                  US
'   U R,                  R6                  US'   Sn	[        U [        5      (       a  U R9                  5       n O5[        U [$        5      (       a  U R;                  5       n O[#        SU  S35      eUbd  U	(       aD  [=        US5      (       a3  [>        R@                  " S5        [B        RD                  " U5      nSUl#        [H        [)        U5         " X40 UD6$ U $ )ad  
During the prepare step, perform module swap to apply fake quantization.
If the base PTQ config is specified, derive the fake quantization configs from it.

During the convert step, first perform module swap to revert all fake quantized
modules to the corresponding built-in `torch.nn.Module`s, then apply the
base config directly to quantize the module.
Nr   )عNVFP4FakeQuantizeConfigعNVFP4FakeQuantizedLinearْ;Activation fake quantization is not supported for embeddingْ-Module of type '%s' does not have QAT supportz!unexpected step '%s' in QATConfigzunexpected `activation_config`zunexpected `weight_config`Fعcustom_scaleعcustom_zero_pointTzEncountered unexpected module z, should never happenعintx_choose_qparams_algorithmz'Disabling intx_choose_qparams_algorithm)%r)   r,   r   r!   r   r*   r+   r;   r4   عnnعLinearعtorchao.prototype.qatrF   rG   عfrom_linearr   ع	Embeddingr9   r   عfrom_embeddingعtyper"   عweight_fake_quantizerrC   r   عrange_learningعscaleع
zero_pointع	to_linearعto_embeddingعhasattrعloggingعdebugعcopyعdeepcopyrL   r
   )
rB   rC   r)   r,   ع
act_configr+   rF   rG   عkwargsعhas_custom_scale_and_zero_points
             r%   ع_qat_config_transformrb   ¼   sµ  € ً  ×$ر$€Kطڈ;‰;€DطŒwڈ‰شطر"ـ*Fہ{س*Sر'ˆZکà×1ر1ˆJط"×0ر0ˆMـگfœeںh™hںo™o×.ر.÷ô
 ک-×AرAط!ر)¬Zط÷.ٌ .ً ً ً 0×;ر;ط¨َً ô +×6ز6ط¨َً ô ک¤§،× 2ر 2×3ر3طر%ـ طQَً ô *×8ز8¸سOذOنط?ؤ$ہvأ,رNَً ً
 ”w—‘س&ذRذ(Kبdر(RسRذ&ط×'ر'ر/ذQذ1QسQذ/ط×#ر#ر+ذIذ-IسIذ+ô ک&ش#6ش8Nذ"O×PرPطˆMً ˆط*/ذ'ط×4ر4×;ر;ˆنگ}ش&<×=ر=ط×,×,à%+×%Aر%A×%Gر%GˆFگ>ر"ط*0×*Fر*F×*Qر*QˆFذ&ر'ط.2ذ+ô گfش1×2ر2ط×%ر%س'‰Fـکش 6×7ر7ط×(ر(س*‰Fنط0°°ذ8MذNَً ً ر"و.´7طذ<÷4ٌ 4ô —’ذGشHـ"ںmڑm¨Kس8گط<@گش9ـ+¬D°س,=ز>طٌط'-ٌً ً ˆMr$   c                   َH   • \ rS rSr% SrSr\\   \S'   Sr	\\   \S'   S r
Srg)ع#IntXQuantizationAwareTrainingConfigi#  ay  
(Deprecated) Please use :class:`~torchao.quantization.qat.QATConfig` instead.

Config for applying fake quantization to a `torch.nn.Module`.
to be used with :func:`~torchao.quantization.quant_api.quantize_`.

Example usage::

    from torchao.quantization import quantize_
    from torchao.quantization.qat import IntxFakeQuantizeConfig
    activation_config = IntxFakeQuantizeConfig(
        torch.int8, "per_token", is_symmetric=False,
    )
    weight_config = IntxFakeQuantizeConfig(
        torch.int4, group_size=32, is_symmetric=True,
    )
    quantize_(
        model,
        IntXQuantizationAwareTrainingConfig(activation_config, weight_config),
    )

Note: If the config is applied on a module that is not
`torch.nn.Linear` or `torch.nn.Embedding`, or it is applied on
`torch.nn.Embedding` with an activation config, then we will raise
ValueError as these are not supported.
Nr*   r+   c                 َ   • [        U 5        g r.   r   ©r0   s    r%   r/   ع1IntXQuantizationAwareTrainingConfig.__post_init__C  َ
   € ـ  ص&r$   r   )r   r   r   r   r    r*   r   r   rA   r+   r/   r#   r   r$   r%   rd   rd   #  s/   ‡ ٌً6 ;?ذگxذ 6ر7س>ط6:€Mگ8ذ2ر3س:ُ'r$   rd   c                   َ   • \ rS rSrSrg)ع intx_quantization_aware_trainingiH  r   N©r   r   r   r   r#   r   r$   r%   rj   rj   H  َ   † عr$   rj   c                 َ€  • U nUR                   nUR                  n[        U[        R                  R
                  5      (       a  [        R                  " UUU5      $ [        U[        R                  R                  5      (       a$  Ub  [        S5      e[        R                  " X$5      $ [        S[        U5      -  5      e)NrH   rI   )r*   r+   r;   r4   rM   rN   r   rP   rQ   r9   r   rR   rS   )rB   rC   عmodr*   r+   s        r%   ع+_intx_quantization_aware_training_transformro   L  s©   € ً
 €Cط×0ر0ذط×(ر(€Mنگ#”u—x‘x—‘×'ر'ـ"×.ز.طططَ
ً 	
ô
 
گCœں™×+ر+×	,ر	,طر(ـطMَً ô &×4ز4°SسHذHنذHج4ذPSث9رTسUذUr$   c                   َ   • \ rS rSrSrS rSrg)ع'FromIntXQuantizationAwareTrainingConfigie  ag  
(Deprecated) Please use :class:`~torchao.quantization.qat.QATConfig` instead.

Config for converting a model with fake quantized modules,
such as :func:`~torchao.quantization.qat.linear.FakeQuantizedLinear`
and :func:`~torchao.quantization.qat.linear.FakeQuantizedEmbedding`,
back to model with the original, corresponding modules without
fake quantization. This should be used with
:func:`~torchao.quantization.quant_api.quantize_`.

Example usage::

    from torchao.quantization import quantize_
    quantize_(
        model_with_fake_quantized_linears,
        FromIntXQuantizationAwareTrainingConfig(),
    )
c                 َ   • [        U 5        g r.   r   rf   s    r%   r/   ع5FromIntXQuantizationAwareTrainingConfig.__post_init__z  rh   r$   r   N)r   r   r   r   r    r/   r#   r   r$   r%   rq   rq   e  s   † ٌُ&'r$   rq   c                   َ   • \ rS rSrSrg)ع%from_intx_quantization_aware_trainingi  r   Nrk   r   r$   r%   ru   ru     rl   r$   ru   rn   c                 َڑ   • [        U [        5      (       a  U R                  5       $ [        U [        5      (       a  U R	                  5       $ U $ )z„
If the given module is a fake quantized module, return the original
corresponding version of the module without fake quantization.
)r;   r   rX   r   rY   )rn   rC   s     r%   ع0_from_intx_quantization_aware_training_transformrw   ƒ  s@   € ô گ#ش*×+ر+طڈ}‰}‹ذـ	گCش/×	0ر	0ط×رس!ذ!àˆ
r$   c                   َ   • \ rS rSrSrS\\   4S jrS\R                  R                  S\S\S\R                  R                  4S	 jrS\R                  R                  S\S\S\R                  R                  4S
 jrSrg)عComposableQATQuantizeri”  a*  
Composable quantizer that users can use to apply multiple QAT quantizers easily.
Quantizers will be applied in the order they are specified in the constructor.

Note: the quantizers provided must apply to different modules in the model,
e.g. nn.Linear and nn.Embedding, otherwise the behavior will be undefined.

Example usage::

    my_quantizer = ComposableQATQuantizer([
        QATQuantizer1(),
        QATQuantizer2(),
        QATQuantizer3(),
    ])
    model = my_quantizer.prepare(model)
    train(model)
    model = my_quantizer.convert(model)
ع
quantizersc                 َN   • [         R                  R                  S5        Xl        g )Nz/torchao.quantization.qat.ComposableQATQuantizer)r4   r5   r6   rz   )r0   rz   s     r%   r1   عComposableQATQuantizer.__init__¨  s   € ـڈ‰×$ر$ذ%VشWط$چr$   عmodelعargsr`   rD   c                 َN   • U R                    H  nUR                  U5      nM     U$ r.   )rz   r   ©r0   r}   r~   r`   ع	quantizers        r%   r   عComposableQATQuantizer.prepare¬  َ'   € ً ںœˆIط×%ر% eس,ٹEٌ )àˆr$   c                 َN   • U R                    H  nUR                  U5      nM     U$ r.   )rz   r   r€   s        r%   r   عComposableQATQuantizer.convert³  rƒ   r$   )rz   N)r   r   r   r   r    r   r   r1   r4   rM   عModuler   r   r   r#   r   r$   r%   ry   ry   ”  sƒ   † ٌً&% 4ذ(8ر#9ô %ًط—X‘X—_‘_ًط-0ًط<?ًà	ڈ‰ڈ‰ôًط—X‘X—_‘_ًط-0ًط<?ًà	ڈ‰ڈ‰÷r$   ry   r}   عexample_inputs.c                 َہ   ^• [         R                  R                  S5        SSKJm  S[         R
                  R                  4U4S jjnU R                  U5        U " U6   g)zµ
(Prototype) Initialize the scales and zero points on all
:class:`~torchao.quantization.qat.fake_quantizer.IntxFakeQuantizerBase`
in the model based on the provided example inputs.
z3torchao.quantization.qat.initialize_fake_quantizersr   )عIntxFakeQuantizerعmc                 َ8   >• [        U T5      (       a  SU l        g g )NT)r;   ع_initialized)rٹ   r‰   s    €r%   ع_set_initializedع4initialize_fake_quantizers.<locals>._set_initializedة  s   ّ€ ـگaذ*×+ر+ط!ˆAچNً ,r$   N)r4   r5   r6   ع'torchao.quantization.qat.fake_quantizerr‰   rM   r†   عapply)r}   r‡   rچ   r‰   s      @r%   عinitialize_fake_quantizersr‘   »  sI   ّ€ ô 
‡HپH× ر ذ!VشWُ Jً"œEںH™HںO™O÷ "ً 
‡KپKذ ش!ظ	ˆ>زr$   ),r]   r[   عdataclassesr   عenumr   عtypingr   r   r   r   r4   عtorchao.core.configr	   ع%torchao.quantization.transform_moduler
   r   عtorchao.quantization.unifiedr   ع	embeddingr   عfake_quantize_configr   r   r   r   عlinearr   عutilsr   عstrr   r'   rM   r†   rb   rd   rj   ro   rq   ru   rw   ry   r‘   r   r$   r%   ع<module>r‌      sر  ًَ غ ف !ف ك -س -م ه ,÷ُ :ه -÷َ ُ (ف +ôˆcگ4ô ً ôNگَ Nَ ًNٌb " )س,ًcطڈH‰HڈO‰Oًcàًcً ‡XپX‡_پ_َcَ -ًcًL ô '¨,َ  'َ ً 'ôH	ذ'Jô 	ٌ "ذ"EسFًVطڈH‰HڈO‰OًVà/ًVً ‡XپX‡_پ_َVَ GًVً0 ô'¨lَ 'َ ً'ô2	ذ,Sô 	ٌ "ذ"IسJًط	ڈ‰ڈ‰ًà3ًً ‡XپX‡_پ_ََ Kًô $ذ-ô $ًNطڈ8‰8ڈ?‰?ًàک#کsک(‘Oًً 
ُr$   