
    oic1                        S SK r S SKrS SKJr  S SKJr  S SKrS SKJr  S SKrS SK	J
r
JrJr  S SKJr  S SKJr  SS	/rS
q\ R&                  R)                  SS5      S:H  r\=(       a    \ R&                  R)                  SS5      S:H  r\R.                  R1                  \S9r\R.                  R5                  \S9r\R8                       SS j5       r  " S S\R<                  R>                  5      r SS jr!S r"SS jr#g)    N)OrderedDict)
MethodType)fnmatch)_infer_device_type_get_device_moduleget_device_states)set_device_states)DEVICE_TYPEpatch_tiled_mlp	patch_mlpTUNSLOTH_ENABLE_LOGGING01UNSLOTH_ENABLE_TILED_LOGGINGdevice_typec                     US-  S-  S-  U-  SU -  U-  -
  nSU -  SU-  -   U -   n[         R                  " XV-  5      n[        XGU-  U-  5      nU$ )N      
   )mathceilmax)hdmlp_sizenbytes	target_gbpadded_length	numeratordenominatormax_flat_qlens           O/home/james-whalen/.local/lib/python3.13/site-packages/unsloth_zoo/tiled_mlp.pyget_max_flat_qlenr#   ,   sl     D 4'$.71R4=IIb51X:%*KIIi56M(F-'WXM    c                   h    \ rS rSr\S 5       r\S 5       r\\S 5       5       r\\	S 5       5       r
Srg)TiledMLPB   c                     [        U [        5      (       aS  U(       d  U SS  H  nUR                  / 5        M     [        U SS 5       H  u  p4X   R                  U5        M     U S   $ U $ )z4Extract main output and append extras to their lists   Nr   )
isinstancetupleappend	enumerate)outputextra_lists_iextras        r"   handle_outputTiledMLP.handle_outputC   sj     fe$$A&&r* $ &fQRj1%%e, 2!9r$   c                 t    U(       d  U $ U Vs/ s H  n[         R                  " USS9PM     nnU /UQ7$ s  snf )zReconstruct original structuredim)torchcat)main_outputr/   
extra_listextrass       r"   structure_outputTiledMLP.structure_outputQ   s@     BMN+J%))JB/+N%f%% Os   5c           
      x   SU l         UR                  u  pxn	XPl        U(       a"  [        XgU-  5      n
[	        SXx-  X-  -
  5      nO&[        Xx-  [        [	        SU5      Xx-  5      5      u  pU
/U-  nUS:w  a  UR                  U5        Xl        [        (       a  [        (       d  [        (       a  [        SUS    35        Sq[        U5      U l        Xl        X l        Uc  Sn[!        U5      U l        U R%                  U5        U R"                  (       ag  [&        R(                  " 5       U l        SU l        [/        U R                  5      n[1        USS5      (       a  SU l        [3        U5      u  U l        U l        SnS n/ nUR9                  SU	5      n[&        R:                  " 5          [&        R<                  " X0R                  SS	9n[?        U5       H  u  nnURA                  S5      n[B        RE                  U" U5      U5      nUc+  [&        RF                  " XxU	URH                  URJ                  S
9nURM                  5       nUR9                  S5      RO                  SUUS9RQ                  U5      RS                  U5        UU-  nM     S S S 5        [B        RU                  UU5      $ ! , (       d  f       N$= f)Nr6   r   r)   z=Unsloth: Enabling TiledMLP to reduce VRAM usage! chunk size: FT_initializedr7   )devicedtyper8   startlength)+	shard_dimshape
num_shardsminr   divmodr,   split_sizes
FIRST_PASSr   r   printr   r   mlp_forward
mlp_moduleboolpreserve_rng_statesave_for_backwardr9   get_rng_statefwd_cpu_statehad_device_in_fwdr   getattrr   fwd_devicesfwd_device_statesviewno_gradsplitr-   	unsqueezer&   r3   emptyrC   rD   numelnarrowview_ascopy_r>   )ctxrP   rQ   xrS   rJ   r!   BSHqlen_chunk_size	remainderrM   device_module	start_idxfinal_outputextra_outputsx_splitsr1   x_splitout
split_sizes                         r"   forwardTiledMLP.forwardZ   sd    ''a#!-15OAqs_%AABI)/SQ
9KQS5Q)R&O&'
2>;--i8%J116R6RQR]^_R`QabcJ,Q/%#%!%!%&8!9 	a  !! % 3 3 5C$)C!.s?M}ne<<(,%9J19M6!6 	FF2qM]]_{{1oo1=H'1
7!++A.,,[-A=Q'#(;;qQszzQTQZQZ#[L$]]_
!!"%,,#% -  ''"55:Z'	 2  ((}EE _s   *C#J++
J9c                    / nU R                   S   nUR                  u  pVnU R                  (       a  U R                  (       a  U R                  n[
        R                  R                  X0R                  U R                  S9   U R                  (       aZ  [
        R                  " U R                  5        U R                  (       a)  [        U R                  U R                  U R                  S9  [
        R                  " U[
        R                  S9nUR                  SU5      n[
        R                   " X@R"                  SS9n	Sn
/ n[%        U	5       GH  u  pUR'                  S5      nUR)                  5       nUR                  S5      R+                  SU
US9R-                  U5      nUR                  S5      R+                  SU
US9R-                  U5      nUR/                  S5        Xl        [
        R2                  " 5          [4        R7                  U R9                  U5      U5      nS S S 5        [
        R:                  R=                  WU5        X-  n
GM     S S S 5        S S WS S S 4$ ! , (       d  f       NG= f! , (       d  f       N'= f)	Nr   )devicesenabledr   r   )memory_formatrB   r7   rE   T)saved_tensorsrI   rS   rW   rY   r9   randomfork_rngr   set_rng_staterV   r	   rZ   
zeros_likepreserve_formatr[   r]   rM   r-   r^   r`   ra   rb   requires_grad_gradenable_gradr&   r3   rP   autogradbackward)rd   grad_outputargsrng_devicesre   rf   rg   rh   x_gradientsro   rl   rn   r1   rp   rr   x_grad_slicegrad_output_shardoutputss                     r"   r   TiledMLP.backward   s    a ''a!!c&;&;//K\\"")?)?S__ # 
 %%##C$5$56((%coos7L7LZ]ZiZij**1E<Q<QRKr1A{{1oo1=HIM'1
!++A.$]]_
*//3::#%  ;   ''"	  %0$4$4R$8$?$?#% %@ % ''"	 " &&t,+&&(&44S__W5M}]G ) ''1BC'	+ 2
J T;dD88 )(?
 
s%   F I9&I('1I9(
I62I99
J N)__name__
__module____qualname____firstlineno__staticmethodr3   r>   torch_amp_custom_fwdrs   torch_amp_custom_bwdr   __static_attributes__r   r$   r"   r&   r&   B   sc      & & ?F  ?FB +9  +9r$   r&   c                   ^ ^^^ SmT R                  5        H2  u  pE[        U[        R                  R                  5      (       d  M0  Sm  O   T R
                  R                  T l        T R
                  R                  T l        U UUU4S jnU U4S jnU(       a  [        UT 5      T l        T $ [        UT 5      T l        T $ )NFTc                    > UR                   u  p#nX#-  n TR                  R                  n[        U[        [
        45      (       a  US   nTc3  [        R                  R                  S5      u  pxUS-  S-  S-  n	U	S-  n	U	m[        UUUR                  5       TTS9n
[        XZ5      u  p[        SU5      nU R                  R                  X R                  5      n[         R#                  UTUTX5      $ !   US-  n N= f)Nr      r         ?)r   r   r   r   r   r)   )rI   configintermediate_sizer*   listr+   r9   cudamem_get_infor#   element_sizerL   r   _unsloth_forward__get__	__class__r&   apply)selfre   bszqlenr   	flat_qlenr   freetotalfree_gbr!   n_shardsrj   inner_forwardrQ   r   rS   r   s                 r"   tiled_forward_target_gb*patch_mlp.<locals>.tiled_forward_target_gb   s   2H		' * 1 1 C C+dE];;$5a$8! **11!4KDTkD(4/GmGI)(^^%!)
 %Y>q(# --55dNNK~~mZ<NPXhh+	' "Qs   6C4 4C=c                    > UR                   u  p#n[        SU5      n[        X55      u  pg[        SU5      nU R                  R	                  X R
                  5      n[        R                  UT	UT
Xe5      $ )Nr)   )rI   r   rL   r   r   r   r&   r   )r   re   rf   rg   rh   
chunk_sizer   rj   r   rQ   rS   s            r"   tiled_forward_arctic_size,patch_mlp.<locals>.tiled_forward_arctic_size   si    ''aAY
$Q3q(# --55dNNK~~mZ<NPXeer$   )
named_modulesr*   r9   nnDropoutr   rs   _original_forwardr   r   )	rQ   target_arcticr   r   nmr   r   rS   s	   ` ``    @r"   r   r      s    ((*a))**!% + $.#7#7#?#?J ","6"6">">Ji i>	f '(A:N
  ((?L
r$   c                     U R                  5       nU HK  nUR                  5       nSU;   d  SU;   a  [        X$5      (       a    gM3  UR                  U5      (       d  MK    g   g)N*?TF)lowerr   endswith)namecustom_modules
name_lowercustom_modulecustom_module_lowers        r"   is_custom_moduler   	  sd    J'+113-3-#7z77 8 ""#677 ( r$   c                 f   UR                  S5      nUS   S;   a  SnOSn[        U5      S:  a   [        US   5      nOS n[        U5      S:X  a-  US   R                  S	5       Vs/ s H  of(       d  M  UPM     nnO/ nS
nU R                  5        H  u  pSnU(       a  [	        X5      (       a  SnOXU	R                  5       R                  U5      (       a  SnO1U	R                  S5      (       a  [        U
5      R                  S;   a  SnU(       d  M  [        U
UUUS9  M     U $ !   S n N= fs  snf )N:r   )arcticr   TFr)   r      ,)z.mlpz.ffnz.feed_forwardz.ffz.densereludensez.block_sparse_moez.mixer)NemotronHMLPNemotronHMOE)r   r   r   )
r]   lenfloatr   r   r   r   typer   r   )modelpatch_options_strr   patch_options_strsr   r   re   r   attr_suffixesr   moduleshould_patchs               r"   r   r     s<   *005!/
"	034I 	
!#%7%:%@%@%EK%E!%EKM ++-55#ZZ\""=11L]]8$$f)>)>Bb)bL< -% -	 .$ LK	I
 Ls   D% &
D.4D.%D+)i   i 8  r   r   @   )TN   )r   r   )$osr   collectionsr   typesr   	functoolsr   r9   torch.utils.checkpointr   r   r   "unsloth_zoo.gradient_checkpointingr	   unsloth_zoo.device_typer
   __all__rN   environgetr   r   amp
custom_fwdr   
custom_bwdr   cacher#   r   Functionr&   r   r   r   r   r$   r"   <module>r      s  " 
  #     
 A / 
 
(@#F#M 5t"**..Igil:mqt:t yy++++F yy++++F 
	 & H9u~~&& H9T;z
.r$   