
    ΅i                         % S SK r S SKJr  S SKrS SKJr  S SKJr  S SKJ	r	   " S S\5      r
 " S S	\5      r " S
 S\5      r " S S\5      rSq\\R                   S-     S-  \S'   S\R$                  4S jrg)    N)chain)_get_device_index)Function)commc                   4    \ rS rSr\S 5       r\S 5       rSrg)	Broadcast
   c                 2  ^ [        S U 5       5      (       d  [        S5      eU Vs/ s H  n[        US5      PM     nnXl        [	        U5      S:X  a  g[	        U5      U l        US   R                  5       U l        [        R                  " X R                  5      n/ n[        U R                  SS  5       H*  u  mnU(       a  M  UR                  U4S jU 5       5        M,     U R                  " U6   [        [        R                   " U5      5      $ s  snf )Nc              3   R   #    U  H  oR                   R                  S :g  v   M     g7fcpuNdevicetype.0is     V/home/james-whalen/.local/lib/python3.13/site-packages/torch/nn/parallel/_functions.py	<genexpr>$Broadcast.forward.<locals>.<genexpr>        :6a88==E)6   %'z2Broadcast function not implemented for CPU tensorsTr       c              3   ,   >#    U  H	  oT   v   M     g 7fNr   )r   outputidxs     r   r   r      s     *MW6#;Ws   )allAssertionErrorr   target_gpuslen
num_inputs
get_deviceinput_devicer   broadcast_coalesced	enumerateneeds_input_gradextendmark_non_differentiabletupler   from_iterable)ctxr!   inputsxoutputsnon_differentiablesinput_requires_gradr   s          @r   forwardBroadcast.forward   s    :6::: !UVV;FG;a(D1;G%v;!V!!9//1**6??C (1#2F2Fqr2J(K$C$&&#***MW*MM )L 	##%89U((122 Hs   Dc                 `    S[         R                  " U R                  U R                  /UQ76 -   $ )Nr   )ReduceAddCoalescedapplyr%   r#   r-   grad_outputss     r   backwardBroadcast.backward   s4    +11cnn
/;
 
 	
    r   N__name__
__module____qualname____firstlineno__staticmethodr3   r:   __static_attributes__r   r<   r   r   r   
   s(    3 3" 
 
r<   r   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r6   $   c                    [        S[        U5      U5       Vs/ s H  oCU   R                  5       PM     snU l        [        S[        U5      U5       Vs/ s H	  oCXDU-    PM     nn[        R
                  " XQ5      $ s  snf s  snf )Nr   )ranger"   r$   r!   r   reduce_add_coalesced)r-   destinationr#   gradsr   grads_s         r   r3   ReduceAddCoalesced.forward%   s     ,1CJ
+K
+Ka!H!+K
 6;1c%j*5UV5U
N+5UV((==
 Ws   A=Bc                 J    S[         R                  " U R                  /UQ76 -   $ )NNN)r   r7   r!   r8   s     r   r:   ReduceAddCoalesced.backward.   s(    
 OOCOO;l;< 	<r<   r   Nr=   r   r<   r   r6   r6   $   s(    > > < <r<   r6   c                   4    \ rS rSr\S 5       r\S 5       rSrg)Gather6   c                   ^  [        S U 5       5      (       d  [        S5      eUS:X  a  ST l        O[        US5      nUT l        UT l        [        S U 5       5      T l        [        S U 5       5      (       a5  US:X  a/  [        S U 5       5      n[        R                  " S	S
S9  ST l	        OST l	        [        U 4S jU 5       5      T l
        [        R                  " UT R                  T R                  5      $ )Nc              3   R   #    U  H  oR                   R                  S :g  v   M     g7fr   r   r   s     r   r   !Gather.forward.<locals>.<genexpr>9   r   r   z/Gather function not implemented for CPU tensorsr   Tc              3   @   #    U  H  oR                  5       v   M     g 7fr   )r$   r   s     r   r   rU   A   s     >v!||~~vs   c              3   F   #    U  H  oR                  5       S :H  v   M     g7fr   N)dimr   ts     r   r   rU   B   s     ,Vuuw!|Vs   !r   c              3   B   #    U  H  oR                  S 5      v   M     g7f)r   N)viewrZ   s     r   r   rU   C   s     5f66!99fs   zvWas asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.   )
stacklevelFc              3   X   >#    U  H  oR                  TR                  5      v   M!     g 7fr   )sizerY   )r   r   r-   s     r   r   rU   M   s     @Aswws   '*)r   r    target_devicer   rY   r+   
input_gpuswarningswarnunsqueezed_scalarinput_sizesr   gather)r-   rb   rY   r.   s   `   r   r3   Gather.forward7   s    :6::: !RSSE! %C-mTBM -C>v>>,V,,,5f55FMM' 	 %)C!$)C!@@@{{6377C,=,=>>r<   c                     [         R                  U R                  U R                  U R                  U5      nU R
                  (       a  [        S U 5       5      nSU-   $ )Nc              3   *   #    U  H	  oS    v   M     g7frX   r   )r   gs     r   r   "Gather.backward.<locals>.<genexpr>V   s     #B/QaD/s   rN   )Scatterr7   rc   rg   rY   rf   r+   )r-   grad_outputscattered_gradss      r   r:   Gather.backwardP   sK    !--NNCOOSWWk
   ##B/#BBOo--r<   r   Nr=   r   r<   r   rQ   rQ   6   s(    ? ?0 . .r<   rQ   c                   4    \ rS rSr\S 5       r\S 5       rSrg)rn   Z   c                    U Vs/ s H  n[        US5      PM     nnX0l        UR                  R                  S:w  a  UR	                  5       OSU l        S n[        R                  R                  5       (       a?  U R
                  S:X  a/  U Vs/ s H"  n[        [        R                  " U5      5      PM$     nn[        R                  " XAX R                  U5      nUb  [        U5       Hq  u  p[        R                  R                  X   5         [        R                  R                  5       nUR                  Xi   5        U
R!                  U5        S S S 5        Ms     U$ s  snf s  snf ! , (       d  f       M  = f)NTr   )r   rY   r   r   r$   r%   torchacceleratoris_available_get_streamr   scatterr'   device_indexcurrent_streamwait_streamrecord_stream)r-   r!   chunk_sizesrY   inputr/   streamsr   r0   r   r   main_streams               r   r3   Scatter.forward[   s,   ;FG;a(D1;G161B1Be1K5++-QS))++0@0@B0FGRS{V{5<<#78{GS,,u;Q&w/	&&33KNC"'"3"3"B"B"DK++GJ7((5 DC 0
  H T
 DCs   E)E$AE))
E8	c                 b    S S S [         R                  " U R                  U R                  /UQ76 4$ r   )rQ   r7   r%   rY   )r-   ro   s     r   r:   Scatter.backwardn   s+    T4c.>.>!V+!VVVr<   r   Nr=   r   r<   r   rn   rn   Z   s*     $ W Wr<   rn   _streamsr   c                 Z   U R                   S:X  d#  [        R                  R                  5       (       d  g[        R                  R	                  5       R                   U R                   :w  aA  [        S[        R                  R	                  5       R                    SU R                    35      e[        c"  S/[        R                  R                  5       -  q[        U R                     c1  [        R                  " U R                  5      [        U R                  '   [        U R                     $ )zBGet a background stream for copying between CPU and target device.r   Nz"Expected current accelerator type z to match device type )
r   rv   rw   rx   current_acceleratorr    r   device_countindexStream)r   s    r   ry   ry   w   s     {{e5#4#4#A#A#C#C,,.33v{{B01B1B1V1V1X1]1]0^ _$$*KK=2
 	
 6E--::<<%!&fll!;FLL!!r<   )rd   	itertoolsr   rv   torch._utilsr   torch.autogradr   torch.nn.parallelr   r   r6   rQ   rn   r   listr   __annotations__r   ry   r   r<   r   <module>r      sy       * # "
 
4< <$!.X !.HWh W4 .2$u||d"
#d
* 1" "r<   