
    hg-                         S SK r S SKJr  S SKJr  S SKrS SKrSSK	J
r
  SSKJr  \(       a  S SKr S SKrSr " S
 S\\S\4   5      rg! \ a    S	rSr Nf = f)    N)Mapping)TYPE_CHECKING   )config   )TensorFormatterTFc                      ^  \ rS rSrSU 4S jjrS rS rS rS\4S jr	S\
R                  S	\4S
 jrS\
R                  S	S4S jrS\
R                  S	\4S jrSrU =r$ )TorchFormatter(   c                 X   > [         TU ]  XS9  X0l        [        (       d  [	        S5      eg )N)featurestoken_per_repo_idz%PyTorch is required but not available)super__init__torch_tensor_kwargs_torch_availableImportError)selfr   r   r   	__class__s       ]/home/james-whalen/.local/lib/python3.13/site-packages/datasets/formatting/torch_formatter.pyr   TorchFormatter.__init__)   s0    (P#6 EFF      c                    ^ [        U[        5      (       a  U(       d  U$ US   m[        T[        R                  5      (       d  U$ [	        U4S jU 5       5      (       a  [        R
                  " U5      $ U$ )z@Smarter consolidation that only stacks when safe and beneficial.r   c              3     >#    U  H  n[        U[        R                  5      =(       aY    UR                  TR                  :H  =(       a9    UR                  TR                  :H  =(       a    UR
                  TR
                  :H  v   M     g 7fN)
isinstancetorchTensorshapedtypedevice).0xfirsts     r   	<genexpr>.TorchFormatter._consolidate.<locals>.<genexpr>;   sm      

 	 q%,,' )5;;&)5;;&) ELL() s   BB
)r   listr   r   allstack)r   columnr$   s     @r   _consolidateTorchFormatter._consolidate0   sl    &$''vM q	%..M  

 
 
 
 ;;v&&r   c                 b   [        U[        [        [        S5      45      (       a  U$ [        U[        R
                  [        R                  45      (       aD  [        R                  " UR                  [        R
                  5      (       a  UR                  5       $ [        R                  (       Ga  S[        R                  ;   a  SSKn[        XR                  R                  5      (       a  [        R                   " U5      nUR"                  S:X  a  USS2SS2[        R$                  4   n[        R&                  " USS5      nUR(                  R*                  (       d  [        R,                  " U5      nUR(                  R.                  (       d  UR1                  5       n[2        R4                  " U5      $ [        R6                  (       a,  S[        R                  ;   a  SSKJn  [        X5      (       a  U$ [        R<                  (       a0  S[        R                  ;   a  SS	KJ nJ!n  [        XU45      (       a  U$ [E        US
5      (       a/  [        U[2        RF                  5      (       d  URI                  5       n[        U[        R                  5      (       Ga  [        R                  " UR                  [        RJ                  5      (       GaQ  U RL                  R1                  5       nURO                  S[2        RP                  5      nUR                  [        RR                  [        RT                  4;   a  URW                  [        RP                  5      nU[2        RP                  :X  aA  UR(                  R.                  (       d  UR1                  5       n[2        R4                  " U5      $ UR(                  R.                  (       d  UR1                  5       nURY                  SU5        [2        RZ                  " U40 UD6$ UR                  [        R\                  :X  Ga;  [        R^                  " U[        R`                  " [        RP                  5      Rb                  :*  5      (       a  URW                  [        RP                  5      nU[2        RP                  :X  aA  UR(                  R.                  (       d  UR1                  5       n[2        R4                  " U5      $ UR(                  R.                  (       d  UR1                  5       nURY                  SU5        [2        RZ                  " U40 UD6$ URY                  SU5        [2        Rd                  " U40 UD6$ UR                  [        RP                  :X  aU  U[2        RP                  :X  aA  UR(                  R.                  (       d  UR1                  5       n[2        R4                  " U5      $ UR(                  R.                  (       d  UR1                  5       nURY                  SU5        [2        RZ                  " U40 UD6$ [        R                  " UR                  [        Rf                  5      (       Ga  U RL                  R1                  5       nURO                  S[2        Rh                  5      nUR                  [        Rh                  :X  aU  U[2        Rh                  :X  aA  UR(                  R.                  (       d  UR1                  5       n[2        R4                  " U5      $ UR(                  R.                  (       d  UR1                  5       nURY                  SU5        [2        RZ                  " U40 UD6$ UR(                  R.                  (       d  UR1                  5       n[2        R4                  " U5      $ [        U[        Rj                  5      (       Ga  U RL                  R1                  5       n[        R                  " UR                  [        RJ                  5      (       a7  URY                  S[2        RP                  5        [2        RZ                  " U40 UD6$ [        R                  " UR                  [        Rf                  5      (       a7  URY                  S[2        Rh                  5        [2        RZ                  " U40 UD6$ [2        RZ                  " U40 UD6$ [        U[l        [n        45      (       aC   [        Rp                  " U5      nUR                  Rr                  S;   a  U Ru                  U5      $  0 n	[        U[z        [|        45      (       a:  [        U[z        5      (       a  S[2        RP                  0n	OS[2        Rh                  0n	[2        Rd                  " U40 0 U	EU RL                  ED6$ ! [v        [x        4 a     Nf = f)z:Zero/low-copy tensor conversion with smart dtype handling.NPILr   r   torchvision)VideoReader
torchcodec)AudioDecoderVideoDecoder	__array__r    iuf)?r   strbytestypenp	characterndarray
issubdtyper    tolistr   PIL_AVAILABLEsysmodules	PIL.ImageImageasarrayndimnewaxismoveaxisflagsc_contiguousascontiguousarray	writeablecopyr   
from_numpyTORCHVISION_AVAILABLEtorchvision.ior1   TORCHCODEC_AVAILABLEtorchcodec.decodersr3   r4   hasattrr   r5   integerr   getint64uint16uint32astype
setdefault	as_tensoruint64r(   iinfomaxtensorfloatingfloat32numberr'   tuplearraykind
_tensorize
ValueError	TypeErrorintfloat)
r   valuer.   arrr1   r3   r4   kwargstarget_dtypedefault_dtypes
             r   re   TorchFormatter._tensorizeF   sd    ec5$t*566L ebllBJJ788R]]5;;XZXdXd=e=e<<>! ES[[$8%11jj'88q=aBJJ./Ckk#r1-yy--..s3Cyy**((*C'',, ''MS[[,H2%--&&<3;;+FF%!=>> 5+&&z%/N/NOO%E eRZZ((}}U[["**5511668%zz'5;;? ;;299bii"88!LL2E#u{{2${{44$)JJLE$//66${{44$)JJLE))'<@$u???[[BII-vverxx'9'='==>> %RXX 6'5;;6#(;;#8#8(-

#(#3#3E#::#(;;#8#8(-

"--g|D#(??5#CF#CC ))'<@$||E<V<< {{bhh.<5;;3N${{44$)JJLE$//66  %{{44$)JJLE))'<@$u??? u{{BKK8811668%zz'5==A;;"**,1N ;;00 %

 ++E22 !;;00 %

%%g|< ??5;F;; {{,,!JJLE''.. ryy))--224F}}U[["**55!!'5;;7u777u{{BKK88!!'5==9u777u777 e}--hhuo99>>U*??3// + ec5\**%%%!(%++ 6!(%-- 8||ES%R%R9Q9Q%RSS 	* s   A f f.-f.c                    [        US5      (       a/  [        U[        R                  5      (       d  UR	                  5       n[        U[
        R                  5      (       aF  UR                  [        :X  a1  U Vs/ s H  o R                  U5      PM     nnU R                  U5      $ O[        U[        [        45      (       a1  U Vs/ s H  o R                  U5      PM     nnU R                  U5      $ [        U[        5      (       a4  UR                  5        VVs0 s H  u  pEX@R                  U5      _M     snn$ U R                  U5      $ s  snf s  snf s  snnf )z8Optimized recursive walker with reduced Python overhead.r5   )rR   r   r   r   r5   r:   r<   r    object_recursive_tensorizer+   r'   rb   dictitemsre   )r   data_structitemresultkeyrj   s         r   rr   #TorchFormatter._recursive_tensorize   s$    ;,,ZU\\5Z5Z%//1K k2::..  F*FQRkd33D9kR((00 +
 dE]33BMN+$//5+FN$$V,,T**LWL]L]L_`L_jcC22599L_`` {++ S O as   8EEEru   c                 $    U R                  U5      $ )z+Public interface maintaining compatibility.)rr   )r   ru   s     r   recursive_tensorize"TorchFormatter.recursive_tensorize   s    ((55r   pa_tablereturnc                     U R                  5       R                  U5      nU R                  R                  U5      nU R	                  U5      $ r   )numpy_arrow_extractorextract_rowpython_features_decoder
decode_rowr{   )r   r}   rows      r   
format_rowTorchFormatter.format_row   sB    ((*66x@**55c:'',,r   torch.Tensorc                     U R                  5       R                  U5      nU R                  R                  X!R                  S   5      nU R                  U5      nU R                  U5      nU$ )Nr   )r   extract_columnr   decode_columncolumn_namesr{   r+   )r   r}   r*   s      r   format_columnTorchFormatter.format_column   sb    ++-<<XF--;;FDYDYZ[D\]))&1""6*r   c                     U R                  5       R                  U5      nU R                  R                  U5      nU R	                  U5      nU H  nU R                  X#   5      X#'   M     U$ r   )r   extract_batchr   decode_batchr{   r+   )r   r}   batchcolumn_names       r   format_batchTorchFormatter.format_batch  sf    **,::8D,,99%@((/ K!%!2!253E!FE !r   )r   )NN)__name__
__module____qualname____firstlineno__r   r+   re   rr   rs   r{   paTabler   r   r   r   __static_attributes____classcell__)r   s   @r   r
   r
   (   sq    G,XTt,.6t 6-288 - -
bhh > RXX '  r   r
   r   )r@   collections.abcr   typingr   numpyr:   pyarrowr    r   
formattingr   r   r   r   r
    r   r   <module>r      sh      #      ' e_Wng%EF e  Es   A	 	
AA