
    h                         S SK r S SKJr  S SKrS SKrSSKJr  SSK	J
r
  SSKJr   " S S	\\\R                  \4   5      rg)
    N)Mapping   )config)
map_nested   )TensorFormatterc                      ^  \ rS rSrSU 4S jjrS rS rS rS\4S jr	S\
R                  S	\4S
 jrS\
R                  S	\R                  4S jrS\
R                  S	\4S jrSrU =r$ )NumpyFormatter   c                 ,   > [         TU ]  XS9  X0l        g )N)featurestoken_per_repo_id)super__init__np_array_kwargs)selfr   r   r   	__class__s       Z/home/james-whalen/.local/lib/python3.13/site-packages/datasets/formatting/np_formatter.pyr   NumpyFormatter.__init__   s    (P.    c                    ^ [        T[        5      (       a`  T(       a0  [        U4S jT 5       5      (       a  [        R                  " T5      $ [        R
                  " [        T5      [        S9nTUS S & U$ T$ )Nc              3      >#    U  He  n[        U[        R                  5      =(       a?    UR                  TS    R                  :H  =(       a    UR                  TS    R                  :H  v   Mg     g7f)r   N)
isinstancenpndarrayshapedtype).0xcolumns     r   	<genexpr>.NumpyFormatter._consolidate.<locals>.<genexpr>!   sU      qwlm
1bjj)gagg.HgQWWX^_`XaXgXgMggqws   A-A0)r   )r   listallr   stackemptylenobject)r   r    outs    ` r   _consolidateNumpyFormatter._consolidate   sf    fd### qw   xx''
 hhs6{&9A
r   c                 0   [        U[        [        [        S 5      45      (       a  U$ [        U[        R
                  [        R                  45      (       a6  [        R                  " UR                  [        R
                  5      (       a  U$ [        U[        R                  5      (       a  U$ 0 n[        U[        R                  5      (       aG  [        R                  " UR                  [        R                  5      (       a  S[        R                  0nOe[        U[        R                  5      (       aF  [        R                  " UR                  [        R                  5      (       a  S[        R                  0n[        R                  (       a]  S[         R"                  ;   aI  SS Kn[        XR&                  R&                  5      (       a!  [        R(                  " U40 U R*                  D6$ [        R,                  (       a,  S[         R"                  ;   a  SSKJn  [        X5      (       a  U$ [        R2                  (       a0  S[         R"                  ;   a  SSKJnJn  [        XU45      (       a  U$ [        R(                  " U40 0 UEU R*                  ED6$ )Nr   PILr   torchvision)VideoReader
torchcodec)AudioDecoderVideoDecoder)r   strbytestyper   	characterr   
issubdtyper   numberintegerint64floatingfloat32r   PIL_AVAILABLEsysmodules	PIL.ImageImageasarrayr   TORCHVISION_AVAILABLEtorchvision.ior/   TORCHCODEC_AVAILABLEtorchcodec.decodersr1   r2   )r   valuedefault_dtyper-   r/   r1   r2   s          r   
_tensorizeNumpyFormatter._tensorize.   s   ec5$t*566Lbjj9::r}}U[[Z\ZfZf?g?gLryy))LeRZZ((R]]5;;

-S-S$bhh/Mrzz**r}}U[["++/V/V$bjj1MES[[$8%11zz%@4+?+?@@''MS[[,H2%--&&<3;;+FF%!=>>zz%M#Lm#Lt7K7K#LMMr   c                 \   [         R                  (       ap  S[        R                  ;   a\  SS Kn[        XR                  5      (       a>  U R                  UR                  5       R                  5       R                  5       S   5      $ [        US5      (       aN  [        U[        R                  [        R                  [        R                  45      (       d  UR!                  5       n[        U[        R                  5      (       aD  UR"                  [$        :X  a0  U R'                  U Vs/ s H  o0R)                  U5      PM     sn5      $ [        U[*        [,        45      (       a0  U R'                  U Vs/ s H  o0R)                  U5      PM     sn5      $ U R                  U5      $ s  snf s  snf )Ntorchr    	__array__)r   TORCH_AVAILABLEr>   r?   rL   r   TensorrI   detachcpunumpyhasattrr   r   r6   r8   rN   r   r(   r*   recursive_tensorizer#   tuple)r   data_structrL   	substructs       r   _recursive_tensorize#NumpyFormatter._recursive_tensorizeO   s8   !!g&<+||44{'9'9';'?'?'A'G'G'I"'MNN;,,ZbjjZ\ZfZfhjhqhqMr5s5s%//1Kk2::..  F*((_j)k_jR[*B*B9*M_j)kllkD%=11$$[f%g[fi&>&>y&I[f%ghh{++ *l%gs   'F$2F)rW   c                 ,    [        U R                  USS9$ )NF)map_list)r   rY   )r   rW   s     r   rU   "NumpyFormatter.recursive_tensorize`   s    $33[5QQr   pa_tablereturnc                     U R                  5       R                  U5      nU R                  R                  U5      nU R	                  U5      $ N)numpy_arrow_extractorextract_rowpython_features_decoder
decode_rowrU   )r   r^   rows      r   
format_rowNumpyFormatter.format_rowc   sB    ((*66x@**55c:'',,r   c                     U R                  5       R                  U5      nU R                  R                  X!R                  S   5      nU R                  U5      nU R                  U5      nU$ )Nr   )rb   extract_columnrd   decode_columncolumn_namesrU   r*   )r   r^   r    s      r   format_columnNumpyFormatter.format_columnh   sb    ++-<<XF--;;FDYDYZ[D\]))&1""6*r   c                     U R                  5       R                  U5      nU R                  R                  U5      nU R	                  U5      nU H  nU R                  X#   5      X#'   M     U$ ra   )rb   extract_batchrd   decode_batchrU   r*   )r   r^   batchcolumn_names       r   format_batchNumpyFormatter.format_batcho   sf    **,::8D,,99%@((/ K!%!2!253E!FE !r   )r   )NN)__name__
__module____qualname____firstlineno__r   r*   rI   rY   dictrU   paTabler   rg   r   r   rm   rt   __static_attributes____classcell__)r   s   @r   r
   r
      su    /NB,"Rt R-288 - -
bhh 2:: RXX '  r   r
   )r>   collections.abcr   rS   r   pyarrowr{    r   utils.py_utilsr   
formattingr   r   r
   rM   r   r   <module>r      s:     #    ' '[_Wbjj'%AB [r   