
    hh              
          S SK r S SKrS SKJrJrJr  S SKJr  S SKJ	r	J
r
JrJrJrJr  S SKrS SKrS SKrSSKJr  SSKJrJrJrJr  SSKJr  SS	KJr  \" S
5      r \" S5      r!\" S5      r"\" S5      r#S\$S\%4S jr&S\	4S jr'S\S\\(\)\$\*\4   S\S\R:                  4S jr+S\S\\(\)\$\*\4   S\R:                  4S jr,S\RZ                  S\%4S jr. " S S\\!\"\#4   5      r/S\0\*\1\    4   S\0\*\ 4   4S jr2 " S S\/\R:                  \RZ                  \R:                  4   5      r3 " S S\/\0\1\04   5      r4 " S  S!\/\0\Rj                  \04   5      r6 " S" S#\/\Rn                  \Rp                  \Rn                  4   5      r9 " S$ S%5      r: " S& S'5      r; " S( S)\5      r< " S* S+\<5      r= " S, S-\<5      r> " S. S/\\!\"\#4   5      r? " S0 S1\?\!\"\#4   5      r@ " S2 S3\?\!\"\#4   5      rA " S4 S5\A\R:                  \RZ                  \R:                  4   5      rB " S6 S7\?\\1\4   5      rC " S8 S9\A\Rn                  \Rp                  \Rn                  4   5      rD " S: S;\?\0\"\04   5      rES\*S<\1\*   SS4S= jrFS\\(\)\$\4   S>\(SS4S? jrGS\\(\)\$\*\4   S\*4S@ jrH SES\S\\(\)\$\*\4   S\\   S\R:                  4SA jjrI  SFS\S\\(\)\$\*\4   SB\?SC\\1   4SD jjrJg)G    N)IterableMappingMutableMapping)partial)AnyCallableGenericOptionalTypeVarUnion   )Features)_ArrayXDExtensionType_is_zero_copy_onlydecode_nested_examplepandas_types_mapper)Table)no_op_if_value_is_nullT	RowFormatColumnFormatBatchFormatkeyreturnc                 `    U R                   S:H  =(       a    U R                  U R                  :  $ )N   )stepstopstartr   s    X/home/james-whalen/.local/lib/python3.13/site-packages/datasets/formatting/formatting.py_is_range_contiguousr"   (   s"    88q=2SXX22    c                 8    [        SU  S[        U 5       S35      e)NzWrong key type: 'z' of type 'z6'. Expected one of int, slice, range, str or Iterable.)	TypeErrortyper    s    r!   _raise_bad_key_typer'   ,   s&    

C5DI;6lm r#   tableindicesc           
         [        U[        5      (       aI  UR                  XR                  -  S5      R	                  S5      S   R                  5       n[        X5      $ [        U[        5      (       a!  [        UR                  UR                  5      6 n[        U[        5      (       a  [        U5      (       ay  UR                  S:  ai  [        XR                  UR                  UR                  UR                  -
  5      R	                  S5       Vs/ s H  o3R                  5       PM     sn5      $  [        U[        5      (       a:  U R                  U/5      n [        XR	                  S5      R                  5       5      $ [        U[         5      (       aK  [        X Vs/ s H4  o2R                  US5      R	                  S5      S   R                  5       PM6     sn5      $ [#        U5        gs  snf s  snf )a1  
Query a pyarrow Table to extract the subtable that correspond to the given key.
The :obj:`indices` parameter corresponds to the indices mapping in case we cant to take into
account a shuffling or an indices selection for example.
The indices table must contain one column named "indices" of type uint64.
r   r   N)
isinstanceint
fast_slicenum_rowscolumnas_py_query_tablesliceranger)   r"   r   r   strselect	to_pylistr   r'   )r(   r   r)   is       r!   !_query_table_with_indices_mappingr8   2   s    #s  '7'7!7;BB1EaHNNPE''#uS[[!1!123#u$$a+=+=ciiTWT]T]I]+^+e+efg+hi+ha	+hi  #scU#E>>!#4#>#>#@AA#x  E[^#_[^VW$6$6q!$<$C$CA$Fq$I$O$O$Q[^#_`` j $`s   G/
!;G4
c                 p   [        U[        5      (       a  U R                  XR                  -  S5      $ [        U[        5      (       a!  [        UR                  U R                  5      6 n[        U[
        5      (       aT  [        U5      (       aC  UR                  S:  a3  U R                  UR                  UR                  UR                  -
  5      $  [        U[        5      (       a=  U R                  R                  U R                   Vs/ s H  o"U:w  d  M
  UPM     sn5      $ [        U[        5      (       am  [        R                   " U[        R"                  5      n[%        U5      S:X  a  U R                  R	                  SS5      $ U R'                  XR                  -  5      $ [)        U5        gs  snf )zQ
Query a pyarrow Table to extract the subtable that correspond to the given key.
r   r   N)r+   r,   r-   r.   r2   r3   r)   r"   r   r   r4   r(   dropcolumn_namesr   npfromiterint64lenfast_gatherr'   )r(   r   r/   s      r!   r1   r1   P   s?    #snn 4a88#uS[[01#u$$a##CIIsxx#))/CDD#s{{e6H6H Z6HFVYM6H Z[[#x  kk#rxx(s8q=;;$$Q**  ~~!566 ![s   	F3F3pa_arrayc                      U R                   S:  $ Nr   )
null_count)rA   s    r!   _is_array_with_nullsrE   i   s    ""r#   c                       \ rS rSrSrS\R                  S\4S jrS\R                  S\	4S jr
S\R                  S\4S jrSrg	)
BaseArrowExtractorm   z
Arrow extractor are used to extract data from pyarrow tables.
It makes it possible to extract rows, columns and batches.
These three extractions types have to be implemented.
pa_tabler   c                     [         eNNotImplementedErrorselfrI   s     r!   extract_rowBaseArrowExtractor.extract_rowt       !!r#   c                     [         erK   rL   rN   s     r!   extract_column!BaseArrowExtractor.extract_columnw   rR   r#   c                     [         erK   rL   rN   s     r!   extract_batch BaseArrowExtractor.extract_batchz   rR   r#    N)__name__
__module____qualname____firstlineno____doc__par   r   rP   r   rT   r   rW   __static_attributes__rY   r#   r!   rG   rG   m   sL    "BHH " ""rxx "L ""bhh "; "r#   rG   py_dictc                 ^    U R                  5        VVs0 s H
  u  pXS   _M     snn$ s  snnf )z:Return the first element of a batch (dict) as a row (dict)r   )items)ra   r   arrays      r!   _unnestre   ~   s(    ,3MMO<OjcCqMO<<<s   )c                       \ rS rSrS\R
                  S\R
                  4S jrS\R
                  S\R                  4S jrS\R
                  S\R
                  4S jr	Sr
g)	SimpleArrowExtractor   rI   r   c                     U$ rK   rY   rN   s     r!   rP    SimpleArrowExtractor.extract_row       r#   c                 $    UR                  S5      $ rC   )r/   rN   s     r!   rT   #SimpleArrowExtractor.extract_column   s    q!!r#   c                     U$ rK   rY   rN   s     r!   rW   "SimpleArrowExtractor.extract_batch   rk   r#   rY   N)rZ   r[   r\   r]   r_   r   rP   ArrayrT   rW   r`   rY   r#   r!   rg   rg      sS    BHH  "rxx "BHH "bhh 288 r#   rg   c                       \ rS rSrS\R
                  S\4S jrS\R
                  S\4S jr	S\R
                  S\4S jr
Srg)	PythonArrowExtractor   rI   r   c                 4    [        UR                  5       5      $ rK   )re   	to_pydictrN   s     r!   rP    PythonArrowExtractor.extract_row   s    x))+,,r#   c                 @    UR                  S5      R                  5       $ rC   )r/   r6   rN   s     r!   rT   #PythonArrowExtractor.extract_column   s    q!++--r#   c                 "    UR                  5       $ rK   )ru   rN   s     r!   rW   "PythonArrowExtractor.extract_batch   s    !!##r#   rY   N)rZ   r[   r\   r]   r_   r   dictrP   listrT   rW   r`   rY   r#   r!   rr   rr      sG    -BHH - -.rxx .D .$bhh $4 $r#   rr   c                       \ rS rSrS rS\R                  S\4S jrS\R                  S\	R                  4S jrS\R                  S\4S jrS\R                  S\	R                  4S	 jrS
rg)NumpyArrowExtractor   c                     Xl         g rK   np_array_kwargs)rO   r   s     r!   __init__NumpyArrowExtractor.__init__   s    .r#   rI   r   c                 6    [        U R                  U5      5      $ rK   )re   rW   rN   s     r!   rP   NumpyArrowExtractor.extract_row   s    t))(344r#   c                 B    U R                  XR                  S      5      $ rC   )_arrow_array_to_numpyr;   rN   s     r!   rT   "NumpyArrowExtractor.extract_column   s!    ))(3H3H3K*LMMr#   c                 h    UR                    Vs0 s H  o"U R                  X   5      _M     sn$ s  snf rK   )r;   r   )rO   rI   cols      r!   rW   !NumpyArrowExtractor.extract_batch   s2    JRJ_J_`J_3T//>>J_```s   /rA   c                   ^ [        U[        R                  5      (       a  [        UR                  [        5      (       aT  [        UR                  R                  SS9nUR                   VVs/ s H  o3R                  US9  H  oDPM     M     snnmGO[        UR                  5      =(       a    [        S UR                   5       5      nUR                   VVs/ s H  o3R                  US9  H  oDPM     M     snnmO[        UR                  [        5      (       a.  [        UR                  R                  SS9nUR                  US9mOI[        UR                  5      =(       a    [        U5      (       + nUR                  US9R                  5       m[        T5      S:  a~  [        U4S jT 5       5      (       ad  [        R                  R!                  [        R"                  5      S:  a  [        R$                  " T[&        S9$ [        R(                  " TS	[&        S
9$ [        R                  R!                  [        R"                  5      S:  a  [        R$                  " T5      $ [        R(                  " TS	S9$ s  snnf s  snnf )NT)unnest)zero_copy_onlyc              3   B   #    U  H  n[        U5      (       + v   M     g 7frK   )rE   ).0chunks     r!   	<genexpr><NumpyArrowExtractor._arrow_array_to_numpy.<locals>.<genexpr>   s       KAP,U333s   r   c              3   :  >#    U  H  n[        U[        R                  5      =(       a6    UR                  [        :H  =(       d    UR
                  TS    R
                  :g  =(       d-    [        U[        5      =(       a    [        R                  " U5      v   M     g7f)r   N)r+   r<   ndarraydtypeobjectshapefloatisnan)r   xrd   s     r!   r   r      so       A Arzz*_60A0^QWWPUVWPXP^P^E^ :q%(8RXXa[:s   BBz2.0.0b1)r   F)copyr   r   )r+   r_   ChunkedArrayr&   r   r   storage_dtypechunksto_numpyallrE   tolistr?   anyr<   libNumpyVersion__version__asarrayr   rd   )rO   rA   r   r   rowrd   s        @r!   r   )NumpyArrowExtractor._arrow_array_to_numpy   s   h00(--)>??!3HMM4O4OX\!]%-__%4E^l@mC@mC_ "4HMM!B "s KAIK H &.__%4E^l@mC@mC_ (--)>??!3HMM4O4OX\!]&//~/N!3HMM!B!iK_`hKiGi&//~/NUUWu:>    
 66&&r~~6)C::e6::xxE@@66r~~.);::e$$88E..?s   -!J !Jr   N)rZ   r[   r\   r]   r   r_   r   r{   rP   r<   r   rT   rW   rp   r   r`   rY   r#   r!   r~   r~      sq    /5BHH 5 5Nrxx NBJJ Nabhh a4 a$/bhh $/2:: $/r#   r~   c                       \ rS rSrS\R
                  S\R                  4S jrS\R
                  S\R                  4S jr
S\R
                  S\R                  4S jrSrg)	PandasArrowExtractor   rI   r   c                 B    UR                  SS9R                  [        S9$ )Nr   )lengthtypes_mapper)r2   	to_pandasr   rN   s     r!   rP    PandasArrowExtractor.extract_row   s"    ~~Q~'11?R1SSr#   c                 h    UR                  S/5      R                  [        S9UR                  S      $ )Nr   r   )r5   r   r   r;   rN   s     r!   rT   #PandasArrowExtractor.extract_column   s4    s#--;N-OPXPePefgPhiir#   c                 (    UR                  [        S9$ )Nr   )r   r   rN   s     r!   rW   "PandasArrowExtractor.extract_batch   s    !!/B!CCr#   rY   N)rZ   r[   r\   r]   r_   r   pd	DataFramerP   SeriesrT   rW   r`   rY   r#   r!   r   r      s\    TBHH T Tjrxx jBII jDbhh D2<< Dr#   r   c                       \ rS rSr SS\\   S\\\\\\	S4   4      4S jjr
S\S\4S jrS	\S
\S\4S jrS\S\4S jrSrg)PythonFeaturesDecoder   Nfeaturestoken_per_repo_idc                     Xl         X l        g rK   r   r   rO   r   r   s      r!   r   PythonFeaturesDecoder.__init__   s     !!2r#   r   r   c                 n    U R                   (       a#  U R                   R                  XR                  S9$ U$ N)r   )r   decode_exampler   )rO   r   s     r!   
decode_row PythonFeaturesDecoder.decode_row   s.    ^b^k^kt}}++CCYCY+Ztqttr#   r/   column_namec                 p    U R                   (       a$  U R                   R                  XU R                  S9$ U$ r   )r   decode_columnr   )rO   r/   r   s      r!   r   #PythonFeaturesDecoder.decode_column   s9     }} MM''tOeOe'f	
 	
r#   batchc                 n    U R                   (       a#  U R                   R                  XR                  S9$ U$ r   )r   decode_batchr   rO   r   s     r!   r   "PythonFeaturesDecoder.decode_batch   s.    ^b^k^kt}}))%CYCY)Zvqvvr#   r   rK   )rZ   r[   r\   r]   r
   r   r{   r4   r   boolr   r   r|   r   r   r`   rY   r#   r!   r   r      s    mq3 *3?GSRWX[]acgXgRhMhHi?j3ud ut u
D 
s 
t 
w$ w4 wr#   r   c                       \ rS rSrS\\   4S jrS\R                  S\R                  4S jr	S\R                  S\S\R                  4S	 jrS
\R                  S\R                  4S jrSrg)PandasFeaturesDecoder   r   c                     Xl         g rK   r   )rO   r   s     r!   r   PandasFeaturesDecoder.__init__   s     r#   r   r   c                 j   U R                   (       ag  U R                   R                  5        VVs0 s H?  u  p#U R                   R                  U   (       d  M%  U[        [	        [
        U5      5      _MA     snnO0 nU(       a+  UR                  U5      U[        UR                  5       5      '   U$ s  snnf rK   )	r   rc   _column_requires_decodingr   r   r   	transformr|   keys)rO   r   r   featuredecodes        r!   r    PandasFeaturesDecoder.decode_row   s     }} -1MM,?,?,A,A(K==::;G ]3G<QSZ4[\\,A  	 '*}}V'<CV[[]#$
s   $B/B/r/   r   c                    U R                   (       aS  X R                   ;   aD  U R                   R                  U   (       a&  [        [        [        U R                   U   5      5      OS nU(       a  UR                  U5      nU$ rK   )r   r   r   r   r   r   )rO   r/   r   r   s       r!   r   #PandasFeaturesDecoder.decode_column   sd     }}!=$--BiBijuBv #7+@$--P[B\#]^ 	
 %%f-Fr#   r   c                 $    U R                  U5      $ rK   )r   r   s     r!   r   "PandasFeaturesDecoder.decode_batch	  s    u%%r#   r   N)rZ   r[   r\   r]   r
   r   r   r   r   r   r   r4   r   r   r`   rY   r#   r!   r   r      sk    !(!3 !bll r|| BII C BII &",, &2<< &r#   r   c                       \ rS rSrSrS\R                  SS4S jrS rS r	S	 r
SS jrS rS rS rS rS rS rS rS r\SS j5       rS rS rSrg
)LazyDicti  zeA dictionary backed by Arrow data. The values are formatted on-the-fly when accessing the dictionary.rI   	formatter	Formatterc                     Xl         X l        [        R                  UR                  5      U l        [        U R
                  R                  5       5      U l        g rK   )	rI   r   r{   fromkeysr;   datasetr   keys_to_format)rO   rI   r   s      r!   r   LazyDict.__init__  s:     "MM("7"78	!$)).."23r#   c                 ,    [        U R                  5      $ rK   )r?   r   rO   s    r!   __len__LazyDict.__len__  s    499~r#   c                     U R                   U   nXR                  ;   a:  U R                  U5      nX R                   U'   U R                  R                  U5        U$ rK   )r   r   formatremoverO   r   values      r!   __getitem__LazyDict.__getitem__  sM    		#%%%KK$E"IIcN&&s+r#   c                 t    XR                   ;   a  U R                   R                  U5        X R                  U'   g rK   r   r   r   r   s      r!   __setitem__LazyDict.__setitem__"  s-    %%%&&s+		#r#   Nc                 r    XR                   ;   a  U R                   R                  U5        U R                  U	 g rK   r   rO   r   s     r!   __delitem__LazyDict.__delitem__'  s-    %%%&&s+IIcNr#   c                 ,    [        U R                  5      $ rK   )iterr   r   s    r!   __iter__LazyDict.__iter__,  s    DIIr#   c                     XR                   ;   $ rK   )r   r   s     r!   __contains__LazyDict.__contains__/  s    iir#   c                 L    U R                  5         [        U R                  5      $ rK   )_format_allreprr   r   s    r!   __repr__LazyDict.__repr__2  s    DIIr#   c                    [        U[        5      (       a}  U R                  5       nUR                  5       nUR                  5         U=R                  UR
                  R                  5       -  sl        UR
                  UR
                  -  Ul        U$ [        U[        5      (       aI  U R                  5       nU=R                  UR                  5       -  sl        UR
                  U-  Ul        U$ [        $ rK   	r+   r   r   r
  r   r   r   r{   NotImplementedrO   otherinsts      r!   __or__LazyDict.__or__6  s    eX&&99;DJJLE5::??#44		EJJ.DIKeT""99;D5::</		E)DIKr#   c                    [        U[        5      (       a}  U R                  5       nUR                  5       nUR                  5         U=R                  UR
                  R                  5       -  sl        UR
                  UR
                  -  Ul        U$ [        U[        5      (       aH  U R                  5       nU=R                  UR                  5       -  sl        XR
                  -  Ul        U$ [        $ rK   r  r  s      r!   __ror__LazyDict.__ror__E  s    eX&&99;DJJLE5::??#44

TYY.DIKeT""99;D5::</		)DIKr#   c                 |   [        U[        5      (       an  UR                  5       nUR                  5         U =R                  UR
                  R                  5       -  sl        U =R
                  UR
                  -  sl        U $ U =R                  UR                  5       -  sl        U =R
                  U-  sl        U $ rK   )r+   r   r   r
  r   r   r   )rO   r  s     r!   __ior__LazyDict.__ior__T  s    eX&&JJLE5::??#44II#I  5::</IIIr#   c                 B   U R                   R                  U R                   5      nUR                  R                  U R                  5        U R                  S   R	                  5       UR                  S'   U R                  S   R	                  5       UR                  S'   U$ )Nr   r   )	__class____new____dict__updater   )rO   r  s     r!   __copy__LazyDict.__copy___  sw    ~~%%dnn5T]]+ $f 5 : : <f*.--8H*I*N*N*P&'r#   c                 ,    SS K nUR                  U 5      $ rC   r   )rO   r   s     r!   r   LazyDict.copyh  s    yyr#   c                     [         erK   rL   )clsiterabler   s      r!   r   LazyDict.fromkeysm  s    !!r#   c                     [         erK   rL   r   s     r!   r   LazyDict.formatq  rR   r#   c                     U R                    H!  nU R                  U5      U R                  U'   M#     U R                   R                  5         g rK   )r   r   r   clearr   s     r!   r
  LazyDict._format_allt  s:    &&C![[-DIIcN '!!#r#   )r   r   r   rI   )r   NrK   )rZ   r[   r\   r]   r^   r_   r   r   r   r   r   r   r  r  r  r  r  r  r!  r   classmethodr   r   r
  r`   rY   r#   r!   r   r     sv    o4 4k 4

 	
 " ""$r#   r   c                       \ rS rSrS rSrg)LazyRowiz  c                 r    U R                   R                  U R                  R                  U/5      5      S   $ rC   r   format_columnrI   r5   r   s     r!   r   LazyRow.format{  s-    ~~++DMM,@,@#,GHKKr#   rY   NrZ   r[   r\   r]   r   r`   rY   r#   r!   r0  r0  z  s    Lr#   r0  c                       \ rS rSrS rSrg)	LazyBatchi  c                 l    U R                   R                  U R                  R                  U/5      5      $ rK   r2  r   s     r!   r   LazyBatch.format  s(    ~~++DMM,@,@#,GHHr#   rY   Nr5  rY   r#   r!   r7  r7    s    Ir#   r7  c                      \ rS rSrSr\r\r\	r
\r  SS\\   S\\\\\\S4   4      4S jjrS\R*                  S\S	\\\\4   4S
 jrS\R*                  S	\4S jrS\R*                  S	\4S jrS\R*                  S	\4S jrSrg)r   i  z
A formatter is an object that extracts and formats data from pyarrow tables.
It defines the formatting for rows, columns and batches.
Nr   r   c                     Xl         X l        [        U R                   U R                  5      U l        [	        U R                   5      U l        g rK   )r   r   r   python_features_decoderr   pandas_features_decoderr   s      r!   r   Formatter.__init__  s9    
 !!2'<T]]DLbLb'c$'<T]]'K$r#   rI   
query_typer   c                     US:X  a  U R                  U5      $ US:X  a  U R                  U5      $ US:X  a  U R                  U5      $ g Nr   r/   r   )
format_rowr3  format_batch)rO   rI   r?  s      r!   __call__Formatter.__call__  sP    ??8,,8#%%h//7"$$X.. #r#   c                     [         erK   rL   rN   s     r!   rB  Formatter.format_row  rR   r#   c                     [         erK   rL   rN   s     r!   r3  Formatter.format_column  rR   r#   c                     [         erK   rL   rN   s     r!   rC  Formatter.format_batch  rR   r#   )r   r=  r<  r   NN)rZ   r[   r\   r]   r^   rg   simple_arrow_extractorrr   python_arrow_extractorr~   numpy_arrow_extractorr   pandas_arrow_extractorr
   r   r{   r4   r   r   r   r_   r   r   r   r   rD  rB  r3  rC  r`   rY   r#   r!   r   r     s    
 21/1 (,IML8$L $DeCtO.D)D$EFL/ /s /uYP\^iEi?j /"288 "	 ""bhh "< ""RXX "+ "r#   r   c                   "    \ rS rSrS\4S jrSrg)TensorFormatteri  data_structc                     [         erK   rL   )rO   rS  s     r!   recursive_tensorize#TensorFormatter.recursive_tensorize  rR   r#   rY   N)rZ   r[   r\   r]   r{   rU  r`   rY   r#   r!   rR  rR    s    "t "r#   rR  c                   *    \ rS rSr% \\S'   \\S'   Srg)TableFormatteri  
table_typecolumn_typerY   N)rZ   r[   r\   r]   r4   __annotations__r`   rY   r#   r!   rX  rX    s    Or#   rX  c                       \ rS rSrSrSrS\R                  S\R                  4S jrS\R                  S\R                  4S jr
S\R                  S\R                  4S jrS	rg
)ArrowFormatteri  zarrow tablezarrow arrayrI   r   c                 @    U R                  5       R                  U5      $ rK   )rM  rP   rN   s     r!   rB  ArrowFormatter.format_row  s    **,88BBr#   c                 @    U R                  5       R                  U5      $ rK   )rM  rT   rN   s     r!   r3  ArrowFormatter.format_column  s    **,;;HEEr#   c                 @    U R                  5       R                  U5      $ rK   )rM  rW   rN   s     r!   rC  ArrowFormatter.format_batch  s    **,::8DDr#   rY   N)rZ   r[   r\   r]   rY  rZ  r_   r   rB  rp   r3  rC  r`   rY   r#   r!   r]  r]    sf    JKC288 C CFbhh F288 FERXX E"(( Er#   r]  c                      ^  \ rS rSrS	U 4S jjrS\R                  S\4S jrS\R                  S\	4S jr
S\R                  S\4S jrSrU =r$ )
PythonFormatteri  c                 0   > [         TU ]  X5        X l        g rK   )superr   lazy)rO   r   rh  r   r  s       r!   r   PythonFormatter.__init__  s    5	r#   rI   r   c                     U R                   (       a  [        X5      $ U R                  5       R                  U5      nU R                  R                  U5      nU$ rK   )rh  r0  rN  rP   r<  r   rO   rI   r   s      r!   rB  PythonFormatter.format_row  sH    998**))+77A**55c:
r#   c                     U R                  5       R                  U5      nU R                  R                  X!R                  S   5      nU$ rC   )rN  rT   r<  r   r;   rO   rI   r/   s      r!   r3  PythonFormatter.format_column  B    ,,.==hG--;;FDYDYZ[D\]r#   c                     U R                   (       a  [        X5      $ U R                  5       R                  U5      nU R                  R                  U5      nU$ rK   )rh  r7  rN  rW   r<  r   rO   rI   r   s      r!   rC  PythonFormatter.format_batch  sH    99X,,++-;;HE,,99%@r#   )rh  )NFN)rZ   r[   r\   r]   r   r_   r   r   rB  r|   r3  rC  r`   __classcell__r  s   @r!   re  re    sR    288  bhh 4 
RXX '  r#   re  c                       \ rS rSrSrSrS\R                  S\R                  4S jr
S\R                  S\R                  4S jrS\R                  S\R                  4S jrS	rg
)PandasFormatteri  zpandas dataframezpandas seriesrI   r   c                 z    U R                  5       R                  U5      nU R                  R                  U5      nU$ rK   )rP  rP   r=  r   rk  s      r!   rB  PandasFormatter.format_row  s6    ))+77A**55c:
r#   c                     U R                  5       R                  U5      nU R                  R                  X!R                  S   5      nU$ rC   )rP  rT   r=  r   r;   rn  s      r!   r3  PandasFormatter.format_column  rp  r#   c                 z    U R                  5       R                  U5      nU R                  R                  U5      nU$ rK   )rP  rW   r=  r   rk  s      r!   rC  PandasFormatter.format_batch  s6    ))+99(C**77<
r#   rY   N)rZ   r[   r\   r]   rY  rZ  r_   r   r   r   rB  r   r3  rC  r`   rY   r#   r!   rw  rw    s]    #J!K288  
bhh 299 
RXX ",, r#   rw  c                      ^  \ rS rSrSrSS\\/\4   4U 4S jjjrS\R                  S\4S jr
S\R                  S\4S jrS\R                  S\4S	 jrS
rU =r$ )CustomFormatteri  a  
A user-defined custom formatter function defined by a ``transform``.
The transform must take as input a batch of data extracted for an arrow table using the python extractor,
and return a batch.
If the output batch is not a dict, then output_all_columns won't work.
If the output batch has several fields, then querying a single column won't work since we don't know which field
to return.
r   c                 ,   > [         TU ]  X#S9  Xl        g )Nr   )rg  r   r   )rO   r   r   r   kwargsr  s        r!   r   CustomFormatter.__init__  s    (P"r#   rI   r   c                 ~    U R                  U5      n [        U5      $ ! [         a  n[        SU 35      UeS nAff = f)Nz]Custom formatting function must return a dict of sequences to be able to pick a row, but got )rC  re   	Exceptionr%   rO   rI   formatted_batchexcs       r!   rB  CustomFormatter.format_row  sU    ++H5	?++ 	op  pA  B	s   
 
<7<c                 P   U R                  U5      n[        US5      (       aD  [        UR                  5       5      S:  a&  [	        S[        UR                  5       5       S35      eO[	        SU 35      e X!R                  S      $ ! [         a  n[	        SU 35      UeS nAff = f)Nr   r   zTried to query a column but the custom formatting function returns too many columns. Only one column was expected but got columns .zPCustom formatting function must return a dict to be able to pick a row, but got r   )rC  hasattrr?   r   r%   r|   r;   r  r  s       r!   r3  CustomFormatter.format_column  s    ++H5?F++?'')*Q.DDHI]I]I_D`Caabd  / bcrbst 	"#8#8#;<< 	bcrbst	s   6B 
B%B  B%c                     U R                  5       R                  U5      nU R                  R                  U5      nU R	                  U5      $ rK   )rN  rW   r<  r   r   rr  s      r!   rC  CustomFormatter.format_batch  s@    ++-;;HE,,99%@~~e$$r#   )r   rL  )rZ   r[   r\   r]   r^   r   r{   r   r_   r   rB  r   r3  rC  r`   rt  ru  s   @r!   r  r    sn    #(D64<"8 # #288  bhh < &%RXX %$ % %r#   r  columnsc                 0    X;  a  [        SU  SU 35      eg )NzColumn z5 not in the dataset. Current columns in the dataset: )KeyError)r   r  s     r!   _check_valid_column_keyr     s(    
%Z[bZcdee r#   sizec                    [        U [        5      (       a%  U S:  a  X-   S:  d  X:  a  [        SU  SU 35      eg [        U [        5      (       a  g [        U [        5      (       a7  [        U 5      S:  a'  [        [        U 5      US9  [        [        U 5      US9  g g [        U [        5      (       aI  [        U 5      S:  a9  [        [        [        U 5      5      US9  [        [        [        U 5      5      US9  g g [        U 5        g )Nr   zInvalid key: z is out of bounds for size )r  )r+   r,   
IndexErrorr2   r3   r?   _check_valid_index_keymaxminr   r'   )r   r  s     r!   r  r  %  s    #s!G
QCK}SE1LTFSTT	C			C		s8a<"3s8$7"3s8$7  
C	"	"s8a<"3s3x=t<"3s3x=t<  	C r#   c                     [        U [        R                  5      (       a  g[        U [        5      (       a  g[        U [        [
        [        45      (       a  g[        U 5        g rA  )r+   numbersIntegralr4   r2   r3   r   r'   r    s    r!   key_to_query_typer  8  sG    #w''((	C			C%1	2	2r#   c                    [        U[        [        [        [        [
        45      (       d   [        R                  " U5      n[        U[        5      (       a  [        XR                  5        O&Ub  UR                  OU R                  n[        X5        Uc  [        X5      nU$ [        XUS9nU$ ! [         a    [        U5         Nf = f)a  
Query a Table to extract the subtable that correspond to the given key.

Args:
    table (``datasets.table.Table``): The input Table to query from
    key (``Union[int, slice, range, str, Iterable]``): The key can be of different types:
        - an integer i: the subtable containing only the i-th row
        - a slice [i:j:k]: the subtable containing the rows that correspond to this slice
        - a range(i, j, k): the subtable containing the rows that correspond to this range
        - a string c: the subtable containing all the rows but only the column c
        - an iterable l: the subtable that is the concatenation of all the i-th rows for all i in the iterable
    indices (Optional ``datasets.table.Table``): If not None, it is used to re-map the given key to the table rows.
        The indices table must contain one column named "indices" of type uint64.
        This is used in case of shuffling or rows selection.


Returns:
    ``pyarrow.Table``: the result of the query on the input table
)r)   )r+   r,   r2   r3   r4   r   operatorindexr%   r'   r  r;   r.   r  r1   r8   )r(   r   r)   r  pa_subtables        r!   query_tabler  B  s    2 cCsH=>>	%..%C #s%7%78#*#6wENNs)"5.  8GT  	%$	%s   B/ /CCr   format_columnsc                   ^ [        U [        5      (       a  U R                  nOU n[        U5      n[	        UR
                  S9nTc  U" XVS9$ US:X  a  UT;   a  U" XV5      $ U" XVS9$ UR                  U4S jUR                   5       5      nU" XS9n	U(       aa  [        U	[        5      (       a>  UR                  U4S jUR                   5       5      n
U" XS9nU	R                  U5        U	$ [        SU	 35      eU	$ )a  
Format a Table depending on the key that was used and a Formatter object.

Args:
    table (``datasets.table.Table``): The input Table to format
    key (``Union[int, slice, range, str, Iterable]``): Depending on the key that was used, the formatter formats
        the table as either a row, a column or a batch.
    formatter (``datasets.formatting.formatting.Formatter``): Any subclass of a Formatter such as
        PythonFormatter, NumpyFormatter, etc.
    format_columns (:obj:`List[str]`, optional): if not None, it defines the columns that will be formatted using the
        given formatter. Other columns are discarded (unless ``output_all_columns`` is True)
    output_all_columns (:obj:`bool`, defaults to False). If True, the formatted output is completed using the columns
        that are not in the ``format_columns`` list. For these columns, the PythonFormatter is used.


Returns:
    A row, column or batch formatted object defined by the Formatter:
    - the PythonFormatter returns a dictionary for a row or a batch, and a list for a column.
    - the NumpyFormatter returns a dictionary for a row or a batch, and a np.array for a column.
    - the PandasFormatter returns a pd.DataFrame for a row or a batch, and a pd.Series for a column.
    - the TorchFormatter returns a dictionary for a row or a batch, and a torch.Tensor for a column.
    - the TFFormatter returns a dictionary for a row or a batch, and a tf.Tensor for a column.
r   )r?  r/   c              3   6   >#    U  H  oT;  d  M
  Uv   M     g 7frK   rY   r   r   r  s     r!   r   format_table.<locals>.<genexpr>  s     *m:O3^lSl33:O   		c              3   6   >#    U  H  oT;   d  M
  Uv   M     g 7frK   rY   r  s     r!   r   r    s      @#8C><QCC#8r  z\Custom formatting function must return a dict to work with output_all_columns=True, but got )r+   r   r(   r  re  r   r:   r;   r   r   r%   )r(   r   r   r  output_all_columnsrI   r?  python_formatterpa_table_to_formatformatted_outputpa_table_with_remaining_columnsremaining_columns_dicts      `        r!   format_tabler  m  s!   < %;;"3'J&	0B0BC99	x	. X22#HDD%]]*m(:O:O*mm$%7O*N;;2:-- @#+#8#8@ 3/ *::Y)q& ''(>?
    r  tD  sE  F   r#   rK   )NF)Kr  r  collections.abcr   r   r   	functoolsr   typingr   r   r	   r
   r   r   numpyr<   pandasr   pyarrowr_   r   r   features.featuresr   r   r   r   r(   r   utils.py_utilsr   r   r   r   r   r3   r   r"   r'   r,   r2   r4   r8   r1   rp   rE   rG   r{   r|   re   rg   rr   r   r~   r   r   r   r   r   r   r0  r7  r   rR  rX  r]  re  rw  r  r  r  r  r  r  rY   r#   r!   <module>r     s     = =  D C     u u  3 CLK 	~&m$3e 3 3S S%X=>INXX< E#ueS(*J$K PRPXPX 2#288 # #"L+!EF ""=T#tAw,' =DaL =
-bhh"((.JK $-dD$.>? $1/,T2::t-CD 1/hD-bllBIIr||.ST Dw w*& &@j$~ j$ZLh L
I I
$"	<<= $"N"i	< DE "
Yy,CD 
E^BHHbhh$@A Eiw 67 2nR\\299bll%JK (-%ilD 89 -%`f ftCy fT f
!c5%&A B !# !RV !&5eUC!AB s   $((	sE5#x/	0( e_( XX	(^ &*9 9 	sE5#x/	09  9  TN	9 r#   