
    h1                        S SK r S SKJrJr  S SKJrJr  S SKrS SK	r
S SKrS SKJrJrJrJrJrJrJrJrJrJr  S SKJr  \(       a  S SKr\R4                  R6                  R9                  \5      rSS/r\ " S S	\R@                  5      5       r! " S
 S\RD                  5      r#S\RH                  S\%4S jr&S\4S jr'S\RP                  S\
RR                  4S jr*S\RH                  S\%4S jr+\ " S S5      5       r,\ " S S5      5       r-S\4S jr.S\
RR                  4S jr/S\RH                  S\%4S jr0S\4S jr1S\RP                  S\
Rd                  4S jr3S\4S jr4S r5S\6S\7S \7S\
Rd                  4S! jr8S"\S\7S \74S# jr9S$ r:S% r;S&\74S' jr<S(\RH                  S\4S) jr=S0S"\4S* jjr>S"\S\74S+ jr?S\%4S, jr@S\%4S- jrAS\%4S. jrBS/ rCg)1    N)	dataclassfield)TYPE_CHECKINGOptional)
Array2DArray3DArray4DArray5DFeatures	LargeListListValue_ArrayXD_arrow_to_datasets_dtype)cast_table_to_featuresz.h5z.hdf5c                   V    \ rS rSr% SrSr\\   \S'   Sr	\\
R                     \S'   Srg)
HDF5Config    zBuilderConfig for HDF5.N
batch_sizefeatures )__name__
__module____qualname____firstlineno____doc__r   r   int__annotations__r   datasetsr   __static_attributes__r       ]/home/james-whalen/.local/lib/python3.13/site-packages/datasets/packaged_modules/hdf5/hdf5.pyr   r       s(    ! $J$,0Hhx(()0r!   r   c                   .    \ rS rSrSr\rS rS rS r	Sr
g)HDF5(   zXArrowBasedBuilder that converts HDF5 files to Arrow tables using the HF extension types.c                 R    [         R                  " U R                  R                  S9$ )N)r   )r   DatasetInfoconfigr   selfs    r"   _info
HDF5._info-   s    ##T[[-A-ABBr!   c           	      D   SS K nU R                  R                  (       d"  [        SU R                  R                   35      eSUR                  l        UR                  U R                  R                  5      n/ nUR                  5        H  u  pV[        U[        5      (       a  U/nU Vs/ s H  oqR                  U5      PM     nnU R                  R                  cq  [        R                  R                  U5       HN  n[!        US5       n	UR"                  " U	S5       n
[%        U
5      U R                  l        S S S 5        S S S 5          O   UR'                  [(        R*                  " USU0S95        M     U$ s  snf ! , (       d  f       NK= f! , (       d  f       NT= f)Nr   z=At least one data file must be specified, but got data_files=Trbrfiles)name
gen_kwargs)h5pyr(   
data_files
ValueErrordownload_configextract_on_the_flydownload_and_extractitems
isinstancestr
iter_filesinfor   	itertoolschainfrom_iterableopenFile_recursive_infer_featuresappendr   SplitGenerator)r*   
dl_managerr3   r4   splits
split_namer0   file
first_filefh5s              r"   _split_generatorsHDF5._split_generators0   sM   {{%%\]a]h]h]s]s\tuvv8<
""544T[[5K5KL
!+!1!1!3J%%%=BCUT**40UECyy!!)"+//"?"?"FJj$/1!YYq#."1J21NDII. / 0 	 #G
 MM(11zwX]N^_` "4  D
 /. 0/s*   +E;F F 	;F 
F
F
Fc           
   #     #    SS K nU R                  R                  n[        [        R
                  R                  U5      5       GH\  u  pE [        US5       nUR                  " US5       nU R                  R                  c  [        U5      U R                  l
        [        XpR                  R                  5      nUc,  [        R                  SU S35         S S S 5        S S S 5        M  U=(       d    U R                  =(       d    Un	[!        SX5       Hw  n
[#        X-   U5      n[%        XpR                  R                  X5      nUc  [        R                  SU S35        MO  U SU
 3['        XR                  R                  5      4v   My     S S S 5        S S S 5        GM_     g ! , (       d  f       N= f! , (       d  f       GM  = f! [(         a-  n[        R+                  SU S[-        U5       S	U 35        e S nAff = f7f)
Nr   r.   r/   zFile z contains no data, skipping..._zFailed to read file 'z' with error z: )r3   r(   r   	enumerater>   r?   r@   rA   rB   r=   r   rC   _check_dataset_lengthsloggerwarning_writer_batch_sizerangemin_recursive_load_arraysr   r5   errortype)r*   r0   r3   batch_size_cfgfile_idxrI   rK   rL   num_rowseffective_batchstartendpa_tablees                 r"   _generate_tablesHDF5._generate_tablesG   s    //'	(E(Ee(LMNH$%1c*b99--51J21NDII.#9"ii>P>P#Q#+"NNU4&8V+WX$ + &% +9*_D<S<S*_W_%*1h%HE"%e&=x"HC'=b))BTBTV['aH'/ &tf<Z/[ \ (%-Jaw"79OPXZcZcZlZl9m"mm &I + &% N +* &%"  4TF-QyPRSTRUVWs   A	HGF8,A.F'F8"G*H,B%F'F8G!H'
F51F88
G	GHG
H(G==HHr   N)r   r   r   r   r   r   BUILDER_CONFIG_CLASSr+   rM   rc   r    r   r!   r"   r$   r$   (   s    b%C.r!   r$   dtypereturnc                 p    U R                   S:X  a  gU R                  b  [        U R                  S   5      $ g)NcTr   F)kindsubdtype_is_complex_dtyperf   s    r"   rl   rl   h   s2    zzS~~! !233r!   c                    U R                   R                  b  U R                   R                  u  pOU R                  SS  nU R                   nU[        R                  :X  a  [        S5      nODU[        R                  :X  a  [        S5      nO$[        R                  SU S35        [        S5      n[        [        X#5      [        X#5      S.5      $ )N   float32float64zFound complex dtype z0 that is not supported. Converting to float64...realimag)rf   rk   shapenp	complex64r   
complex128rS   rT   r   _create_sized_feature_impl)dsetrf   
data_shape
value_types       r"   _create_complex_featuresr}   p   s    zz& JJ//zZZ^


9%
	"--	9%
-eW4def9%
.zF.zF	
 r!   arrc                     [         R                  R                  R                  U R                  5      [         R                  R                  R                  U R                  5      S.n[
        R                  R                  US   US   /SS/S9$ )Nrr   rs   rt   names)r   r   numpy_to_pyarrow_listarrayrs   rt   paStructArrayfrom_arrays)r~   datas     r"   _convert_complex_to_nestedr      st    !!**EEchhO!!**EEchhOD >>%%tF|T&\&B6SYJZ%[[r!   c                      U R                   S:H  $ NV)rj   rm   s    r"   _is_compound_dtyper      s    ::r!   c                   H    \ rS rSr% S\S'   Sr\R                  \S'   S rSr	g)_CompoundGroup   zh5py.Datasetrz   Nr   c              #      #    U R                   R                  R                   H6  nU R                   R                  U   nU[        U R                  X5      4v   M8     g 7fN)rz   rf   r   _CompoundFieldr   )r*   
field_namefield_dtypes      r"   r9   _CompoundGroup.items   sE     ))////J))//*5KnTYY
PPP 0s   AAr   )
r   r   r   r   r   r   rv   ndarrayr9   r    r   r!   r"   r   r      s    
D"**Qr!   r   c                       \ rS rSr% \\R                     \S'   \\S'   \R                  \S'   \
" SS9r\\S4   \S'   S	 rS
 rSrg)r      r   r1   rf   F)init.ru   c                     U R                   b  [        U R                   5      OS4U R                  R                  -   U l        g Nr   )r   lenrf   ru   r)   s    r"   __post_init___CompoundField.__post_init__   s.    (,		(=c$))n1FIYIYY
r!   c                 :    U R                   U   U R                     $ r   )r   r1   )r*   keys     r"   __getitem___CompoundField.__getitem__   s    yy~dii((r!   )ru   N)r   r   r   r   r   rv   r   r   r;   rf   r   ru   tupler   r   r   r    r   r!   r"   r   r      sA    
2::

I88O".E5c?.Z)r!   r   c                 .    [        U 5      n[        U5      $ r   )r   rC   )rz   
mock_groups     r"   _create_compound_featuresr      s    %J$Z00r!   c                 V    [        XS9n[        U5      n[        X#S[        U 5      5      $ )N)r   r   )r   r   rX   r   )r~   rz   r   r   s       r"   _convert_compound_to_nestedr      s)    /J(.H!*3s8DDr!   c                 H    U R                   (       a  SU R                   ;   a  gg)NvlenTF)metadatarm   s    r"   _is_vlen_dtyper      s    ~~&ENN2r!   c                     U R                   R                  S   nU[        [        4;   a  [	        S5      $ [        U5      n[        U5      $ )Nr   string)rf   r   r;   bytesr   _np_to_pa_to_hf_valuer   )rz   
vlen_dtypeinner_features      r"   _create_vlen_featuresr      sA    $$V,Jc5\!X)*5Mr!   c                 T    [         R                  R                  R                  U 5      $ r   )r   r   r   )r~   s    r"   _convert_vlen_to_arrayr      s    %%@@EEr!   c                     0 nU R                  5        HY  u  p#[        U5      (       a  [        U5      nU(       a  XAU'   M-  M/  [        U5      (       d  MA  [	        U5      nU(       d  MU  XAU'   M[     [        U5      $ r   )r9   	_is_grouprC   _is_dataset_infer_featurer   )h5_objfeatures_dictpathrz   r   s        r"   rC   rC      sk    Mlln
T??06H&.d# %d+Hx&.d# % M""r!   c                 *   [        U R                  5      (       a  [        U 5      $ [        U R                  5      (       d  U R                  R                  S:X  a  [        U 5      $ [        U R                  5      (       a  [        U 5      $ [        U 5      $ r   )	rl   rf   r}   r   rj   r   r   r   _create_sized_feature)rz   s    r"   r   r      sj    $$'--	DJJ	'	'4::??c+A(..	

	#	#$T** &&r!   r   r_   r`   c                    XU n[        U R                  5      (       a  [        U5      $ [        U R                  5      (       a  [	        U5      $ [        U R                  5      (       a  [        X@5      $ U R                  R                  S:X  a  [        SU S35      e[        S U R                  SS   5       5      (       aZ  [        R                  " U R                  5      n[        R                  " U Vs/ s H  n/ PM     sn[        R                  " U5      S9$ [        R                   R                   R#                  U5      $ s  snf )NOzObject dtype dataset 'z' is not supported. For variable-length data, please use h5py.vlen_dtype() when creating the HDF5 file. See: https://docs.h5py.org/en/stable/special.html#variable-length-stringsc              3   *   #    U  H	  oS :H  v   M     g7fr   Nr   .0dims     r"   	<genexpr>_load_array.<locals>.<genexpr>  s     2>Cax>   ro   )rZ   )r   rf   r   rl   r   r   r   rj   r5   anyru   r   from_numpy_dtypearraylist_r   r   r   )rz   r   r_   r`   r~   
inner_typerP   s          r"   _load_arrayr      s   
S/Cdjj!!%c**	4::	&	&)#..	DJJ	'	'*355	C	$TF +X Y
 	
 24::ab>222,,TZZ8J88-AR-BHHZ4HII$$--HHMM .s   5Er   c                    0 nU R                  5        Hi  u  pVXQ;  a  M  [        U5      (       a  [        XaU   X#5      nO4[        U5      (       a  [	        XeX#5      nO[        S[        U5       35      eUc  Me  XtU'   Mk     [        U 5      (       a  [        R                  R                  U5      $ U(       a  S/ / pnUR                  5        HX  u  p[        U[        R                  5      (       a  SnUR                  5       nU	R                  U5        U
R                  U5        MZ     [        R                  R!                  XS9nU(       a  [        R"                  " U5      $ U$ g )NzUnexpected type FTr   )r9   r   rX   r   r   r5   rZ   _is_filer   Tablefrom_pydictr:   ChunkedArraycombine_chunksrD   r   r   chunked_array)r   r   r_   r`   
batch_dictr   rz   r~   should_chunkkeysvalueskvsarrs                 r"   rX   rX     s,   Jlln
T??(~uJCd%5C/T
|<==?"t % xx##J//%*BF$$&DA!R__--#$$&KKNMM! ' ~~))&)=)5r%?4? r!   c                 `    U R                   SS  n[        U R                  5      n[        X5      $ )Nro   )ru   r   rf   ry   )rz   
dset_shapevalue_features      r"   r   r   1  s*    ABJ)$**5M%j@@r!   c           	      .   UR                   n[        S U  5       5      (       a*  [        R                  SU  SU SU S35        [	        U5      $ [        U 5      nUS:X  a  U$ US:X  a  [	        XS   S9$ US	::  a  [        U5      " XS
9$ [        SU S35      e)Nc              3   *   #    U  H	  oS :H  v   M     g7fr   r   r   s     r"   r   -_create_sized_feature_impl.<locals>.<genexpr>9  s     
*z!8zr   z*HDF5 to Arrow: Found a dataset with shape z and dtype z\ that has a dimension with size 0. Shape information will be lost in the conversion to List(z).r   ro   )length   )ru   rf   Arrayz.D not supported. Maximum 5 dimensions allowed.)rf   r   rS   rT   r   r   _sized_arrayxd	TypeError)r   r   	dtype_strranks       r"   ry   ry   7  s    ##I

*z
***8KPY{  [w  xE  wF  FH  I	
 M""z?Dqy	MQ-88	d#*FF%v%STUUr!   r   c                 6    [         [        [        [        S.U    $ )N)         r   )r   r   r	   r
   )r   s    r"   r   r   J  s    7w7;DAAr!   numpy_dtypec                 N    [        [        [        R                  " U 5      5      S9$ )Nrm   )r   r   r   r   )r   s    r"   r   r   N  s    /0C0CK0PQRRr!   c                     U R                  5        HO  u  p4X1;  a  M  [        U5      (       a  [        XAU   U U S3S9nUb  Us  $ M7  [        U5      (       d  MI  U U 3s  $    g )N/)prefix)r9   r   _first_datasetr   )r   r   r   r   rz   founds         r"   r   r   R  sl    lln
T??"4$6(4&PQ@RSE  !XdV$$ %r!   c           	         [        X5      nUc  g X   R                  S   nU R                  5        HR  u  pEXA;  a  M  [        U5      (       d  M  UR                  S   U:w  d  M3  [	        SU SUR                  S    SU 35      e   U$ )Nr   z	Dataset 'z' has length z but expected )r   ru   r9   r   r5   )r   r   
first_pathr]   r   rz   s         r"   rR   rR   ^  s    1J!''*Hlln
tzz!}( 9TF-

1n]e\f!ghh % Or!   c                 b    SS K n[        XR                  5      =(       d    [        U [        5      $ r   )r3   r:   Groupr   r   r3   s     r"   r   r   m  s     fjj)OZ-OOr!   c                 b    SS K n[        XR                  5      =(       d    [        U [        5      $ r   )r3   r:   Datasetr   r   s     r"   r   r   s  s     fll+Qz&./QQr!   c                 4    SS K n[        XR                  5      $ r   )r3   r:   rB   r   s     r"   r   r   y  s    fii((r!   c                 :   [        U [        5      (       a  [        S U R                   5       5      $ [        U [        5      (       a+  U R
                  S:H  =(       d    [        U R                  5      $ [        U [        5      (       a  [        U R                  5      $ g)Nc              3   *   #    U  H	  oS :H  v   M     g7fr   r   r   s     r"   r   '_has_zero_dimensions.<locals>.<genexpr>  s     5}!8}r   r   F)	r:   r   r   ru   r   r   _has_zero_dimensionsfeaturer   )r   s    r"   r   r     sn    '8$$5w}}555	GT	"	"~~"K&:7??&KK	GY	'	'#GOO44r!   ) )Dr>   dataclassesr   r   typingr   r   numpyrv   pyarrowr   r   datasets.features.featuresr   r   r	   r
   r   r   r   r   r   r   datasets.tabler   r3   utilslogging
get_loggerr   rS   
EXTENSIONSBuilderConfigr   ArrowBasedBuilderr$   rf   boolrl   r}   r   r   r   r   r   r   r   r   r   r   r   r   rC   r   r;   r   r   rX   r   ry   r   r   r   rR   r   r   r   r   r   r!   r"   <module>r     sQ    ( *      2 				*	*8	4W
 1'' 1 188%% 8@RXX $ h 2\BJJ \2>> \bhh 4  Q Q Q 
) 
) 
)1x 1
Ebnn E"(( t 8 F

 Frxx F# #'NC N N# N"(( N4@X @c @ @HAV&B BSrxx SE S	%X 	%X # P PR4 R) )r!   