
    h(                     :   S SK r S SKrS SKrS SKJr  S SKJrJr  S SKrS SK	r
S SKrS SKrS SKJr  S SKJrJr  \R&                  R(                  R+                  \5      r " S S\R0                  5      rS r/ S	Qr\\l        / S
Qr\\l        / SQr\\l        S\4S jrS\4S jr S\4S jr!S\4S jr"S\4S jr#S\4S jr$S\4S jr%0 S\_S\_S\_S\&_S\&_S\&_S\&_S\&_S\RN                  _S\RN                  _S\ _S\ _S \!_S!\!_S"\"_S#\#_S$\$_S%\%0Er(\(\l(        g)&    N)islice)AnyCallable)cast_to_python_objects)-SINGLE_FILE_COMPRESSION_EXTENSION_TO_PROTOCOL	xbasenamec                       \ rS rSr% Sr\\   \S'   \\   \S'   \\   \S'   \\\	\
/\
4   4   \S'   Sr\S 5       rS	\R                  4S
 jrS rS rSrg)
WebDataset   d   IMAGE_EXTENSIONSAUDIO_EXTENSIONSVIDEO_EXTENSIONSDECODERS   c              #     #    0 n[         R                  " S5      n[        R                  " 5       nU GH}  u  pg[	        U5      u  pUc  M  U(       a7  US   U:w  a.  UR                  S5      US'   UR                  S5      US'   Uv   0 nXS'   XS'   UR                  5       X9'   U	R                  S5      S   R                  5       [        ;   a  UR                  XcU	   5        UR                  SU 35      n
[         R                  " U
5       nUR                  5       X9'   S S S 5        UR                  U5        [        U
5      R                  S5      S   R                  5       nO"U	R                  S5      S   R                  5       nXR                  ;   d  GMd  U R                  U   " X9   5      X9'   GM     U(       a  Uv   g g ! , (       d  f       N= f7f)Nmemory__key____url__.z	memory://)fsspec
filesystemdatasetsStreamingDownloadManagerbase_plus_extpopreadsplitlowerr   write_bytesextractopendeleter   r   )clstar_pathtar_iteratorcurrent_examplefsstreaming_download_managerfilenamefexample_key
field_nameextracted_file_pathdata_extensions               i/home/james-whalen/.local/lib/python3.13/site-packages/datasets/packaged_modules/webdataset/webdataset.py_get_pipeline_from_tar!WebDataset._get_pipeline_from_tar   s    (.(9(9((C%-%F%F%H"'KH&3H&=#K"?9#=#L-<-@-@-K	*-<-@-@-K	*%%"$)4I&)1I&*+&&(O'$R(..04aax)DE&@&H&H9U]T^I_&`#[[!4523&&(O/ 6		(#!*+>!?!E!Ec!J2!N!T!T!V!+!1!1#!6r!:!@!@!B-.1ll>.J?Kf.g+/ (0 !!  65s%   DG
GA4G+G
G	Greturnc                 ,    [         R                  " 5       $ )N)r   DatasetInfo)selfs    r1   _infoWebDataset._info;   s    ##%%    c           
        ^ U R                   R                  (       d"  [        SU R                   R                   35      eUR                  U R                   R                  5      n/ nUR	                  5        Hc  u  pE[        U[        5      (       a  U/nU Vs/ s H  oaR                  U5      PM     nnUR                  [        R                  " XEUS.S95        Me     U R                  R                  (       Gd  U R                  WS   WS   5      n[        [        XR                   5      5      m[#        U4S jT 5       5      (       a  [        S5      eT V	s/ s H+  n	[$        R&                  R)                  [+        U	/SS95      PM-     n
n	[$        R,                  " U
S	S
9R.                  n[        R0                  R3                  U5      nTS    H  nUR5                  SS5      S   R7                  5       nXR8                  ;   a  [        R:                  " 5       X'   XR<                  ;   a  [        R>                  " 5       X'   XR@                  ;   d  M  [        RB                  " 5       X'   M     XR                  l        U$ s  snf s  sn	f )z-We handle string, list and dicts in datafilesz=At least one data file must be specified, but got data_files=)	tar_pathstar_iterators)name
gen_kwargsr   c              3   j   >#    U  H(  oR                  5       TS    R                  5       :g  v   M*     g7f)r   N)keys).0examplefirst_exampless     r1   	<genexpr>/WebDataset._split_generators.<locals>.<genexpr>R   s(     \^'<<>^A%6%;%;%==^s   03zThe TAR archives of the dataset should be in WebDataset format, but the files in the archive don't share the same prefix or the same types.T)only_1d_for_numpydefault)promote_optionsr      r   )"config
data_files
ValueErrordownloaditems
isinstancestriter_archiveappendr   SplitGeneratorinfofeaturesr2   listr   #NUM_EXAMPLES_FOR_FEATURES_INFERENCEanypaTablefrom_pylistr   concat_tablesschemaFeaturesfrom_arrow_schemarsplitr    r   Imager   Audior   Video)r7   
dl_managerrL   splits
split_namer<   r&   r=   pipelinerC   	pa_tablesinferred_arrow_schemarV   r.   	extensionrD   s                  @r1   _split_generatorsWebDataset._split_generators>   s.    {{%%\]a]h]h]s]s\tuvv(()?)?@
%/%5%5%7!J)S))&K	OXYy844X>yMYMM''#Zg0h	 &8 yy!!!229Q<qAQRH!&3[3["\]N\^\\\ b   .-G $$%;WIY]%^_-   %'$4$4YPY$Z$a$a!((::;PQH,Q/
&--c15b9??A	 5 55+3>>+;H( 5 55+3>>+;H( 5 55+3>>+;H( 0 "*IIG Zs   I?2Jc              #     #    U R                   R                  R                  5        VVs/ s H(  u  p4[        U[        R
                  5      (       d  M&  UPM*     nnnU R                   R                  R                  5        VVs/ s H(  u  p4[        U[        R                  5      (       d  M&  UPM*     nnn[        U R                   R                  R                  5       5      n[        [        X5      5       Hn  u  nu  p[        U R                  X5      5       HH  u  pU H  nX<;  d  M
  S X'   M     XV-    H  nX   c  M
  US   S-   U-   X   S.X'   M     U SU 3U4v   MJ     Mp     g s  snnf s  snnf 7f)Nr   r   )pathbytes_)rU   rV   rO   rP   r   rb   rc   rW   rA   	enumeratezipr2   )r7   r<   r=   r.   featureimage_field_namesaudio_field_namesall_field_namestar_idxr&   r'   example_idxrC   s                r1   _generate_examplesWebDataset._generate_examplesm   s`    26))2D2D2J2J2L
2L.:PZ[bdldrdrPsJ2L 	 
 37))2D2D2J2J2L
2L.:PZ[bdldrdrPsJ2L 	 
 tyy1166891:3y;X1Y-G-h(1$2M2Mh2e(f$"1J!0.2+ #2 #4"GJ*6$+I$6$<z$I%,%8/+ #H !	;-0'99 )g 2Z

s5   (E5%E)E)*E5%E/,E/2A3E5)E5?6E5 N)__name__
__module____qualname____firstlineno__DEFAULT_WRITER_BATCH_SIZErW   rQ   __annotations__dictr   r   rX   classmethodr2   r   r6   r8   rl   rz   __static_attributes__r|   r:   r1   r
   r
      sv     #3i3i3i3#,,--*+'" ">&x++ &-^:r:   r
   c                     [         R                  " SU 5      nU(       d  gUR                  S5      UR                  S5      4$ )z6Split off all file extensions.

Returns base, allext.
z^((?:.*/|)[^.]+)[.]([^/]*)$)NNrJ      )rematchgroup)ro   r   s     r1   r   r      s6    
 HH3T:E;;q>5;;q>))r:   )?blpbmpdibbufrcurpcxdcxddspsepsfitfitsfliflcftcftugbrgifgribh5hdfpngapngjp2j2kjpcjpfjpxj2cicnsicoimiimtiftiffjfifjpejpgjpegmpgmpegmsppcdpxrpbmpgmppmpnmpsdbwrgbrgbasgirastgaicbvdavstwebpwmfemfxbmxpm)aiffauavrcafflachtksvxmat4mat5mpc2koggpafpvfrawrf64sd2sdsircamvocw64wavnistwavexwveximp3opus)z.mkvz.mp4z.aviz.mpegz.movdatac                 $    U R                  S5      $ )Nzutf-8)decoder   s    r1   
text_loadsr     s    ;;wr:   c                 0    SSK Jn  UR                  U 5      $ )NrJ   )_tenbin) r   decode_buffer)r   r   s     r1   tenbin_loadsr     s      &&r:   c                 ,    SS K nUR                  U 5      $ Nr   )msgpackunpackb)r   r   s     r1   msgpack_loadsr   !  s    ??4  r:   c                 ~    SS K n[        R                  " U 5      nUR                  R                  R                  USS9$ )Nr   Fallow_pickle)numpy.lib.formatioBytesIOlibformat
read_array)r   numpystreams      r1   	npy_loadsr   '  s3    ZZF99&&vE&BBr:   c                 T    [         R                  " [        R                  " U 5      SS9$ )NFr   )nploadr   r   r   s    r1   	npz_loadsr  .  s    772::d#%88r:   c                 ,    SS K nUR                  U 5      $ r   )cborloads)r   r  s     r1   
cbor_loadsr  2  s    ::dr:   c                 R    SS K nUR                  [        R                  " U 5      SS9$ )Nr   T)weights_only)torchr  r   r   )r   r
  s     r1   torch_loadsr  8  s!    ::bjj&T:::r:   txttext
transcriptr%   cls2indexinxidjsonjsntentbmpmsgnpynpzr  pth))r   r  r   	itertoolsr   typingr   r   r   r   r  pyarrowrZ   r   datasets.features.featuresr   datasets.utils.file_utilsr   r   utilslogging
get_loggerr}   loggerGeneratorBasedBuilderr
   r   r   r   r   rp   r   r   r   r   r  r  r  intr  r   r|   r:   r1   <module>r'     s   	  	        = ^ 
			*	*8	4m:// m:b*.@ B /
   : /
   /
  U  'u '! !CE C9E 9U ;e ;	:
J * 
3	
 C S 
3 	# DJJ 
4:: 
< 	, 	- 
= 
9  
9!" J#$ 
;%( 
 r:   