
    hJ                       % S SK Jr  S SKrS SKJrJr  S SKJr  S SKJ	r	  S SK
Jr  S SKJrJrJr  S SKJr  S SKJrJr  S S	KJr  S S
KJrJrJr  S SKJr  \(       a  S SKrS SK r!S SK"J#r#  S SK$J%r%   " S S5      r& " S S\5      r'\ " S S\'5      5       r(\ " S S\'5      5       r)S?S jr*    S@S jr+0 SS_SS_SS_SS _S!S"_S#S$_S%S&_S'S(_S)S*_S+S,_S-S._S/S0_S1S2_S3S4_S5S6_S7S8_S9S:_S;S<S=.Er,S\-S>'   g)A    )annotationsN)ABCabstractmethod)	dataclass)partial)perf_counter)TYPE_CHECKINGAnyLiteral)eprintverbose)ComputeError)IcebergStatisticsLoader)IdentityTransformedPartitionValuesBuilder_scan_pyarrow_dataset_impl)ScanCastOptions)Table)	LazyFramec                      \ rS rSrSrSSSSS.           SS jjrSS jrSS jrSSSSS	.         SS
 jjrSSSSS	.         SS jjr	SS jr
SS jrSS jrSS jrSrg)IcebergDataset   z Dataset interface for PyIceberg.NT)snapshot_idiceberg_storage_propertiesreader_overrideuse_metadata_statisticsc                   S U l         S U l        X l        X0l        X@l        XPl        [        U[        5      (       a  Xl         g Xl        g N)_metadata_path_table_snapshot_id_iceberg_storage_properties_reader_override_use_metadata_statistics
isinstancestr)selfsourcer   r   r   r   s         S/home/james-whalen/.local/lib/python3.13/site-packages/polars/io/iceberg/dataset.py__init__IcebergDataset.__init__   sE     #'+E(GV(?%
 fc"""( K    c                "    U R                  5       $ )zFetch the schema of the table.)arrow_schemar&   s    r(   schemaIcebergDataset.schema;   s      ""r+   c                V    SSK Jn  U" U R                  5       R                  5       5      $ )z$Fetch the arrow schema of the table.r   schema_to_pyarrow)pyiceberg.io.pyarrowr3   tabler/   )r&   r3   s     r(   r-   IcebergDataset.arrow_schema?   s    : !4!4!677r+   existing_resolved_version_keylimit
projectionfilter_columnsc               h    U R                  UUUUS9=n c  gUR                  5       UR                  4$ )zConstruct a LazyFrame scan.r7   N)_to_dataset_scan_implto_lazyframesnapshot_id_key)r&   r8   r9   r:   r;   	scan_datas         r(   to_dataset_scanIcebergDataset.to_dataset_scanE   sS     33.K%-	 4  I  %%')B)BBBr+   c                   SSK Jn  SS KnUR                  R                  R                  5       nU(       a.  [        SU R                   SU SU SU SU R                   3
5        U R                  5       nU(       a"  [        SUR                  R                   35        U R                  n	S n
U	be  UR                  U	5      nUc  S	U	 3n[        U5      eUR                  n
U
c  S
U	 S3n[        U5      eUR                  5       U
   nUR                    nOHUR#                  5       nUR                  R$                  n
UR'                  5       =nb  UR                    OSnUb  X:X  a  U(       a  [        SU< S35        g U R(                  =(       d    [*        R,                  " S5      nU(       a  US;  a  SU S3n[        U5      eUS:X  a  SO UR.                  S::  d  SUR.                   3OS nUc  SO
[1        U5      nUS:X  a  UOUR2                  " U6 n/ n[5        UU5      nU R                  (       a  Ub  [7        XR2                  " U6 5      OS n0 nUS:w  Ga  U(       Gd  SSKJnJn  U(       a  [        S5        [?        5       nURA                  U	UUS9nSn[C        URE                  5       5       GHn  u  nnURF                  RH                  URJ                  :w  a  SURF                  RH                   3n  GO,URL                  (       a  / UU'   URL                   H|  nURN                  URP                  :w  a  SURN                   3n  ORURH                  URJ                  :w  a  SURH                   3n  O'UU   RS                  URT                  5        US-  nM~     U(       a    OURW                  UURF                  RX                  URF                  RZ                  S9  Ub  UR]                  URF                  5        URS                  URF                  RT                  5        GMq     U(       a  [?        5       U-
  n [        S U S! S"35        U(       d  U(       aC  [_        U5      S:X  a  SOS#n!WS:X  a  SOS#n"[        S$[_        U5       S%U! S&U	 S'U
 S(U S)U" 35        U" U5      n#URa                  5       n$Ub  URa                  [_        U5      U$5      OS n%U Rb                  b  [e        U Rb                  5      OS n&[g        UUU#U$UU%UU&US*9	$ US+:X  a  S,U 3n[i        U5      eU(       a  [        S-U 35        [k        [l        UU	UUS.9n'U" UR#                  5       5      n([n        Rp                  Rs                  U(U'S/S/S09n)[u        U)US19$ )2Nr   r2   z0IcebergDataset: to_dataset_scan(): snapshot ID: z	, limit: z, projection: z, filter_columns: z!, self._use_metadata_statistics: zEIcebergDataset: to_dataset_scan(): tbl.metadata.current_snapshot_id: ziceberg snapshot ID not found: z#IcebergDataset: requested snapshot z did not contain a schema ID zCIcebergDataset: to_dataset_scan(): early return (snapshot_id_key = )POLARS_ICEBERG_READER_OVERRIDE)native	pyicebergz-iceberg: unknown value for reader_override: 'z*', expected one of ('native', 'pyiceberg')rH   z"forced reader_override='pyiceberg'   z"unsupported table format version: )*)DataFileContent
FileFormatz7IcebergDataset: to_dataset_scan(): begin path expansion)r   r9   selected_fieldsznon-parquet format: z unsupported deletion file type: z"unsupported deletion file format:    )current_indexpartition_spec_idpartition_valuesz:IcebergDataset: to_dataset_scan(): finish path expansion (z.3fzs)sz:IcebergDataset: to_dataset_scan(): native scan_parquet(): z sourcez, snapshot ID: z, schema ID: z, z deletion file)	sourcesprojected_iceberg_schemacolumn_mappingdefault_valuesdeletion_filesmin_max_statisticsstatistics_loaderstorage_options_snapshot_id_keyrG   z)iceberg reader_override='native' failed: zGIcebergDataset: to_dataset_scan(): fallback to python[pyiceberg] scan: )r   n_rowswith_columnsT)pyarrowis_pure)lfr[   );r4   r3   polars._utils.logging_utilsloggingr   r   r    r#   r5   metadatacurrent_snapshot_idsnapshot_by_id
ValueError	schema_idschemasr   r/   current_schema_idcurrent_snapshotr"   osgetenvformat_versiontupleselectr   r   pyiceberg.manifestrK   rL   r   scan	enumerate
plan_filesfilefile_formatPARQUETdelete_filescontentPOSITION_DELETESappend	file_pathpush_partition_valuesspec_id	partitionpush_file_statisticslenfinishr!   0_convert_iceberg_to_object_store_storage_options_NativeIcebergScanDatar   r   r   plr   _scan_python_function_PyIcebergScanData)*r&   r8   r9   r:   r;   r3   polarsr   tblr   rh   snapshotmsgiceberg_schemar?   vr   fallback_reasonrM   rT   rS   missing_field_defaultsrY   rW   rK   rL   
start_timerr   total_deletion_filesi	file_infodeletion_fileelapsedrR   s2rU   identity_transformed_valuesrX   rZ   funcr-   r`   s*                                             r(   r=   $IcebergDataset._to_dataset_scan_implZ   s4    	;$--''//1  $ 1 12 3 !)l +##1"2 32262O2O1PR jjl558\\5U5U4VX
 ''	"))+6H7}E o% **I 9+ G2 2  !o% [[]95N!)!5!5 6O ZZ\N66I -0,@,@,B'Bq&O1==/UW 
 *5-@+'+1.
  // 
299,4
 6MM#$$NP  S/! +- 1 %%* 6c6H6H5IJ 	 %/$6&E*<M &( &&8 	! !J$"
 ,,1K $C)>)>)OP 	
 02k)/FPQ%J88' /  D $%  )$//*; <9>>--1C1CC.y~~/I/I.JK $ ))(*N1%)2)?)?(00O4T4TT B#0#8#8"9!; , "(44
8J8JJ D#0#<#<"=!? , "&q)001H1HI,1,! *@$ #&<<"#&/nn&<&<%.^^%=%= =  %0%::9>>Jy~~778Q !=T &.:5..5c]">
 g,!+B/14R#.7|nGA3 /$$/= 1""+B+,N2$@ /~>N*@*G*G*I' %0 "((W7RS  33? A44   *)A-:-#5"3 /!0
 
 (=o=NOCs##77F6GI
 &##
 )6\\//	 0 
 "R/JJr+   c                    U R                   c9  U R                  c  Sn[        U5      eU R                  5       R                  U l         U R                   $ )zFetch the metadata path.1impl error: both metadata_path and table are None)r   r   rg   r5   metadata_location)r&   r   s     r(   metadata_pathIcebergDataset.metadata_path[  sG    &{{"I o%"&**,"@"@D"""r+   c                (   U R                   cz  U R                  c  Sn[        U5      e[        5       (       a  [	        SU R                  < 35        SSKJn  UR                  U R                  U R                  =(       d    0 S9U l         U R                   $ )z!Fetch the PyIceberg Table object.r   z;IcebergDataset: construct table from self._metadata_path = r   )StaticTable)r   
properties)	r   r   rg   r   r   pyiceberg.tabler   from_metadatar!   )r&   r   r   s      r(   r5   IcebergDataset.tablef  s    ;;""*I o%yyUt?R?R>VWX3%33"&"5"5;;Ar 4 DK
 {{r+   c           
        U R                  5       U R                  U R                  U R                  S.n[	        5       (       a?  US   nUS   =nb  SU S3OS n[        US   5      nUS   n[        SU SU S	U S
U 35        U$ )N)r   r   r   r   r   r   'r   r   #IcebergDataset: getstate(): path: 'z', snapshot_id: z, iceberg_storage_properties: , reader_override: )r   r    r!   r"   r   _redact_dict_valuesr   )r&   state	path_reprr   r   	keys_reprr   s          r(   __getstate__IcebergDataset.__getstate__  s    !//1,,*.*J*J#44	
 99o.I,1-,@'@q&MAaS(SWK+E2N,OPI#$56O# %  +} -//8k :$$3#4	6 r+   c           
         [        5       (       a4  US   nUS   n[        US   5      nUS   n[        SU SU SU SU 35        [        R	                  U US   US   US   US   S	9  g )
Nr   r   r   r   r   z', snapshot_id: 'z', iceberg_storage_properties: r   )r   r   r   )r   r   r   r   r)   )r&   r   r   r   r   r   s         r(   __setstate__IcebergDataset.__setstate__  s    99o.I.K+E2N,OPI#$56O# %!!, .//8k :$$3#4	6 	/"m,',-I'J!"34 	  	
r+   )r!   r   r"   r    r   r#   )r'   zstr | Tabler   
int | Noner   zdict[str, Any] | Noner   z%Literal['native', 'pyiceberg'] | Noner   boolreturnNone)r   z	pa.schema)
r8   
str | Noner9   r   r:   list[str] | Noner;   r   r   ztuple[LazyFrame, str] | None)
r8   r   r9   r   r:   r   r;   r   r   z2_NativeIcebergScanData | _PyIcebergScanData | Noner   r%   )r   r   )r   dict[str, Any])r   r   r   r   )__name__
__module____qualname____firstlineno____doc__r)   r/   r-   rA   r=   r   r5   r   r   __static_attributes__ r+   r(   r   r      s   * #'<@AE(,!!  	!
 %:! ?! "&! 
!8#8 59 '++/C (2C 	C
 %C )C 
&C0 59 '++/{K (2{K 	{K
 %{K ){K 
<{KB	#40
r+   r   c                  F    \ rS rSr\SS j5       r\\SS j5       5       rSrg)_ResolvedScanDataBasei  c                    g r   r   r.   s    r(   r>   "_ResolvedScanDataBase.to_lazyframe  s    ,/r+   c                    g r   r   r.   s    r(   r?   %_ResolvedScanDataBase.snapshot_id_key  s    &)r+   r   Nr   pl.LazyFramer   )	r   r   r   r   r   r>   propertyr?   r   r   r+   r(   r   r     s&    / /)  )r+   r   c                      \ rS rSr% SrS\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   S\S'   S\S'   SS jr\SS j5       rSr	g)r   i  z.Resolved parameters for a native Iceberg scan.z	list[str]rS   zpyiceberg.schema.SchemarT   z	pa.SchemarU   zdict[int, pl.Series | str]rV   zdict[int, list[str]]rW   zpl.DataFrame | NonerX   zIcebergStatisticsLoader | NonerY   zdict[str, str] | NonerZ   r%   r[   c                    SSK Jn  U" U R                  [        R                  " 5       SSU R
                  SU R                  4SU R                  4SU R                  4U R                  S9	$ )	Nr   )scan_parquetinsertignoreziceberg-column-mappingicebergziceberg-position-delete)cast_optionsmissing_columnsextra_columnsrZ   _column_mapping_default_values_deletion_files_table_statistics)
polars.io.parquet.functionsr   rS   r   _default_icebergrZ   rU   rV   rW   rX   )r&   r   s     r(   r>   #_NativeIcebergScanData.to_lazyframe  sg    <LL(99;$" 005t7J7JK&(;(;<68K8KL"55

 
	
r+   c                    U R                   $ r   r[   r.   s    r(   r?   &_NativeIcebergScanData.snapshot_id_key      $$$r+   r   Nr   r   
r   r   r   r   r   __annotations__r>   r   r?   r   r   r+   r(   r   r     sV    855..((++
 65**
 % %r+   r   c                  L    \ rS rSr% SrS\S'   S\S'   SS jr\SS j5       rS	r	g
)r   i  z.Resolved parameters for reading via PyIceberg.r   r`   r%   r[   c                    U R                   $ r   )r`   r.   s    r(   r>   _PyIcebergScanData.to_lazyframe  s    wwr+   c                    U R                   $ r   r   r.   s    r(   r?   "_PyIcebergScanData.snapshot_id_key  r   r+   r   Nr   r   r   r   r+   r(   r   r     s,    8 	 % %r+   r   c                    [        U [        5      (       a!  U R                  5        Vs0 s H  oS_M     sn$ U b  S[        U 5      R                   S3$ S$ s  snf )NREDACTED<z object>r   )r$   dictkeystyper   )objks     r(   r   r     sd     c4   !$
+
1J
+ ? c##$H-
 +s   Adict[str, str]c                    0 nU R                  5        H0  u  p#[        R                  U5      =n b  X1U'   M$  SU;  d  M,  X1U'   M2     U$ )N.)items&ICEBERG_TO_OBJECT_STORE_CONFIG_KEY_MAPget)r   rZ   r   r   translated_keys        r(   r   r     s\     O*002DHHKKN /0N+\
 "#A 3  r+   zs3.endpointaws_endpoint_urlzs3.access-key-idaws_access_key_idzs3.secret-access-keyaws_secret_access_keyzs3.session-tokenaws_session_tokenz	s3.region
aws_regionzs3.proxy-uri	proxy_urlzs3.connect-timeoutconnect_timeoutzs3.request-timeouttimeoutzs3.force-virtual-addressing aws_virtual_hosted_style_requestzadls.account-nameazure_storage_account_namezadls.account-keyazure_storage_account_keyzadls.sas-tokenazure_storage_sas_keyzadls.tenant-idazure_storage_tenant_idzadls.client-idazure_storage_client_idzadls.client-secretazure_storage_client_secretzadls.account-hostazure_storage_authority_hostz
adls.tokenazure_storage_tokenbearer_tokentoken)zgcs.oauth2.tokenzhf.tokenr   )r   r
   r   r
   )r   r   r   r   ).
__future__r   rl   abcr   r   dataclassesr   	functoolsr   timer   typingr	   r
   r   polars._reexport	_reexportr   ra   r   r   polars.exceptionsr   polars.io.iceberg._utilsr   r   r   #polars.io.scan_options.cast_optionsr   r^   papyiceberg.schemarH   r   r   polars.lazyframe.framer   r   r   r   r   r   r   r   r   r   r+   r(   <module>r     s   " 	 # !   . .  1 * 
 @%0Q
 Q
h*C * "%2 "% "%J %. % %  .4:%: +: 3	:
 +: : K: +: ): "#E: 5: 3: -: /:  /!:" 7#:$ 7%:& '':* '/: & r+   