
    h              &         S SK Jr  S SKrS SKrS SKJr  S SKJrJr  S SK	J
r
  S SKJrJrJrJrJr  S SKJr  S SKJr  S SKJr  S SKJrJrJrJrJr  S S	KJ r J!r!  S S
K"J#r#J$r$J%r%  S SK&J'r'J(r(J)r)J*r*  S SK+J,r,  S SK-J.r.J/r/  S SK0J1r1  \Rd                  " \35         S SK4J5r5J6r6  SSS5        \(       a$  S SKJ7r7  S SK8J9r9J:r:  S SK;J<r<J=r=J>r>  S SK?J@r@  S SK+JArA  \" SSSS9\" SSSS9\" SSSS9SSSSSS S S SSSS!S!S!SS\#S"SS#S!S!S!SS SS S$S%SS!S!SS&.!                                                                     S3S' jj5       5       5       rBSSSSS S S SSSS!S!S!S\#S"SS#S!S!S SS S$S%SS!S!SS(.                                                             S4S) jjrC\" SSSS9\" SSSS9\" SSSS9SSSSSS S S SSS!S!S!S\#S*SS#S!S!S SS S$S%SS!S!S+.                                                           S5S, jj5       5       5       rD\" SSSS9\" SSSS9\" SSSS9SSSS S S SSSS!S!SSS\#SS#S!S!S SS S!S%SSS!S!SSS-S.SSS/."                                                                       S6S0 jj5       5       5       rESSSS S S SSSS!S!SS\#SS#S!S!S SS S!S%SSS!SSSS.SSS1.                                                                    S7S2 jjrFg! , (       d  f       GN= f)8    )annotationsN)Sequence)BytesIOStringIO)Path)IOTYPE_CHECKINGAnyCallableLiteral)deprecate_renamed_parameter)_process_null_valuesis_path_or_str_sequenceis_str_sequencenormalize_filepathqualified_type_name)wrap_dfwrap_ldf)N_INFER_DEFAULTStringparse_into_dtype)is_glob_patternparse_columns_argparse_row_index_argsprepare_file_arg)!_init_credential_provider_builder)_check_arg_is_1byte_update_columns)BatchedCsvReader)PyDataFramePyLazyFrame)Mapping)	DataFrame	LazyFrame)CsvEncodingPolarsDataType
SchemaDict)CredentialProviderFunction)CredentialProviderBuilderdtypesschema_overridesz0.20.31)versionrow_count_namerow_index_namez0.20.4row_count_offsetrow_index_offsetT,"Fi    utf8i   
)!
has_headercolumnsnew_columns	separatorcomment_prefix
quote_char	skip_rows
skip_linesschemar+   null_valuesmissing_utf8_is_empty_stringignore_errorstry_parse_dates	n_threadsinfer_schemainfer_schema_length
batch_sizen_rowsencoding
low_memoryrechunkuse_pyarrowstorage_optionsskip_rows_after_headerr.   r0   sample_sizeeol_charraise_if_emptytruncate_ragged_linesdecimal_commaglobc       !           [        SUSS9  [        SUSS9  [        SUSS9  [        U5      u  n"nU=(       d    0 nU(       a3  U(       d,  U H&  n#U#R                  S5      (       a  M  Sn$[        U$5      e   U
b(  [	        U
[
        [        45      (       d  S
n$[        U$5      eU(       Ga  U
Gc  UGc  UGc  U(       Gd|  UGcx  S	n%U(       a.  U(       d%  U V#s/ s H  n#S[        U#SS	 5      S-
   3PM     n%n#OUn%U(       d  U"(       a  U" V&s/ s H  n&SU& 3PM
     n%n&[        U S	SUUS9 n'SS	K
n(SS	Kn) U(R                  R                  U'U(R                  R                  UUU(       + US9U(R                  R                  UU(       a  UOSUS	L=(       a    US:H  S9U(R                  R!                  S	U%US95      n* S	S	S	5        U(       d>  W*R+                  U*R,                   V#s/ s H  n#S[        U#SS	 5      S-    3PM     sn#5      n*[&        R(                  R/                  W*US9n,U(       a  [1        U,U5      $ U,$ U"(       a  U
(       a  [	        U
[2        5      (       ak  [5        U"5      [5        U
5      :  a  Sn$[        U$5      e[6        /[9        U"5      S-   -  n-[;        U"5       H  u  n.n&U.[5        U
5      :  d  M  U
U.   U-U&'   M!     U-n
U(       aU  U
(       aN  [	        U
[2        5      (       a9  [5        U5      [5        U
5      :  a  Sn$[        U$5      e[        [=        X*5      5      n
U(       Ga  U
(       Ga  [	        U
[
        5      (       Ga~  S	n/U(       a4  [5        U5      [5        U5      :  a  Sn$[        U$5      eUS[5        U5       n/OU(       dx  U"(       aE  U(       a%  [5        U5      [5        U5      :  a  Sn$[        U$5      eU" V&s/ s H  n&SU&S-    3PM     n/n&O[?        S[5        U5      S-   5       V&s/ s H  n&SU& 3PM
     n/n&O[[5        U
5      [5        U5      ::  aC  US[5        U
5        V0s/ s H  n0U0U
;   d  M  U
U0   PM     n1n0[5        U15      [5        U
5      :X  a  U1n
U/(       a`  [	        U
[
        5      (       aK  [        [=        UU/5      5      n2U
RA                  5        V3V4s0 s H  u  n3n4U2RC                  U3U35      U4_M     n
n3n4U(       d  Sn[	        U
[        5      n5US;   n6[D        RF                  " S5      S:H  =(       d    [D        RF                  " S5      S:H  n7U7(       dh  [	        U =n8[$        [H        45      (       Gax  [%        U85      R                  S5      (       d+  [D        RF                  " S5      S:X  Ga>  U5(       Gd6  U6(       Ga.  [	        U [$        [H        45      (       a  [K        U SS9n O*[M        U SS 9(       a  U  V s/ s H  n [K        U SS9PM     sn n U7(       d,  U5(       a  S!n$[        U$5      eU6(       d  S"U S#3n$[        U$5      e[O        U 40 S$U_SU_S%U_SU_S&U_S'U_S(U
_S)U	_S*U_S+U_S,U_S-U_S.U_S/U_S0U_S1U_S2U_S3U_S4U_S5U_SU_S6U_S7U_S8U _S9U!_6n9U(       a  U9RQ                  U5      n9O,U"(       a%  U9RQ                  [R        RT                  " U"5      5      n9U9RW                  5       n,O[        U USUUS9 n'[Y        U'40 S$U_S:U(       a  UOU"_SU_S%U_SU_S&U_S'U_S(U
_S)U	_S*U_S+U_S,U_S-U_S;U_S.U_S<U_S/U_S0US=:X  a  UOS>_S1U_S2U_S3U_S4U_S5U_SU_S6U_S7U_S8U _S9U!_6n,S	S	S	5        U(       a  [1        W,U5      $ W,$ s  sn#f s  sn&f ! U(R"                   a?  n+U(       d  S[%        U+5      ;  a  e [&        R(                  " 5       s S	n+A+sS	S	S	5        $ S	n+A+ff = f! , (       d  f       GN= fs  sn#f s  sn&f s  sn&f s  sn0f s  sn4n3f s  sn f ! , (       d  f       N= f)?uv   
Read a CSV file into a DataFrame.

Polars expects CSV data to strictly conform to RFC 4180, unless documented
otherwise. Malformed data, though common, may lead to undefined behavior.

.. versionchanged:: 0.20.31
    The `dtypes` parameter was renamed `schema_overrides`.
.. versionchanged:: 0.20.4
    * The `row_count_name` parameter was renamed `row_index_name`.
    * The `row_count_offset` parameter was renamed `row_index_offset`.

Parameters
----------
source
    Path to a file or a file-like object (by "file-like object" we refer to objects
    that have a `read()` method, such as a file handler like the builtin `open`
    function, or a `BytesIO` instance). If `fsspec` is installed, it will be used
    to open remote files. For file-like objects, the stream position may not be
    updated accordingly after reading.
has_header
    Indicate if the first row of the dataset is a header or not. If set to False,
    column names will be autogenerated in the following format: `column_x`, with
    `x` being an enumeration over every column in the dataset, starting at 1.
columns
    Columns to select. Accepts a list of column indices (starting
    at zero) or a list of column names.
new_columns
    Rename columns right after parsing the CSV file. If the given
    list is shorter than the width of the DataFrame the remaining
    columns will have their original name.
separator
    Single byte character to use as separator in the file.
comment_prefix
    A string used to indicate the start of a comment line. Comment lines are skipped
    during parsing. Common examples of comment prefixes are `#` and `//`.
quote_char
    Single byte character used for csv quoting, default = `"`.
    Set to None to turn off special handling and escaping of quotes.
skip_rows
    Start reading after ``skip_rows`` rows. The header will be parsed at this
    offset. Note that we respect CSV escaping/comments when skipping rows.
    If you want to skip by newline char only, use `skip_lines`.
skip_lines
    Start reading after `skip_lines` lines. The header will be parsed at this
    offset. Note that CSV escaping will not be respected when skipping lines.
    If you want to skip valid CSV rows, use ``skip_rows``.
schema
    Provide the schema. This means that polars doesn't do schema inference.
    This argument expects the complete schema, whereas `schema_overrides` can be
    used to partially overwrite a schema. Note that the order of the columns in
    the provided `schema` must match the order of the columns in the CSV being read.
schema_overrides
    Overwrite dtypes for specific or all columns during schema inference.
null_values
    Values to interpret as null values. You can provide a:

    - `str`: All values equal to this string will be null.
    - `List[str]`: All values equal to any string in this list will be null.
    - `Dict[str, str]`: A dictionary that maps column name to a
      null value string.

missing_utf8_is_empty_string
    By default a missing value is considered to be null; if you would prefer missing
    utf8 values to be treated as the empty string you can set this param True.
ignore_errors
    Try to keep reading lines if some lines yield errors.
    Before using this option, try to increase the number of lines used for schema
    inference with e.g `infer_schema_length=10000` or override automatic dtype
    inference for specific columns with the `schema_overrides` option or use
    `infer_schema=False` to read all columns as `pl.String` to check which
    values might cause an issue.
try_parse_dates
    Try to automatically parse dates. Most ISO8601-like formats can
    be inferred, as well as a handful of others. If this does not succeed,
    the column remains of data type `pl.String`.
    If `use_pyarrow=True`, dates will always be parsed.
n_threads
    Number of threads to use in csv parsing.
    Defaults to the number of physical cpu's of your system.
infer_schema
    When `True`, the schema is inferred from the data using the first
    `infer_schema_length` rows.
    When `False`, the schema is not inferred and will be `pl.String` if not
    specified in `schema` or `schema_overrides`.
infer_schema_length
    The maximum number of rows to scan for schema inference.
    If set to `None`, the full data may be scanned *(this is slow)*.
    Set `infer_schema=False` to read all columns as `pl.String`.
batch_size
    Number of lines to read into the buffer at once.
    Modify this to change performance.
n_rows
    Stop reading from CSV file after reading `n_rows`.
    During multi-threaded parsing, an upper bound of `n_rows`
    rows cannot be guaranteed.
encoding : {'utf8', 'utf8-lossy', 'windows-1252', 'windows-1252-lossy', ...}
    Lossy means that invalid utf8 values are replaced with `�`
    characters. When using other encodings than `utf8` or
    `utf8-lossy`, the input is first decoded in memory with
    python. Defaults to `utf8`.
low_memory
    Reduce memory pressure at the expense of performance.
rechunk
    Make sure that all columns are contiguous in memory by
    aggregating the chunks into a single array.
use_pyarrow
    Try to use pyarrow's native CSV parser. This will always
    parse dates, even if `try_parse_dates=False`.
    This is not always possible. The set of arguments given to
    this function determines if it is possible to use pyarrow's
    native parser. Note that pyarrow and polars may have a
    different strategy regarding type inference.
storage_options
    Extra options that make sense for `fsspec.open()` or a
    particular storage connection.
    e.g. host, port, username, password, etc.
skip_rows_after_header
    Skip this number of rows when the header is parsed.
row_index_name
    Insert a row index column with the given name into the DataFrame as the first
    column. If set to `None` (default), no row index column is created.
row_index_offset
    Start the row index at this offset. Cannot be negative.
    Only used if `row_index_name` is set.
sample_size
    Set the sample size. This is used to sample statistics to estimate the
    allocation needed.

    .. deprecated:: 1.10.0
        This parameter is now a no-op.
eol_char
    Single byte end of line character (default: `\n`). When encountering a file
    with windows line endings (`\r\n`), one can go with the default `\n`. The extra
    `\r` will be removed when processed.
raise_if_empty
    When there is no data in the source, `NoDataError` is raised. If this parameter
    is set to False, an empty DataFrame (with no columns) is returned instead.
truncate_ragged_lines
    Truncate lines that are longer than the schema.
decimal_comma
    Parse floats using a comma as the decimal separator instead of a period.
glob
    Expand path given via globbing rules.

Returns
-------
DataFrame

See Also
--------
scan_csv : Lazily read from a CSV file or multiple files via glob patterns.

Warnings
--------
Calling `read_csv().lazy()` is an antipattern as this forces Polars to materialize
a full csv file and therefore cannot push any optimizations into the reader.
Therefore always prefer `scan_csv` if you want to work with `LazyFrame` s.

Notes
-----
If the schema is inferred incorrectly (e.g. as `pl.Int64` instead of `pl.Float64`),
try to increase the number of lines used to infer the schema with
`infer_schema_length` or override the inferred dtype for those columns with
`schema_overrides`.

Examples
--------
>>> pl.read_csv("data.csv", separator="|")  # doctest: +SKIP

Demonstrate use against a BytesIO object, parsing string dates.

>>> from io import BytesIO
>>> data = BytesIO(
...     b"ID,Name,Birthday\n"
...     b"1,Alice,1995-07-12\n"
...     b"2,Bob,1990-09-20\n"
...     b"3,Charlie,2002-03-08\n"
... )
>>> pl.read_csv(data, try_parse_dates=True)
shape: (3, 3)
┌─────┬─────────┬────────────┐
│ ID  ┆ Name    ┆ Birthday   │
│ --- ┆ ---     ┆ ---        │
│ i64 ┆ str     ┆ date       │
╞═════╪═════════╪════════════╡
│ 1   ┆ Alice   ┆ 1995-07-12 │
│ 2   ┆ Bob     ┆ 1990-09-20 │
│ 3   ┆ Charlie ┆ 2002-03-08 │
└─────┴─────────┴────────────┘
r8   Fcan_be_emptyr:   TrN   column_aspecified column names do not start with 'column_', but autogenerated header names were requestedN1`schema_overrides` should be of type list or dictf      )rG   rJ   rO   rK   r   )r;   skip_rows_after_namesautogenerate_column_namesrG   r2   )	delimiterr:   double_quote)column_typesinclude_columnsinclude_missing_columnsz	Empty CSV)rI   Cmore schema overrides are specified than there are selected columnszCmore dtypes overrides are specified than there are selected columnsCmore new column names are specified than there are selected columns>   r3   
utf8-lossyPOLARS_FORCE_NEW_STREAMING1POLARS_AUTO_NEW_STREAMINGzhf://POLARS_FORCE_ASYNCcheck_not_directory	allow_strzCpassing a list to `schema_overrides` is unsupported for hf:// pathszunsupported encoding z for hf:// pathsr5   r9   r;   r<   r+   r=   r>   r?   r@   rA   rD   rF   rG   rH   rI   rL   r.   r0   rO   rP   rQ   rR   r6   rB   rE   re   r3   )-r   r   
startswith
ValueError
isinstancedictr   	TypeErrorintr   pyarrowpyarrow.csvcsvread_csvReadOptionsParseOptionsConvertOptionsArrowInvalidstrplr#   rename_columnscolumn_names_from_arrowr   listlenr   max	enumerateziprangeitemsgetosgetenvr   r   r   _scan_csv_implselectFnthcollect_read_csv_impl):sourcer5   r6   r7   r8   r9   r:   r;   r<   r=   r+   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   r.   r0   rM   rN   rO   rP   rQ   rR   
projectioncolumnmsgra   
column_idxdatapart   tblerrdfdtypes_listidxcurrent_columnsnew_column_name
dtype_listnew_to_currentcolumn_namecolumn_dtypeschema_overrides_is_listencoding_supported_in_lazynew_streamingvlfs:                                                             Q/home/james-whalen/.local/lib/python3.13/site-packages/polars/io/csv/functions.pyrw   rw   .   sE	   R YUCjtD
H5A+G4J%+OzF$$Y//E  !o%  #J4*- - Bn 	$N04 LS"S7Qs6!":':&;#<7"S"): CMM*J:,/*OM)+
  &ffooFF&&"+.D6@.!)	 '  FF''"+1;:%/t%;%Q
c@Q ( 
 FF))%)(70= * 
F  $$?B?O?OP?OV73vabz?Q./0?OPC \\%%c7%;"2{33	&:6F+M+Mz?S!122WCS/! .4HJ!8K,L(4OCS)***:3*?J'  5 '#
3CT(J(Jw<#.//WCS/!
  G >?'J7G,N,N
 7|c+..  !o% &a#k*:;O s7|c+.>>#  %S/) BL#AK:gj1n-.   # ',As;/?!/C&D#&D
 j\*&D   # #$K(88 ,7q3?O;P+Q+Q&*:: 6$_5+Q   z?c*:&;;'1$z*:DAA!#k?"CDN 2B1G1G1I 1I-K "";<lJ1I   
   **:HE!)-C!C 			./36 	99901S8 
 ;1d,, Fg&& 		./360. fsDk**'EJF$Vu= %$F #6uE$F
 '[ o%--hZ7GH o%
!
  
 *	

 "
  
 "
 .
 
 $
 *F
 (
 ,
 !4
 
  !
" "#
$ %
& $:'
( *)
* .+
, -
. */
0 #81
2 (3
4 5
: 7#B155,-BZZ\ )+
 % $+
 $	
  . & $ & "2  ( .J , !0 $  %8!" &#$ %& &.%=6'( &)*  +, (>-.  ./0 "212 "34  .56 '<78 ,9: ;B
N r;//I[ #T N@ ?? &![C%@||~%C
 
<&=
 
N Qz## Nd
 
sz   ]	]'	^01B] __2_,
_:	_# __A0_!^-.+^(^-^0(^--^00
^?!
_/)r5   r6   r8   r9   r:   r;   r<   r=   r+   r>   r?   r@   rA   rB   rD   rE   rF   rG   rH   rI   rL   r.   r0   rM   rN   rO   rP   rQ   rR   c                  [        U [        [        45      (       a  [        U SS9nOZS n[        U [        5      (       a  U R                  5       n [        U [        5      (       a  U R                  5       R                  5       n S nS n U	b  [        U	[        5      (       a9  / nU	R                  5        H"  u  n!n"UR                  U![        U"5      45        M$     O2[        U	[        5      (       a  U	n OS[        U	5      < 3n#[        U#5      e[        U
5      n$[        U[        5      (       a  U/n[        U [        5      (       a  [!        U 5      (       a  S n%Ub  [        U5      n%U b  Sn#[#        U#5      eSSKJn&  U&" U 40 SU_SU_S	U_S
U_SU_SU_SU_SU%_SU
_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_6n'Uc  U'R)                  5       $ [+        USS9(       a  U'R-                  U5      R)                  5       $ Sn#[#        U#5      e[/        U5      u  n(n[0        R2                  " U UUUUUUUU(UUUUUUUU UUUU$UUU[5        UU5      4UUUUUS .6n)[7        U)5      $ )!NFrj   z7`schema_overrides` should be of type list or dict, got zcannot use glob patterns and unnamed dtypes as `schema_overrides` argument

Use `schema_overrides`: Mapping[str, Type[DataType]]r   )scan_csvr5   r8   r9   r:   r;   r<   r=   r+   r>   r?   r@   rD   rF   rH   rI   rL   r.   r0   rN   rO   rP   rQ   rR   rl   zccannot use glob patterns and integer based projection as `columns` argument

Use columns: List[str])rN   rO   rP   rQ   r=   )rp   r|   r   r   r   getvaluer   encoderq   r   appendr   r   r   rr   r   r   ro   polarsr   r   r   r   r   r    rw   r   r   )*r   r5   r6   r8   r9   r:   r;   r<   r=   r+   r>   r?   r@   rA   rB   rD   rE   rF   rG   rH   rI   rL   r.   r0   rM   rN   rO   rP   rQ   rR   pathr   dtype_slicekr   r   processed_null_valuesdtypes_dictr   scanr   pydfs*                                             r   r   r   J  s#   D &3+&&!&eDfg&&__&Ffh''__&--/F>BJ37K#&--J(..01!!1&6q&9":; 1((33*KKL_`pLqKtuCC. 0='3)&#?6#:#:!z*K"K  S/!#
!
  
 *	

 "
  
 "
 
 )
 $
 *F
 (
 !4
 
 "
  !
" $:#
$ *%
& .'
( )
* *+
, #8-
. (/
0 1
4 ?<<>!W6;;w'//11-  S/!+G4J$^-=>34 %3#=D@ 4=    iP  )r5   r6   r7   r8   r9   r:   r;   r<   r+   r>   r?   r@   rA   rB   rD   rE   rF   rG   rH   rI   rL   r.   r0   rM   rN   rO   rP   rQ   c                  [        U5      u  nnU(       a3  U(       d,  U H&  nUR                  S5      (       a  M  Sn[        U5      e   U(       a  U	(       a  [        U	[        5      (       ak  [        U5      [        U	5      :  a  Sn[        U5      e[        /[        U5      S-   -  n [        U5       H  u  n!n"U![        U	5      :  d  M  U	U!   U U"'   M!     U n	U(       aU  U	(       aN  [        U	[        5      (       a9  [        U5      [        U	5      :  a  Sn[        U5      e[        [        X)5      5      n	U(       Ga  U	(       Ga  [        U	[        5      (       Ga~  Sn#U(       a4  [        U5      [        U5      :  a  Sn[        U5      eUS[        U5       n#OU(       dx  U(       aE  U(       a%  [        U5      [        U5      :  a  Sn[        U5      eU V"s/ s H  n"SU"S-    3PM     n#n"O[        S[        U5      S-   5       V"s/ s H  n"SU" 3PM
     n#n"O[[        U	5      [        U5      ::  aC  US[        U	5        V$s/ s H  n$U$U	;   d  M  U	U$   PM     n%n$[        U%5      [        U	5      :X  a  U%n	U#(       a`  [        U	[        5      (       aK  [        [        UU#5      5      n&U	R                  5        V'V(s0 s H  u  n'n(U&R                  U'U'5      U(_M     n	n'n([        U 40 SU_S	U(       a  UOU_S
U_SU_SU_SU_SU_SU	_SU
_SU_SU_SU_SU_SU_SU_SU_SUS:X  a  UOS_SU_SU_SU_SU_SU_S U_S!U_S"U_S#U_S$U_6$ s  sn"f s  sn"f s  sn$f s  sn(n'f )%u  
Read a CSV file in batches.

Upon creation of the `BatchedCsvReader`, Polars will gather statistics and
determine the file chunks. After that, work will only be done if `next_batches`
is called, which will return a list of `n` frames of the given batch size.

.. versionchanged:: 0.20.31
    The `dtypes` parameter was renamed `schema_overrides`.
.. versionchanged:: 0.20.4
    * The `row_count_name` parameter was renamed `row_index_name`.
    * The `row_count_offset` parameter was renamed `row_index_offset`.

Parameters
----------
source
    Path to a file or a file-like object (by "file-like object" we refer to objects
    that have a `read()` method, such as a file handler like the builtin `open`
    function, or a `BytesIO` instance). If `fsspec` is installed, it will be used
    to open remote files. For file-like objects, the stream position may not be
    updated accordingly after reading.
has_header
    Indicate if the first row of the dataset is a header or not. If set to False,
    column names will be autogenerated in the following format: `column_x`, with
    `x` being an enumeration over every column in the dataset, starting at 1.
columns
    Columns to select. Accepts a list of column indices (starting
    at zero) or a list of column names.
new_columns
    Rename columns right after parsing the CSV file. If the given
    list is shorter than the width of the DataFrame the remaining
    columns will have their original name.
separator
    Single byte character to use as separator in the file.
comment_prefix
    A string used to indicate the start of a comment line. Comment lines are skipped
    during parsing. Common examples of comment prefixes are `#` and `//`.
quote_char
    Single byte character used for csv quoting, default = `"`.
    Set to None to turn off special handling and escaping of quotes.
skip_rows
    Start reading after ``skip_rows`` rows. The header will be parsed at this
    offset. Note that we respect CSV escaping/comments when skipping rows.
    If you want to skip by newline char only, use `skip_lines`.
skip_lines
    Start reading after `skip_lines` lines. The header will be parsed at this
    offset. Note that CSV escaping will not be respected when skipping lines.
    If you want to skip valid CSV rows, use ``skip_rows``.
schema_overrides
    Overwrite dtypes during inference.
null_values
    Values to interpret as null values. You can provide a:

    - `str`: All values equal to this string will be null.
    - `List[str]`: All values equal to any string in this list will be null.
    - `Dict[str, str]`: A dictionary that maps column name to a
      null value string.

missing_utf8_is_empty_string
    By default a missing value is considered to be null; if you would prefer missing
    utf8 values to be treated as the empty string you can set this param True.
ignore_errors
    Try to keep reading lines if some lines yield errors.
    First try `infer_schema_length=0` to read all columns as
    `pl.String` to check which values might cause an issue.
try_parse_dates
    Try to automatically parse dates. Most ISO8601-like formats can
    be inferred, as well as a handful of others. If this does not succeed,
    the column remains of data type `pl.String`.
n_threads
    Number of threads to use in csv parsing.
    Defaults to the number of physical cpu's of your system.
infer_schema_length
    The maximum number of rows to scan for schema inference.
    If set to `0`, all columns will be read as `pl.String`.
    If set to `None`, the full data may be scanned *(this is slow)*.
batch_size
    Number of lines to read into the buffer at once.

    Modify this to change performance.
n_rows
    Stop reading from CSV file after reading `n_rows`.
    During multi-threaded parsing, an upper bound of `n_rows`
    rows cannot be guaranteed.
encoding : {'utf8', 'utf8-lossy', ...}
    Lossy means that invalid utf8 values are replaced with `�`
    characters. When using other encodings than `utf8` or
    `utf8-lossy`, the input is first decoded in memory with
    python. Defaults to `utf8`.
low_memory
    Reduce memory pressure at the expense of performance.
rechunk
    Make sure that all columns are contiguous in memory by
    aggregating the chunks into a single array.
skip_rows_after_header
    Skip this number of rows when the header is parsed.
row_index_name
    Insert a row index column with the given name into the DataFrame as the first
    column. If set to `None` (default), no row index column is created.
row_index_offset
    Start the row index at this offset. Cannot be negative.
    Only used if `row_index_name` is set.
sample_size
    Set the sample size. This is used to sample statistics to estimate the
    allocation needed.

    .. deprecated:: 1.10.0
        Is a no-op.
eol_char
    Single byte end of line character (default: `\n`). When encountering a file
    with windows line endings (`\r\n`), one can go with the default `\n`. The extra
    `\r` will be removed when processed.
raise_if_empty
    When there is no data in the source,`NoDataError` is raised. If this parameter
    is set to False, `None` will be returned from `next_batches(n)` instead.
truncate_ragged_lines
    Truncate lines that are longer than the schema.
decimal_comma
    Parse floats using a comma as the decimal separator instead of a period.

Returns
-------
BatchedCsvReader

See Also
--------
scan_csv : Lazily read from a CSV file or multiple files via glob patterns.

Examples
--------
>>> reader = pl.read_csv_batched(
...     "./pdsh/tables_scale_100/lineitem.tbl",
...     separator="|",
...     try_parse_dates=True,
... )  # doctest: +SKIP
>>> batches = reader.next_batches(5)  # doctest: +SKIP
>>> for df in batches:  # doctest: +SKIP
...     print(df)

Read big CSV file in batches and write a CSV file for each "group" of interest.

>>> seen_groups = set()
>>> reader = pl.read_csv_batched("big_file.csv")  # doctest: +SKIP
>>> batches = reader.next_batches(100)  # doctest: +SKIP

>>> while batches:  # doctest: +SKIP
...     df_current_batches = pl.concat(batches)
...     partition_dfs = df_current_batches.partition_by("group", as_dict=True)
...
...     for group, df in partition_dfs.items():
...         if group in seen_groups:
...             with open(f"./data/{group}.csv", "a") as fh:
...                 fh.write(df.write_csv(file=None, include_header=False))
...         else:
...             df.write_csv(file=f"./data/{group}.csv", include_header=True)
...         seen_groups.add(group)
...
...     batches = reader.next_batches(100)
rV   rW   rc   r[   Nrd   r   r5   r6   r8   r9   r:   r;   r<   r+   r>   r?   r@   rA   rB   rD   rE   rF   rG   re   r3   rH   rI   rL   r.   r0   rN   r7   rO   rP   rQ   )r   rn   ro   rp   r   r   r   r   r   rq   r   r   r   r   r   ))r   r5   r6   r7   r8   r9   r:   r;   r<   r+   r>   r?   r@   rA   rB   rD   rE   rF   rG   rH   rI   rL   r.   r0   rM   rN   rO   rP   rQ   r   r   r   r   r   r   r   r   r   r   r   r   s)                                            r   read_csv_batchedr     s   H ,G4JzF$$Y//E  !o%  &:6F+M+Mz?S!122WCS/! .4HJ!8K,L(4OCS)***:3*?J'  5 '#
3CT(J(Jw<#.//WCS/!
  G >?'J7G,N,N
 7|c+..[ o% &a#k*:;O s7|c+.>>_C$S/) BL#AK:gj1n-.   # ',As;/?!/C&D#&D
 j\*&D   # #$K(88 ,7q3?O;P+Q+Q&*:: 6$_5+Q   z?c*:&;;'1$z*:DAA!#k?"CDN 2B1G1G1I 1I-K "";<lJ1I   
  #
 	
 &    *   &B $ (  0  !" #$ &56%& '( )*  6+, &-. */0 12  34 &56 478 $9 G## s   MM=
M	M4 Mauto   )"r5   r8   r9   r:   r;   r<   r=   r+   r>   r?   r@   cachewith_column_namesrC   rD   rF   rG   rH   rI   rL   r.   r0   rA   rN   r7   rO   rP   rQ   rR   rK   credential_providerretriesfile_cache_ttlinclude_file_pathsc       "        H  ^ Ub(  [        U[        [        45      (       d  Sn#[        U#5      eT(       d/  [        U[        5      (       a  S[	        U5      < 3n#[        U#5      eT(       aM  U(       a  Sn#[        U#5      eU(       a*  [        U[        5      (       a  [        [        TU5      5      nS,U4S jjn[        SUSS9  [        SUS	S9  [        U [        [        45      (       a  [        U SS
9n O*[        U SS9(       a  U  V s/ s H  n [        U SS
9PM     sn n U(       d  Sn[        UU US5      n$A[        U 40 SU_SU_SU_SU_SU_SU_SU_SU_SU	_SU
_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_S U_S!U_S"U_S#U_S$U_S%U_S&U_S'U _S(U_S)U$_S*U!_S+U"_6$ s  sn f )-u!  
Lazily read from a CSV file or multiple files via glob patterns.

This allows the query optimizer to push down predicates and
projections to the scan level, thereby potentially reducing
memory overhead.

.. versionchanged:: 0.20.31
    The `dtypes` parameter was renamed `schema_overrides`.
.. versionchanged:: 0.20.4
    * The `row_count_name` parameter was renamed `row_index_name`.
    * The `row_count_offset` parameter was renamed `row_index_offset`.

Parameters
----------
source
    Path(s) to a file or directory
    When needing to authenticate for scanning cloud locations, see the
    `storage_options` parameter.
has_header
    Indicate if the first row of the dataset is a header or not. If set to False,
    column names will be autogenerated in the following format: `column_x`, with
    `x` being an enumeration over every column in the dataset, starting at 1.
separator
    Single byte character to use as separator in the file.
comment_prefix
    A string used to indicate the start of a comment line. Comment lines are skipped
    during parsing. Common examples of comment prefixes are `#` and `//`.
quote_char
    Single byte character used for csv quoting, default = `"`.
    Set to None to turn off special handling and escaping of quotes.
skip_rows
    Start reading after ``skip_rows`` rows. The header will be parsed at this
    offset. Note that we respect CSV escaping/comments when skipping rows.
    If you want to skip by newline char only, use `skip_lines`.
skip_lines
    Start reading after `skip_lines` lines. The header will be parsed at this
    offset. Note that CSV escaping will not be respected when skipping lines.
    If you want to skip valid CSV rows, use ``skip_rows``.
schema
    Provide the schema. This means that polars doesn't do schema inference.
    This argument expects the complete schema, whereas `schema_overrides` can be
    used to partially overwrite a schema. Note that the order of the columns in
    the provided `schema` must match the order of the columns in the CSV being read.
schema_overrides
    Overwrite dtypes during inference; should be a {colname:dtype,} dict or,
    if providing a list of strings to `new_columns`, a list of dtypes of
    the same length.
null_values
    Values to interpret as null values. You can provide a:

    - `str`: All values equal to this string will be null.
    - `List[str]`: All values equal to any string in this list will be null.
    - `Dict[str, str]`: A dictionary that maps column name to a
      null value string.

missing_utf8_is_empty_string
    By default a missing value is considered to be null; if you would prefer missing
    utf8 values to be treated as the empty string you can set this param True.
ignore_errors
    Try to keep reading lines if some lines yield errors.
    First try `infer_schema=False` to read all columns as
    `pl.String` to check which values might cause an issue.
cache
    Cache the result after reading.
with_column_names
    Apply a function over the column names just in time (when they are determined);
    this function will receive (and should return) a list of column names.
infer_schema
    When `True`, the schema is inferred from the data using the first
    `infer_schema_length` rows.
    When `False`, the schema is not inferred and will be `pl.String` if not
    specified in `schema` or `schema_overrides`.
infer_schema_length
    The maximum number of rows to scan for schema inference.
    If set to `None`, the full data may be scanned *(this is slow)*.
    Set `infer_schema=False` to read all columns as `pl.String`.
n_rows
    Stop reading from CSV file after reading `n_rows`.
encoding : {'utf8', 'utf8-lossy'}
    Lossy means that invalid utf8 values are replaced with `�`
    characters. Defaults to "utf8".
low_memory
    Reduce memory pressure at the expense of performance.
rechunk
    Reallocate to contiguous memory when all chunks/ files are parsed.
skip_rows_after_header
    Skip this number of rows when the header is parsed.
row_index_name
    If not None, this will insert a row index column with the given name into
    the DataFrame.
row_index_offset
    Offset to start the row index column (only used if the name is set).
try_parse_dates
    Try to automatically parse dates. Most ISO8601-like formats
    can be inferred, as well as a handful of others. If this does not succeed,
    the column remains of data type `pl.String`.
eol_char
    Single byte end of line character (default: `\n`). When encountering a file
    with windows line endings (`\r\n`), one can go with the default `\n`. The extra
    `\r` will be removed when processed.
new_columns
    Provide an explicit list of string column names to use (for example, when
    scanning a headerless CSV file). If the given list is shorter than the width of
    the DataFrame the remaining columns will have their original name.
raise_if_empty
    When there is no data in the source, `NoDataError` is raised. If this parameter
    is set to False, an empty LazyFrame (with no columns) is returned instead.
truncate_ragged_lines
    Truncate lines that are longer than the schema.
decimal_comma
    Parse floats using a comma as the decimal separator instead of a period.
glob
    Expand path given via globbing rules.
storage_options
    Options that indicate how to connect to a cloud provider.

    The cloud providers currently supported are AWS, GCP, and Azure.
    See supported keys here:

    * `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
    * `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
    * `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
    * Hugging Face (`hf://`): Accepts an API key under the `token` parameter: \
      `{'token': '...'}`, or by setting the `HF_TOKEN` environment variable.

    If `storage_options` is not provided, Polars will try to infer the information
    from environment variables.
credential_provider
    Provide a function that can be called to provide cloud storage
    credentials. The function is expected to return a dictionary of
    credential keys along with an optional credential expiry time.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.
retries
    Number of retries if accessing a cloud instance fails.
file_cache_ttl
    Amount of time to keep downloaded cloud files since their last access time,
    in seconds. Uses the `POLARS_FILE_CACHE_TTL` environment variable
    (which defaults to 1 hour) if not given.
include_file_paths
    Include the path of the source file(s) as a column with this name.

Returns
-------
LazyFrame

See Also
--------
read_csv : Read a CSV file into a DataFrame.

Examples
--------
>>> import pathlib
>>>
>>> (
...     pl.scan_csv("my_long_file.csv")  # lazy, doesn't do a thing
...     .select(
...         ["a", "c"]
...     )  # select only 2 columns (other columns will not be read)
...     .filter(
...         pl.col("a") > 10
...     )  # the filter is pushed down the scan, so less data is read into memory
...     .head(100)  # constrain number of returned results to 100
... )  # doctest: +SKIP

We can use `with_column_names` to modify the header before scanning:

>>> df = pl.DataFrame(
...     {"BrEeZaH": [1, 2, 3, 4], "LaNgUaGe": ["is", "hard", "to", "read"]}
... )
>>> path: pathlib.Path = dirpath / "mydf.csv"
>>> df.write_csv(path)
>>> pl.scan_csv(
...     path, with_column_names=lambda cols: [col.lower() for col in cols]
... ).collect()
shape: (4, 2)
┌─────────┬──────────┐
│ breezah ┆ language │
│ ---     ┆ ---      │
│ i64     ┆ str      │
╞═════════╪══════════╡
│ 1       ┆ is       │
│ 2       ┆ hard     │
│ 3       ┆ to       │
│ 4       ┆ read     │
└─────────┴──────────┘

You can also simply replace column names (or provide them if the file has none)
by passing a list of new column names to the `new_columns` parameter:

>>> df.write_csv(path)
>>> pl.scan_csv(
...     path,
...     new_columns=["idx", "txt"],
...     schema_overrides=[pl.UInt16, pl.String],
... ).collect()
shape: (4, 2)
┌─────┬──────┐
│ idx ┆ txt  │
│ --- ┆ ---  │
│ u16 ┆ str  │
╞═════╪══════╡
│ 1   ┆ is   │
│ 2   ┆ hard │
│ 3   ┆ to   │
│ 4   ┆ read │
└─────┴──────┘
rX   z(expected 'schema_overrides' dict, found zIcannot set both `with_column_names` and `new_columns`; mutually exclusivec                Z   > [        U 5      [        T5      :  a  TU [        T5      S  -   $ T$ )N)r   )colsr7   s    r   r   #scan_csv.<locals>.with_column_names5  s2    4y3{++"T#k*:*<%===""r   r8   FrT   r:   Trj   rl   r   r   r5   r9   r;   r<   r+   r=   r>   r?   r@   r   r   rD   rF   rH   rI   rL   rG   r.   r0   rA   rN   rO   rP   rQ   rR   r   rK   r   r   r   )r   	list[str]returnr   )rp   rq   r   rr   r   ro   r   r   r|   r   r   r   r   r   )%r   r5   r8   r9   r:   r;   r<   r=   r+   r>   r?   r@   r   r   rC   rD   rF   rG   rH   rI   rL   r.   r0   rA   rN   r7   rO   rP   rQ   rR   rK   r   r   r   r   r   credential_provider_builders%                            `           r   r   r     s~   N #J4*- - Bn:&6AA89LM]9^8abn	]CS/!
+;X F F#C5E$FG	# YUCjtD&3+&&#FF	 5	9PV
PVfv5APV
 "CV_j# 	"" " &	"
 " " " *" "  " &B" $" " ," 0"  !"" #"$ %"&  6'"( )"* &+", *-". (/"0 1"2 &3"4 45"6 $7"8 9": ;"< (="> 8?"@ &A"B .C" "
s   F) r5   r8   r9   r:   r;   r<   r=   r+   r>   r?   r@   r   r   rD   rF   rG   rH   rI   rL   r.   r0   rA   rN   rO   rP   rQ   rR   rK   r   r   r   r   c           	     d   S n!UbZ  [        U[        5      (       d  Sn"[        U"5      e/ n!UR                  5        H"  u  n#n$U!R	                  U#[        U$5      45        M$     [        U	5      n%[        U [        5      (       a  U n&S n O/ n&U(       a  [        UR                  5       5      nOS n[        R                  " U U&40 SU_SU_SU_SU_SU_SU_SU_S	U!_S
U_SU_SU_SU%_SU
_SU_SU_SU_SU_SU_S[        UU5      _SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_SU_S U _6n'[        U'5      $ )!Nz.expected 'schema_overrides' dict, found 'list'r8   r5   r@   r;   r<   rF   r   overwrite_dtyperH   r9   r:   r>   r?   rD   with_schema_modifyrI   rL   rG   	row_indexrA   rN   rO   rP   rQ   rR   r=   cloud_optionsr   r   r   r   )rp   rq   rr   r   r   r   r   r   r!   new_from_csvr   r   )(r   r5   r8   r9   r:   r;   r<   r=   r+   r>   r?   r@   r   r   rD   rF   rG   rH   rI   rL   r.   r0   rA   rN   rO   rP   rQ   rR   rK   r   r   r   r   r   r   r   r   r   sourcespylfs(                                           r   r   r   r  s   X ;?J#*D11BCC. 
$**,DAqq"21"567 -0=&$4467 ##" " 	"
 $" " " " " #" " &" " *" &B"  0!"" -#"$ %"&  6'"( )"* '~7GH+", (-". /"0 &1"2 43"4 $5"6 7"8 9": &;"< 0="> ?"@ &A"B .C"DF D>r   )Fr   z(str | Path | IO[str] | IO[bytes] | bytesr5   boolr6   $Sequence[int] | Sequence[str] | Noner7   Sequence[str] | Noner8   r|   r9   
str | Noner:   r   r;   rs   r<   rs   r=   SchemaDict | Noner+   >Mapping[str, PolarsDataType] | Sequence[PolarsDataType] | Noner>   +str | Sequence[str] | dict[str, str] | Noner?   r   r@   r   rA   r   rB   
int | NonerC   r   rD   r   rE   rs   rF   r   rG   CsvEncoding | strrH   r   rI   r   rJ   r   rK   dict[str, Any] | NonerL   rs   r.   r   r0   rs   rM   rs   rN   r|   rO   r   rP   r   rQ   r   rR   r   r   r#   )>r   zstr | Path | IO[bytes] | bytesr5   r   r6   r   r8   r|   r9   r   r:   r   r;   rs   r<   rs   r=   zNone | SchemaDictr+   z.None | (SchemaDict | Sequence[PolarsDataType])r>   r   r?   r   r@   r   rA   r   rB   r   rD   r   rE   rs   rF   r   rG   r%   rH   r   rI   r   rL   rs   r.   r   r0   rs   rM   rs   rN   r|   rO   r   rP   r   rQ   r   rR   r   r   r#   )<r   z
str | Pathr5   r   r6   r   r7   r   r8   r|   r9   r   r:   r   r;   rs   r<   rs   r+   r   r>   r   r?   r   r@   r   rA   r   rB   r   rD   r   rE   rs   rF   r   rG   r   rH   r   rI   r   rL   rs   r.   r   r0   rs   rM   rs   rN   r|   rO   r   rP   r   rQ   r   r   r   )Hr   zqstr | Path | IO[str] | IO[bytes] | bytes | list[str] | list[Path] | list[IO[str]] | list[IO[bytes]] | list[bytes]r5   r   r8   r|   r9   r   r:   r   r;   rs   r<   rs   r=   r   r+   z,SchemaDict | Sequence[PolarsDataType] | Noner>   r   r?   r   r@   r   r   r   r   'Callable[[list[str]], list[str]] | NonerC   r   rD   r   rF   r   rG   r%   rH   r   rI   r   rL   rs   r.   r   r0   rs   rA   r   rN   r|   r7   r   rO   r   rP   r   rQ   r   rR   r   rK   r   r   z3CredentialProviderFunction | Literal['auto'] | Noner   rs   r   r   r   r   r   r$   )Dr   zjstr | IO[str] | IO[bytes] | bytes | list[str] | list[Path] | list[IO[str]] | list[IO[bytes]] | list[bytes]r5   r   r8   r|   r9   r   r:   r   r;   rs   r<   rs   r=   r   r+   r   r>   r   r?   r   r@   r   r   r   r   r   rD   r   rF   r   rG   r%   rH   r   rI   r   rL   rs   r.   r   r0   rs   rA   r   rN   r|   rO   r   rP   r   rQ   r   rR   r   rK   r   r   z CredentialProviderBuilder | Noner   rs   r   r   r   r   r   r$   )G
__future__r   
contextlibr   collections.abcr   ior   r   pathlibr   typingr   r	   r
   r   r   polars._reexport	_reexportr}   polars.functions	functionsr   polars._utils.deprecationr   polars._utils.variousr   r   r   r   r   polars._utils.wrapr   r   polars.datatypesr   r   r   polars.io._utilsr   r   r   r   ,polars.io.cloud.credential_provider._builderr   polars.io.csv._utilsr   r   polars.io.csv.batched_readerr   suppressImportErrorpolars._plrr    r!   r"   r   r#   r$   polars._typingr%   r&   r'   polars.io.cloudr(   r)   rw   r   r   r   r    r   r   <module>r      s
   "  	 $    < <   A  1 F F  F 9%4 & '+FF:V X'99M-/?R/1CXV 48(,!%  $ 	?C).! &5"(-1"#!%"'KV4V V 2	V
 &V V V V V V V 	GV =V  #'!V" #V$ %V& 'V( )V* $+V, -V. /V0  1V2 3V4 5V6 7V8 +9V:  ;V< =V> ?V@ AVB CVD EVF  GVH IVJ KVL MV W S NVx 48!%  $GK?C).! &5""#!%"'?O*O O 2	O
 O O O O O O EO =O #'O O O  !O" $#O$ %O& 'O( )O* +O, -O.  /O0 1O2 3O4 5O6 7O8 9O:  ;O< =O> ?O@ AOd X'99M-/?R/1CXV 48(,!%  	?C).! &5"("#!%"'A{{ { 2	{
 &{ { { { { { 	G{ ={ #'{  !{" #{$ %{& $'{( ){* +{,  -{. /{0 1{2  3{4 5{6 7{8 9{: ;{< ={>  ?{@ A{B C{ W S N{|	 X'99M-/?R/1CXV !%  $EI?C).AE&5""#!%!(,"'-1OU!%%)_O		O O O  !O" #O$ %O& 'O( )O* C+O, =-O. #'/O0 1O2 3O4 ?5O6 7O8 $9O: ;O< =O> ?O@ AOB  COD EOF GOH IOJ KOL &MON OOP  QOR SOT UOV +WOX MYOZ [O\ ]O^ #_O` aO W S NOz
 !%  $*.?C).AE&5""#!%!"&-1<@!%%)Uee e e e e e  !e" #e$ (%e& ='e( #')e* +e, -e. ?/e0 $1e2 3e4 5e6 7e8 9e:  ;e< =e> ?e@ AeB CeD EeF  GeH IeJ KeL +MeN :OeP QeR SeT #UeV Wea* &%s   	M  
M