
    hR              
         S SK Jr  S SKrS SKJrJrJrJr  S SKJ	r	  S SK
Jr  S SKJr  S SKJr  S SKJr  S S	KJr  \(       a"  S S
KJr  S SKJr  S SKJr  S SKJr  S SKJrJrJr  \SSSSSS.               SS jj5       r \SSSSS.               SS jj5       r \SSSSS.               SS jj5       r SSS\SS.               SS jjr \SSSSSSSS.                     S S jj5       r!\SSSSSSSSS.                     S!S jj5       r!\SSSSSSSSS.                     S"S jj5       r!SSSSSSSSS.                     S#S jjr!g)$    )annotationsN)TYPE_CHECKINGAnyLiteraloverload)import_optional)issue_unstable_warning)qualified_type_name)N_INFER_DEFAULT)ODBCCursorProxy)ConnectionExecutor)Iterator)
TextClause)
Selectable)	DataFrame)ConnectionOrCursorDbReadEngine
SchemaDict.)iter_batches
batch_sizeschema_overridesinfer_schema_lengthexecute_optionsc                   g N query
connectionr   r   r   r   r   s          V/home/james-whalen/.local/lib/python3.13/site-packages/polars/io/database/functions.pyread_databaser!      s         )r   r   r   r   c                   g r   r   r   s          r    r!   r!   $   s     r"   c                   g r   r   r   s          r    r!   r!   1   s     '*r"   Fc               |   [        U[        5      (       ab  [        R                  " SU[        R                  5      (       a  [        SSSS9n[        U5      nO SU;   a  Sn[        U5      eSn[        U5      e[        U5       n	U	R                  U US	9R                  UUUUS
9sSSS5        $ ! , (       d  f       g= f)u-!  
Read the results of a SQL query into a DataFrame, given a connection object.

Parameters
----------
query
    SQL query to execute (if using a SQLAlchemy connection object this can
    be a suitable "Selectable", otherwise it is expected to be a string).
connection
    An instantiated connection (or cursor/client object) that the query can be
    executed against. Can also pass a valid ODBC connection string (identified as
    such if it contains the string "Driver={...}"), in which case the `arrow-odbc`
    package will be used to establish the connection and return Arrow-native data
    to Polars. Async driver connections are also supported, though this is currently
    considered unstable. If using SQLAlchemy, you can configure the connection's
    `execution_options` before passing to `read_database` to refine its behaviour
    (see the `iter_batches` parameter for an example where this can be useful).

    .. warning::
        Use of asynchronous connections is currently considered **unstable**, and
        unexpected issues may arise; if this happens, please report them.
iter_batches
    Return an iterator of DataFrames, where each DataFrame represents a batch of
    data returned by the query; this can be useful for processing large resultsets
    in a more memory-efficient manner. If supported by the backend, this value is
    passed to the underlying query execution method (note that lower values will
    typically result in poor performance as they will cause many round-trips to
    the database). If the backend does not support changing the batch size then
    a single DataFrame is yielded from the iterator.

    .. note::
        If using SQLALchemy, you may also want to pass `stream_results=True` to the
        connection's `execution_options` method when setting this parameter, which
        will establish a server-side cursor; without this option some drivers (such
        as "psycopg2") will still materialise the entire result set client-side
        before batching the result locally.
batch_size
    Indicate the size of each batch when `iter_batches` is True (note that you can
    still set this when `iter_batches` is False, in which case the resulting
    DataFrame is constructed internally using batched return before being returned
    to you. Note that some backends (such as Snowflake) may support batch operation
    but not allow for an explicit size to be set; in this case you will still
    receive batches but their size is determined by the backend (in which case any
    value set here will be ignored).
schema_overrides
    A dictionary mapping column names to dtypes, used to override the schema
    inferred from the query cursor or given by the incoming Arrow data (depending
    on driver/backend). This can be useful if the given types can be more precisely
    defined (for example, if you know that a given column can be declared as `u32`
    instead of `i64`).
infer_schema_length
    The maximum number of rows to scan for schema inference. If set to `None`, the
    full data may be scanned *(this can be slow)*. This parameter only applies if
    the data is read as a sequence of rows and the `schema_overrides` parameter
    is not set for the given column; Arrow-aware drivers also ignore this value.
execute_options
    These options will be passed through into the underlying query execution method
    as kwargs. In the case of connections made using an ODBC string (which use
    `arrow-odbc`) these options are passed to the `read_arrow_batches_from_odbc`
    method.

Notes
-----
* This function supports a wide range of native database drivers (ranging from local
  databases such as SQLite to large cloud databases such as Snowflake), as well as
  generic libraries such as ADBC, SQLAlchemy and various flavours of ODBC. If the
  backend supports returning Arrow data directly then this facility will be used to
  efficiently instantiate the DataFrame; otherwise, the DataFrame is initialised
  from row-wise data.

* Support for Arrow Flight SQL data is available via the `adbc-driver-flightsql`
  package; see https://arrow.apache.org/adbc/current/driver/flight_sql.html for
  more details about using this driver (notable databases implementing Flight SQL
  include Dremio and InfluxDB).

* The `read_database_uri` function can be noticeably faster than `read_database`
  if you are using a SQLAlchemy or DBAPI2 connection, as `connectorx` and `adbc`
  optimise translation of the result set into Arrow format. Note that you can
  determine a connection's URI from a SQLAlchemy engine object by calling
  `conn.engine.url.render_as_string(hide_password=False)`.

* If Polars has to create a cursor from your connection in order to execute the
  query then that cursor will be automatically closed when the query completes;
  however, Polars will *never* close any other open connection or cursor.

* Polars is able to support more than just relational databases and SQL queries
  through this function. For example, you can load local graph database results
  from a `KùzuDB` connection in conjunction with a Cypher query, or use SurrealQL
  with SurrealDB.

See Also
--------
read_database_uri : Create a DataFrame from a SQL query using a URI string.

Examples
--------
Instantiate a DataFrame from a SQL query against a user-supplied connection:

>>> df = pl.read_database(
...     query="SELECT * FROM test_data",
...     connection=user_conn,
...     schema_overrides={"normalised_score": pl.UInt8},
... )  # doctest: +SKIP

Use a parameterised SQLAlchemy query, passing named values via `execute_options`:

>>> df = pl.read_database(
...     query="SELECT * FROM test_data WHERE metric > :value",
...     connection=alchemy_conn,
...     execute_options={"parameters": {"value": 0}},
... )  # doctest: +SKIP

Use 'qmark' style parameterisation; values are still passed via `execute_options`,
but in this case the "parameters" value is a sequence of literals, not a dict:

>>> df = pl.read_database(
...     query="SELECT * FROM test_data WHERE metric > ?",
...     connection=alchemy_conn,
...     execute_options={"parameters": [0]},
... )  # doctest: +SKIP

Batch the results of a large SQLAlchemy query into DataFrames, each containing
100,000 rows; explicitly establish a server-side cursor using the connection's
"execution_options" method to avoid loading the entire result locally before
batching (this is not required for all drivers, so check your driver's
documentation for more details):

>>> for df in pl.read_database(
...     query="SELECT * FROM test_data",
...     connection=alchemy_conn.execution_options(stream_results=True),
...     iter_batches=True,
...     batch_size=100_000,
... ):
...     do_something(df)  # doctest: +SKIP

Instantiate a DataFrame using an ODBC connection string (requires the `arrow-odbc`
package) setting upper limits on the buffer size of variadic text/binary columns:

>>> df = pl.read_database(
...     query="SELECT * FROM test_data",
...     connection="Driver={PostgreSQL};Server=localhost;Port=5432;Database=test;Uid=usr;Pwd=",
...     execute_options={"max_text_size": 512, "max_binary_size": 1024},
... )  # doctest: +SKIP

Load graph database results from a `KùzuDB` connection and a Cypher query:

>>> df = pl.read_database(
...     query="MATCH (a:User)-[f:Follows]->(b:User) RETURN a.name, f.since, b.name",
...     connection=kuzu_db_conn,
... )  # doctest: +SKIP

Load data from an asynchronous SQLAlchemy driver/engine; note that asynchronous
connections and sessions are also supported here:

>>> from sqlalchemy.ext.asyncio import create_async_engine
>>> async_engine = create_async_engine("sqlite+aiosqlite:///test.db")
>>> df = pl.read_database(
...     query="SELECT * FROM test_data",
...     connection=async_engine,
... )  # doctest: +SKIP

Load data from an `AsyncSurrealDB` client connection object; note that both the "ws"
and "http" protocols are supported, as is the synchronous `SurrealDB` client. The
async loop can be run with standard `asyncio` or with `uvloop`:

>>> import asyncio  # (or uvloop)
>>> async def surreal_query_to_frame(query: str, url: str):
...     async with AsyncSurrealDB(url) as client:
...         await client.use(namespace="test", database="test")
...         return pl.read_database(query=query, connection=client)
>>> df = asyncio.run(
...     surreal_query_to_frame(
...         query="SELECT * FROM test",
...         url="http://localhost:8000",
...     )
... )  # doctest: +SKIP

z\bdriver\s*=\s*{[^}]+?}
arrow_odbcz*use of ODBC connection string requires thepackage)module_name
err_prefix
err_suffixz://z<string URI is invalid here; call `read_database_uri` insteadz>unable to identify string connection as valid ODBC (no driver))r   options)r   r   r   r   N)
isinstancestrresearch
IGNORECASEr   r   
ValueErrorr   execute	to_polars)
r   r   r   r   r   r   r   _msgcxs
             r    r!   r!   >   s    x *c""99/R]]KK(G$A
 )4Jj PCS/!RCS/! 
J	'2zz#  
 )!%- 3	  
	 
(	'	's    B--
B;)partition_onpartition_rangepartition_numprotocolr   r   pre_execution_queryc                   g r   r   
r   urir7   r8   r9   r:   enginer   r   r;   s
             r    read_database_urir@          r"   )r7   r8   r9   r:   r?   r   r   r;   c                   g r   r   r=   s
             r    r@   r@   (  rA   r"   c                   g r   r   r=   s
             r    r@   r@   8  rA   r"   c                  SSK Jn
Jn  [        U[        5      (       d  S[        U5      < 3n[        U5      eUc  SnUS:X  a3  U(       a  Sn[        U5      eU	(       a  [        S5        U" U UUUUUUU	S9$ US:X  a?  [        U [        5      (       d  S	n[        U5      eU	(       a  S
n[        U5      eU
" U UUUS9$ SU< 3n[        U5      e)aO  
Read the results of a SQL query into a DataFrame, given a URI.

Parameters
----------
query
    Raw SQL query (or queries).
uri
    A connectorx or ADBC connection URI string that starts with the backend's
    driver name, for example:

    * "postgresql://user:pass@server:port/database"
    * "snowflake://user:pass@account/database/schema?warehouse=warehouse&role=role"

    The caller is responsible for escaping any special characters in the string,
    which will be passed "as-is" to the underlying engine (this is most often
    required when coming across special characters in the password).
partition_on
    The column on which to partition the result (connectorx).
partition_range
    The value range of the partition column (connectorx).
partition_num
    How many partitions to generate (connectorx).
protocol
    Backend-specific transfer protocol directive (connectorx); see connectorx
    documentation for more details.
engine : {'connectorx', 'adbc'}
    Selects the engine used for reading the database (defaulting to connectorx):

    * `'connectorx'`
      Supports a range of databases, such as PostgreSQL, Redshift, MySQL, MariaDB,
      Clickhouse, Oracle, BigQuery, SQL Server, and so on. For an up-to-date list
      please see the connectorx docs:
      https://github.com/sfu-db/connector-x#supported-sources--destinations
    * `'adbc'`
      Currently there is limited support for this engine, with a relatively small
      number of drivers available, most of which are still in development. For
      an up-to-date list of drivers please see the ADBC docs:
      https://arrow.apache.org/adbc/
schema_overrides
    A dictionary mapping column names to dtypes, used to override the schema
    given in the data returned by the query.
execute_options
    These options will be passed to the underlying query execution method as
    kwargs. Note that connectorx does not support this parameter and ADBC currently
    only supports positional 'qmark' style parameterization.
pre_execution_query
    SQL query or list of SQL queries executed before main query (connectorx>=0.4.2).
    Can be used to set runtime configurations using SET statements.
    Only applicable for Postgres and MySQL source.
    Only applicable with the connectorx engine.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.

Notes
-----
For `connectorx`, ensure that you have `connectorx>=0.3.2`. The documentation
is available `here <https://sfu-db.github.io/connector-x/intro.html>`_.

For `adbc` you will need to have installed `pyarrow` and the ADBC driver associated
with the backend you are connecting to, eg: `adbc-driver-postgresql`.

If your password contains special characters, you will need to escape them.
This will usually require the use of a URL-escaping function, for example:

>>> from urllib.parse import quote, quote_plus
>>> quote_plus("pass word?")
'pass+word%3F'
>>> quote("pass word?")
'pass%20word%3F'

See Also
--------
read_database : Create a DataFrame from a SQL query using a connection object.

Examples
--------
Create a DataFrame from a SQL query using a single thread:

>>> uri = "postgresql://username:password@server:port/database"
>>> query = "SELECT * FROM lineitem"
>>> pl.read_database_uri(query, uri)  # doctest: +SKIP

Create a DataFrame in parallel using 10 threads by automatically partitioning
the provided SQL on the partition column:

>>> uri = "postgresql://username:password@server:port/database"
>>> query = "SELECT * FROM lineitem"
>>> pl.read_database_uri(
...     query,
...     uri,
...     partition_on="partition_col",
...     partition_num=10,
...     engine="connectorx",
... )  # doctest: +SKIP

Create a DataFrame in parallel using 2 threads by explicitly providing two
SQL queries:

>>> uri = "postgresql://username:password@server:port/database"
>>> queries = [
...     "SELECT * FROM lineitem WHERE partition_col <= 10",
...     "SELECT * FROM lineitem WHERE partition_col > 10",
... ]
>>> pl.read_database_uri(queries, uri, engine="connectorx")  # doctest: +SKIP

Read data from Snowflake using the ADBC driver:

>>> df = pl.read_database_uri(
...     "SELECT * FROM test_table",
...     "snowflake://user:pass@company-org/testdb/public?warehouse=test&role=myrole",
...     engine="adbc",
... )  # doctest: +SKIP

Pass a single parameter via `execute_options` into a query using the ADBC driver:

>>> df = pl.read_database_uri(
...     "SELECT * FROM employees WHERE hourly_rate > ?",
...     "sqlite:///:memory:",
...     engine="adbc",
...     execute_options={"parameters": (30,)},
... )  # doctest: +SKIP

Or pass multiple parameters:

>>> df = pl.read_database_uri(
...     "SELECT * FROM employees WHERE hourly_rate BETWEEN ? AND ?",
...     "sqlite:///:memory:",
...     engine="adbc",
...     execute_options={"parameters": (40, 20)},
... )  # doctest: +SKIP
r   )_read_sql_adbc_read_sql_connectorxz.expected connection to be a URI string; found 
connectorxzAthe 'connectorx' engine does not support use of `execute_options`z;the 'pre-execution-query' parameter is considered unstable.)connection_urir7   r8   r9   r:   r   r;   adbcz3only a single SQL query string is accepted for adbcz?the 'adbc' engine does not support use of `pre_execution_query`)rH   r   r   z2engine must be one of {'connectorx', 'adbc'}, got )	polars.io.database._utilsrE   rF   r,   r-   r
   	TypeErrorr1   r	   )r   r>   r7   r8   r9   r:   r?   r   r   r;   rE   rF   r5   s                r    r@   r@   H  s    f Oc3>?RSV?W>Z[n	UCS/!"M $%+'- 3	
 		
 
6	%%%GCS/!SCS/!-+	
 	
 EVJOor"   )r   str | TextClause | Selectabler   ConnectionOrCursor | strr   zLiteral[False]r   
int | Noner   SchemaDict | Noner   rN   r   dict[str, Any] | Nonereturnr   )r   rL   r   rM   r   zLiteral[True]r   rN   r   rO   r   rN   r   rP   rQ   zIterator[DataFrame])r   rL   r   rM   r   boolr   rN   r   rO   r   rN   r   rP   rQ   zDataFrame | Iterator[DataFrame])r   r-   r>   r-   r7   
str | Noner8   tuple[int, int] | Noner9   rN   r:   rS   r?   zLiteral['adbc']r   rO   r   rP   r;   str | list[str] | NonerQ   r   )r   list[str] | strr>   r-   r7   rS   r8   rT   r9   rN   r:   rS   r?   zLiteral['connectorx'] | Noner   rO   r   Noner;   rU   rQ   r   )r   r-   r>   r-   r7   rS   r8   rT   r9   rN   r:   rS   r?   DbReadEngine | Noner   rW   r   rP   r;   rU   rQ   r   )r   rV   r>   r-   r7   rS   r8   rT   r9   rN   r:   rS   r?   rX   r   rO   r   rP   r;   rU   rQ   r   )"
__future__r   r.   typingr   r   r   r   polars._dependenciesr   polars._utils.unstabler	   polars._utils.variousr
   polars.datatypesr   "polars.io.database._cursor_proxiesr   polars.io.database._executorr   collections.abcr   sqlalchemy.sql.elementsr   sqlalchemy.sql.expressionr   polarsr   polars._typingr   r   r   r!   r@   r   r"   r    <module>rf      sK   " 	 8 8 0 9 5 , > ;(24 KK 

 $' *-&)-0	(	(	 !		
 	 (	 $	 +	 	 
	 
 !*-&)-0	(	(	  		
 	 (	 $	 +	 	 
	 
 !*-&)-0	*(	*(	* 		*
 	* (	* $	* +	* %	* 
	*  !*.&5-1W
(W
(W
 	W

 W
 (W
 $W
 +W
 %W
t 

  $.2 $*.-126	 	
 ,    ( + 0  
 

  $.2 $+/*. 26	 	
 ,   ) (  0  
 

  $.2 $"&!-126	 	
 ,      + 0  
&  $.2 $"&*.-126||	| 	|
 ,| | |  | (| +| 0| |r"   