ó
    œþöh)*  ã                  ób   • S SK Jr  S SKJr  S SKJr  S SKJr  \(       a  S SKJ	r	   " S S5      r
g)	é    )Úannotations)ÚTYPE_CHECKING)Úqualified_type_name)Ú	wrap_expr)ÚExprc                  óf   • \ rS rSrSrSrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSSS jjrSrg
)ÚExprCatNameSpaceé   z.Namespace for categorical related expressions.Úcatc                ó&   • UR                   U l         g ©N©Ú_pyexpr)ÚselfÚexprs     ÚQ/home/james-whalen/.local/lib/python3.13/site-packages/polars/expr/categorical.pyÚ__init__ÚExprCatNameSpace.__init__   s   € Ø—|‘|ˆó    c                óH   • [        U R                  R                  5       5      $ )uœ  
Get the categories stored in this data type.

Examples
--------
>>> df = pl.Series(
...     "cats", ["foo", "bar", "foo", "foo", "ham"], dtype=pl.Categorical
... ).to_frame()
>>> df.select(pl.col("cats").cat.get_categories())  # doctest: +SKIP
shape: (3, 1)
â”Œâ”€â”€â”€â”€â”€â”€â”
â”‚ cats â”‚
â”‚ ---  â”‚
â”‚ str  â”‚
â•žâ•â•â•â•â•â•â•¡
â”‚ foo  â”‚
â”‚ bar  â”‚
â”‚ ham  â”‚
â””â”€â”€â”€â”€â”€â”€â”˜
)r   r   Úcat_get_categories©r   s    r   Úget_categoriesÚExprCatNameSpace.get_categories   s   € ô* ˜Ÿ™×8Ñ8Ó:Ó;Ð;r   c                óH   • [        U R                  R                  5       5      $ )u¼  
Return the byte-length of the string representation of each value.

Returns
-------
Expr
    Expression of data type :class:`UInt32`.

See Also
--------
len_chars

Notes
-----
When working with non-ASCII text, the length in bytes is not the same as the
length in characters. You may want to use :func:`len_chars` instead.
Note that :func:`len_bytes` is much more performant (_O(1)_) than
:func:`len_chars` (_O(n)_).

Examples
--------
>>> df = pl.DataFrame(
...     {"a": pl.Series(["CafÃ©", "345", "æ±äº¬", None], dtype=pl.Categorical)}
... )
>>> df.with_columns(
...     pl.col("a").cat.len_bytes().alias("n_bytes"),
...     pl.col("a").cat.len_chars().alias("n_chars"),
... )
shape: (4, 3)
â”Œâ”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ a    â”† n_bytes â”† n_chars â”‚
â”‚ ---  â”† ---     â”† ---     â”‚
â”‚ cat  â”† u32     â”† u32     â”‚
â•žâ•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•¡
â”‚ CafÃ© â”† 5       â”† 4       â”‚
â”‚ 345  â”† 3       â”† 3       â”‚
â”‚ æ±äº¬ â”† 6       â”† 2       â”‚
â”‚ null â”† null    â”† null    â”‚
â””â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
)r   r   Úcat_len_bytesr   s    r   Ú	len_bytesÚExprCatNameSpace.len_bytes+   s   € ôR ˜Ÿ™×3Ñ3Ó5Ó6Ð6r   c                óH   • [        U R                  R                  5       5      $ )u•  
Return the number of characters of the string representation of each value.

Returns
-------
Expr
    Expression of data type :class:`UInt32`.

See Also
--------
len_bytes

Notes
-----
When working with ASCII text, use :func:`len_bytes` instead to achieve
equivalent output with much better performance:
:func:`len_bytes` runs in _O(1)_, while :func:`len_chars` runs in (_O(n)_).

A character is defined as a `Unicode scalar value`_. A single character is
represented by a single byte when working with ASCII text, and a maximum of
4 bytes otherwise.

.. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value

Examples
--------
>>> df = pl.DataFrame(
...     {"a": pl.Series(["CafÃ©", "345", "æ±äº¬", None], dtype=pl.Categorical)}
... )
>>> df.with_columns(
...     pl.col("a").cat.len_chars().alias("n_chars"),
...     pl.col("a").cat.len_bytes().alias("n_bytes"),
... )
shape: (4, 3)
â”Œâ”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ a    â”† n_chars â”† n_bytes â”‚
â”‚ ---  â”† ---     â”† ---     â”‚
â”‚ cat  â”† u32     â”† u32     â”‚
â•žâ•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•¡
â”‚ CafÃ© â”† 4       â”† 5       â”‚
â”‚ 345  â”† 3       â”† 3       â”‚
â”‚ æ±äº¬ â”† 2       â”† 6       â”‚
â”‚ null â”† null    â”† null    â”‚
â””â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜
)r   r   Úcat_len_charsr   s    r   Ú	len_charsÚExprCatNameSpace.len_charsV   s   € ô\ ˜Ÿ™×3Ñ3Ó5Ó6Ð6r   c                ó¨   • [        U[        5      (       d  S[        U5      < 3n[        U5      e[	        U R
                  R                  U5      5      $ )uÿ  
Check if string representations of values start with a substring.

Parameters
----------
prefix
    Prefix substring.

See Also
--------
contains : Check if string repr contains a substring that matches a pattern.
ends_with : Check if string repr end with a substring.

Notes
-----
Whereas `str.starts_with` allows expression inputs, `cat.starts_with` requires
a literal string value.

Examples
--------
>>> df = pl.DataFrame(
...     {"fruits": pl.Series(["apple", "mango", None], dtype=pl.Categorical)}
... )
>>> df.with_columns(
...     pl.col("fruits").cat.starts_with("app").alias("has_prefix"),
... )
shape: (3, 2)
â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ fruits â”† has_prefix â”‚
â”‚ ---    â”† ---        â”‚
â”‚ cat    â”† bool       â”‚
â•žâ•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•â•â•â•¡
â”‚ apple  â”† true       â”‚
â”‚ mango  â”† false      â”‚
â”‚ null   â”† null       â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜

Using `starts_with` as a filter condition:

>>> df.filter(pl.col("fruits").cat.starts_with("app"))
shape: (1, 1)
â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ fruits â”‚
â”‚ ---    â”‚
â”‚ cat    â”‚
â•žâ•â•â•â•â•â•â•â•â•¡
â”‚ apple  â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”˜
z!'prefix' must be a string; found )Ú
isinstanceÚstrr   Ú	TypeErrorr   r   Úcat_starts_with)r   ÚprefixÚmsgs      r   Ústarts_withÚExprCatNameSpace.starts_with†   sI   € ôd ˜&¤#×&Ñ&Ø5Ô6IÈ&Ó6QÑ5TÐUˆCÜ˜C“.Ð Ü˜Ÿ™×5Ñ5°fÓ=Ó>Ð>r   c                ó¨   • [        U[        5      (       d  S[        U5      < 3n[        U5      e[	        U R
                  R                  U5      5      $ )uè  
Check if string representations of values end with a substring.

Parameters
----------
suffix
    Suffix substring.

See Also
--------
contains : Check if string reprs contains a substring that matches a pattern.
starts_with : Check if string reprs start with a substring.

Notes
-----
Whereas `str.ends_with` allows expression inputs, `cat.ends_with` requires a
literal string value.

Examples
--------
>>> df = pl.DataFrame(
...     {"fruits": pl.Series(["apple", "mango", None], dtype=pl.Categorical)}
... )
>>> df.with_columns(pl.col("fruits").cat.ends_with("go").alias("has_suffix"))
shape: (3, 2)
â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ fruits â”† has_suffix â”‚
â”‚ ---    â”† ---        â”‚
â”‚ cat    â”† bool       â”‚
â•žâ•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•â•â•â•â•â•¡
â”‚ apple  â”† false      â”‚
â”‚ mango  â”† true       â”‚
â”‚ null   â”† null       â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”˜

Using `ends_with` as a filter condition:

>>> df.filter(pl.col("fruits").cat.ends_with("go"))
shape: (1, 1)
â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”
â”‚ fruits â”‚
â”‚ ---    â”‚
â”‚ cat    â”‚
â•žâ•â•â•â•â•â•â•â•â•¡
â”‚ mango  â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”˜
z!'suffix' must be a string; found )r$   r%   r   r&   r   r   Úcat_ends_with)r   Úsuffixr)   s      r   Ú	ends_withÚExprCatNameSpace.ends_with½   sI   € ô` ˜&¤#×&Ñ&Ø5Ô6IÈ&Ó6QÑ5TÐUˆCÜ˜C“.Ð Ü˜Ÿ™×3Ñ3°FÓ;Ó<Ð<r   Nc                óJ   • [        U R                  R                  X5      5      $ )u–  
Extract a substring from the string representation of each value.

Parameters
----------
offset
    Start index. Negative indexing is supported.
length
    Length of the slice. If set to `None` (default), the slice is taken to the
    end of the string.

Returns
-------
Expr
    Expression of data type :class:`String`.

Notes
-----
Both the `offset` and `length` inputs are defined in terms of the number
of characters in the (UTF8) string. A character is defined as a
`Unicode scalar value`_. A single character is represented by a single byte
when working with ASCII text, and a maximum of 4 bytes otherwise.

.. _Unicode scalar value: https://www.unicode.org/glossary/#unicode_scalar_value

Examples
--------
>>> df = pl.DataFrame(
...     {
...         "s": pl.Series(
...             ["pear", None, "papaya", "dragonfruit"],
...             dtype=pl.Categorical,
...         )
...     }
... )
>>> df.with_columns(pl.col("s").cat.slice(-3).alias("slice"))
shape: (4, 2)
â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”
â”‚ s           â”† slice â”‚
â”‚ ---         â”† ---   â”‚
â”‚ cat         â”† str   â”‚
â•žâ•â•â•â•â•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•¡
â”‚ pear        â”† ear   â”‚
â”‚ null        â”† null  â”‚
â”‚ papaya      â”† aya   â”‚
â”‚ dragonfruit â”† uit   â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”˜

Using the optional `length` parameter

>>> df.with_columns(pl.col("s").cat.slice(4, length=3).alias("slice"))
shape: (4, 2)
â”Œâ”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”¬â”€â”€â”€â”€â”€â”€â”€â”
â”‚ s           â”† slice â”‚
â”‚ ---         â”† ---   â”‚
â”‚ cat         â”† str   â”‚
â•žâ•â•â•â•â•â•â•â•â•â•â•â•â•â•ªâ•â•â•â•â•â•â•â•¡
â”‚ pear        â”†       â”‚
â”‚ null        â”† null  â”‚
â”‚ papaya      â”† ya    â”‚
â”‚ dragonfruit â”† onf   â”‚
â””â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”´â”€â”€â”€â”€â”€â”€â”€â”˜
)r   r   Ú	cat_slice)r   ÚoffsetÚlengths      r   ÚsliceÚExprCatNameSpace.sliceò   s   € ô@ ˜Ÿ™×/Ñ/°Ó?Ó@Ð@r   r   )r   r   ÚreturnÚNone)r7   r   )r(   r%   r7   r   )r.   r%   r7   r   r   )r3   Úintr4   z
int | Noner7   r   )Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__Ú	_accessorr   r   r   r!   r*   r/   r5   Ú__static_attributes__© r   r   r	   r	      s:   † Ù8à€Iô$ô<ô.)7ôV.7ô`5?ôn3=÷j@Añ @Ar   r	   N)Ú
__future__r   Útypingr   Úpolars._utils.variousr   Úpolars._utils.wrapr   Úpolarsr   r	   rA   r   r   Ú<module>rG      s%   ðÝ "å  å 5Ý (æÝ÷fAò fAr   