ó
    ±Éi„"  ã                   ó¢   • S r SSKJr  SSKJrJrJrJr  SSKJ	r	  SSK
r
SSKrSSKJr  SSKJr   " S S	\5      r " S
 S\5      r " S S\5      rg)z<
Tabular parser.

Contains parsers for tabular data files.

é    )ÚPath)ÚAnyÚDictÚListÚOptional)ÚAbstractFileSystemN)Ú
BaseReader)ÚDocumentc            	       óp   ^ • \ rS rSrSrSS.S\S\S\SS	4U 4S
 jjjr SS\S\	\
   S\\   4S jjrSrU =r$ )Ú	CSVReaderé   z»
CSV parser.

Args:
    concat_rows (bool): whether to concatenate all rows into one document.
        If set to False, a Document will be created for each row.
        True by default.

T)Úconcat_rowsÚargsr   ÚkwargsÚreturnNc                ó2   >• [         TU ]  " U0 UD6  Xl        g©zInit params.N)ÚsuperÚ__init__Ú_concat_rows)Úselfr   r   r   Ú	__class__s       €Ú_/home/james-whalen/.local/lib/python3.13/site-packages/llama_index/readers/file/tabular/base.pyr   ÚCSVReader.__init__   s   ø€ ä‰Ò˜$Ð) &Ò)Ø'Õó    ÚfileÚ
extra_infoc                 óà  •  SSK n/ n[        U5       nUR                  U5      nU H#  nUR	                  SR                  U5      5        M%     SSS5        UR                  UR                  S.nU(       a  0 UEUEnU R                  (       a  [        SR                  U5      US9/$ U V	s/ s H  n	[        X˜S9PM     sn	$ ! [         a    [        S5      ef = f! , (       d  f       N’= fs  sn	f )zR
Parse file.

Returns:
    Union[str, List[str]]: a string or a List of strings.

r   Nz)csv module is required to read CSV files.ú, )ÚfilenameÚ	extensionÚ
©ÚtextÚmetadata)
ÚcsvÚImportErrorÚopenÚreaderÚappendÚjoinÚnameÚsuffixr   r
   )
r   r   r   r&   Ú	text_listÚfpÚ
csv_readerÚrowr%   r$   s
             r   Ú	load_dataÚCSVReader.load_data"   sÛ   € ð	KÛð ˆ	Ü$ŒZ˜2ØŸ™ B›ˆJÛ!Ø× Ñ  §¡¨3£Ö0ñ "÷ ð
 !%§	¡	¸¿¹ÑDˆÞØ1˜(Ð1 jÐ1ˆHà××Ü $§)¡)¨IÓ"6ÀÑJÐKÐKáGPÓQÂy¸t”H $Ô:ÁyÑQÐQøô ó 	KÜÐIÓJÐJð	Kú÷ Züò Rs   ‚C “;CÂ+C+ÃCÃ
C()r   ©N)Ú__name__Ú
__module__Ú__qualname__Ú__firstlineno__Ú__doc__r   Úboolr   r   r   r   r   r
   r2   Ú__static_attributes__Ú__classcell__©r   s   @r   r   r      sg   ø† ñð 8<ò (˜cð (°ð (Àsð (Èt÷ (ð (ð 8<ñRØðRØ&.¨t¡nðRà	ˆh‰÷Ró Rr   r   c                   óŽ   ^ • \ rS rSrSrSSS0 S.S\S\S	\S
\S\S\SS4U 4S jjjr	  SS\
S\\   S\\   S\\   4S jjrSrU =r$ )ÚPandasCSVReaderé@   a  
Pandas-based CSV parser.

Parses CSVs using the separator detection from Pandas `read_csv`function.
If special parameters are required, use the `pandas_config` dict.

Args:
    concat_rows (bool): whether to concatenate all rows into one document.
        If set to False, a Document will be created for each row.
        True by default.

    col_joiner (str): Separator to use for joining cols per row.
        Set to ", " by default.

    row_joiner (str): Separator to use for joining each row.
        Only used when `concat_rows=True`.
        Set to "\n" by default.

    pandas_config (dict): Options for the `pandas.read_csv` function call.
        Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
        for more information.
        Set to empty dict by default, this means pandas will try to figure
        out the separators, table head, etc. on its own.

Tr   r"   )r   Ú
col_joinerÚ
row_joinerÚpandas_configr   r   rA   rB   rC   r   r   Nc                óV   >• [         TU ]  " U0 UD6  Xl        X l        X0l        X@l        gr   )r   r   r   Ú_col_joinerÚ_row_joinerÚ_pandas_config)r   r   rA   rB   rC   r   r   r   s          €r   r   ÚPandasCSVReader.__init__[   s/   ø€ ô 	‰Ò˜$Ð) &Ò)Ø'ÔØ%ÔØ%ÔØ+Õr   r   r   Úfsc                 óþ  ^ • U(       a<  UR                  U5       n[        R                  " U40 T R                  D6nSSS5        O![        R                  " U40 T R                  D6nWR	                  U 4S jSS9R                  5       nT R                  (       a-  [        T R                  R                  U5      U=(       d    0 S9/$ U Vs/ s H  n[        Xr=(       d    0 S9PM     sn$ ! , (       d  f       N‘= fs  snf )zParse file.Nc                 ó|   >• TR                   R                  U R                  [        5      R	                  5       5      $ r4   )rE   r+   ÚastypeÚstrÚtolist)r1   r   s    €r   Ú<lambda>Ú+PandasCSVReader.load_data.<locals>.<lambda>y   s'   ø€ ˜×)Ñ)×/Ñ/°·
±
¼3³×0FÑ0FÓ0HÔIr   é   )Úaxisr#   )
r(   ÚpdÚread_csvrG   ÚapplyrN   r   r
   rF   r+   )r   r   r   rI   ÚfÚdfr.   r$   s   `       r   r2   ÚPandasCSVReader.load_datak   sâ   ø€ ö Ø—‘˜” !Ü—[’[ Ñ: d×&9Ñ&9Ñ:÷ ô —’˜TÑ9 T×%8Ñ%8Ñ9ˆBà—H‘HÜIÐPQð ð 
ç
‰&‹(ð 	ð ××äØ×*Ñ*×0Ñ0°Ó;Àj×FVÐTVñðð ñ LUóÚKTÀ4”˜d×-=¸2Ô>É9ñð ÷! •üò s   š"C)Ã
C:Ã)
C7)rE   r   rG   rF   ©NN©r5   r6   r7   r8   r9   r   r:   rM   Údictr   r   r   r   r   r   r
   r2   r;   r<   r=   s   @r   r?   r?   @   sª   ø† ñð: !ØØØ ò,àð,ð ð,ð ð	,ð
 ð,ð ð,ð ð,ð 
÷,ð ,ð& &*Ø+/ñ	àðð ˜T‘Nðð Ð'Ñ(ð	ð
 
ˆh‰÷ó r   r?   c                   ó   ^ • \ rS rSrSrSSSS0 S.S\S	\S
\S\S\S\SS4U 4S jjjr	  SS\
S\\   S\\   S\\   4S jjrSrU =r$ )ÚPandasExcelReaderéˆ   a  
Custom Excel parser that includes header names in each row.

Parses Excel files using Pandas' `read_excel` function, but formats
each row to include the header name, for example: "name: joao, position: analyst".
The first row (header) is not included in the generated documents.

Args:
    concat_rows (bool): Determines whether to concatenate all rows into one document.
        If set to False, one Document is created for each row.
        Defaults to True.
    sheet_name (str | int | None): Defaults to None, meaning all sheets.
        Alternatively, pass a string or an integer to specify the sheet to be read.
    field_separator (str): Character or string to separate each field. Default: ", ".
    key_value_separator (str): Character or string to separate the key from the value. Default: ": ".
    pandas_config (dict): Options for the `pandas.read_excel` function call.
        Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
        for more details.
        Defaults to an empty dictionary.

TNr   z: )r   Ú
sheet_nameÚfield_separatorÚkey_value_separatorrC   r   r   r`   ra   rC   r   r   c                ób   >• [         TU ]  " U0 UD6  Xl        X l        X0l        X@l        XPl        g)zInitializes the parameters.N)r   r   r   Ú_sheet_nameÚ_field_separatorÚ_key_value_separatorrG   )	r   r   r_   r`   ra   rC   r   r   r   s	           €r   r   ÚPandasExcelReader.__init__Ÿ   s5   ø€ ô 	‰Ò˜$Ð) &Ò)Ø'ÔØ%ÔØ /ÔØ$7Ô!Ø+Õr   r   r   rI   c                 óJ  • [         R                  R                  S5      nUb  O[        S5      eU(       aF  UR	                  U5       n[
        R                  " XPR                  40 U R                  D6nSSS5        O+[
        R                  " XR                  40 U R                  D6n/ n[        W[
        R                  5      (       Ga  UR                  S5      nUR                  R                  5       n	/ n
UR                  5        HU  u  p¼U R                  R!                  U	 Vs/ s H  nU U R"                   XÍ   < 3PM     sn5      nU
R%                  U5        MW     U R&                  (       a3  UR%                  [)        SR!                  U
5      U=(       d    0 S95        U$ UR+                  U
 Vs/ s H  n[)        Xò=(       d    0 S9PM     sn5         U$ UR-                  5        GH  nUR                  S5      nUR                  R                  5       n	/ n
UR                  5        HU  u  p¼U R                  R!                  U	 Vs/ s H  nU U R"                   XÍ   < 3PM     sn5      nU
R%                  U5        MW     U R&                  (       a3  UR%                  [)        SR!                  U
5      U=(       d    0 S95        MÞ  UR+                  U
 Vs/ s H  n[)        Xò=(       d    0 S9PM     sn5        GM     U$ ! , (       d  f       GNg= fs  snf s  snf s  snf s  snf )zParses the file.ÚopenpyxlNz[Please install openpyxl to read Excel files. You can install it with 'pip install openpyxl'Ú r"   r#   )Ú	importlibÚutilÚ	find_specr'   r(   rS   Ú
read_excelrc   rG   Ú
isinstanceÚ	DataFrameÚfillnaÚcolumnsrN   Úiterrowsrd   r+   re   r*   r   r
   ÚextendÚvalues)r   r   r   rI   Úopenpyxl_specrV   ÚdfsÚ	documentsrW   Úheadersr.   Ú_r1   ÚheaderÚformatted_rowr$   s                   r   r2   ÚPandasExcelReader.load_data±   sÞ  € ô "Ÿ™×0Ñ0°Ó<ˆØÑ$ØäØmóð ö
 Ø—‘˜” !Ü—m’m A×'7Ñ'7ÑO¸4×;NÑ;NÑO÷ ô —-’- ×&6Ñ&6ÑN¸$×:MÑ:MÑNˆCàˆ	ô cœ2Ÿ<™<×(Ò(Ø—‘˜B“ˆBà—j‘j×'Ñ'Ó)ˆGð ˆIð Ÿ+™+ž-‘à $× 5Ñ 5× :Ñ :ñ '.óâ&-˜Fð "˜( 4×#<Ñ#<Ð"=¸c¹k¹_ÓMÙ&-ñó!ð × Ñ  Ö/ñ (ð × × Ø× Ñ Ü $§)¡)¨IÓ"6À×AQÈrÑRôðL ÐðE × Ñ ñ %.óâ$-˜Dô ! d×5EÀ2ÔFÙ$-ñõðD Ðð5 —j‘j—lØ—Y‘Y˜r“]ØŸ*™*×+Ñ+Ó-à	Ø Ÿk™kžm‘FAØ$(×$9Ñ$9×$>Ñ$>ñ +2óâ*1 ð  &˜h t×'@Ñ'@Ð&AÀ#Á+ÁÓQÙ*1ñó%Mð ×$Ñ$ ]Ö3ñ ,ð ×$×$Ø×$Ñ$Ü  d§i¡i°	Ó&:ÀZ×EUÐSUÑVöð ×$Ñ$ñ )2óâ(1 ô %¨$×9IÀrÔJÙ(1ñ÷ñ' #ð4 Ð÷G –üò*ùòùòùòs$   Á,K?Ä$L
Æ2LÉLËL 
Ë?
L)r   rd   re   rG   rc   rY   rZ   r=   s   @r   r]   r]   ˆ   s³   ø† ñð2 !ØØ#Ø#'Ø ò,àð,ð ð,ð
 ð,ð !ð,ð ð,ð ð,ð 
÷,ð ,ð* &*Ø+/ñ	TàðTð ˜T‘NðTð Ð'Ñ(ð	Tð
 
ˆh‰÷Tó Tr   r]   )r9   Úpathlibr   Útypingr   r   r   r   Úfsspecr   rj   ÚpandasrS   Úllama_index.core.readers.baser	   Úllama_index.core.schemar
   r   r?   r]   © r   r   Ú<module>r„      sP   ðñõ ß ,Ó ,Ý %Û ã Ý 4Ý ,ô+R
ô +Rô\Ejô EôP}˜
õ }r   