
    i"                         S r SSKJr  SSKJrJrJrJr  SSKJ	r	  SSK
r
SSKrSSKJr  SSKJr   " S S	\5      r " S
 S\5      r " S S\5      rg)z<
Tabular parser.

Contains parsers for tabular data files.

    )Path)AnyDictListOptional)AbstractFileSystemN)
BaseReader)Documentc            	       p   ^  \ rS rSrSrSS.S\S\S\SS	4U 4S
 jjjr SS\S\	\
   S\\   4S jjrSrU =r$ )	CSVReader   z
CSV parser.

Args:
    concat_rows (bool): whether to concatenate all rows into one document.
        If set to False, a Document will be created for each row.
        True by default.

T)concat_rowsargsr   kwargsreturnNc                2   > [         TU ]  " U0 UD6  Xl        gzInit params.N)super__init___concat_rows)selfr   r   r   	__class__s       _/home/james-whalen/.local/lib/python3.13/site-packages/llama_index/readers/file/tabular/base.pyr   CSVReader.__init__   s    $)&)'    file
extra_infoc                     SSK n/ n[        U5       nUR                  U5      nU H#  nUR	                  SR                  U5      5        M%     SSS5        UR                  UR                  S.nU(       a  0 UEUEnU R                  (       a  [        SR                  U5      US9/$ U V	s/ s H  n	[        XS9PM     sn	$ ! [         a    [        S5      ef = f! , (       d  f       N= fs  sn	f )zR
Parse file.

Returns:
    Union[str, List[str]]: a string or a List of strings.

r   Nz)csv module is required to read CSV files., )filename	extension
textmetadata)
csvImportErroropenreaderappendjoinnamesuffixr   r
   )
r   r   r   r&   	text_listfp
csv_readerrowr%   r$   s
             r   	load_dataCSVReader.load_data"   s    	K 	$Z2BJ!  30 " 
 !%		D1(1j1H$))I"6JKKGPQytH$:yQQ  	KIJJ	K Z Rs   C ;C+C+C
C()r   N)__name__
__module____qualname____firstlineno____doc__r   boolr   r   r   r   r   r
   r2   __static_attributes____classcell__r   s   @r   r   r      sg     8< (c ( (s (t ( ( 8<RR&.tnR	hR Rr   r   c                      ^  \ rS rSrSrSSS0 S.S\S\S	\S
\S\S\SS4U 4S jjjr	  SS\
S\\   S\\   S\\   4S jjrSrU =r$ )PandasCSVReader@   a  
Pandas-based CSV parser.

Parses CSVs using the separator detection from Pandas `read_csv`function.
If special parameters are required, use the `pandas_config` dict.

Args:
    concat_rows (bool): whether to concatenate all rows into one document.
        If set to False, a Document will be created for each row.
        True by default.

    col_joiner (str): Separator to use for joining cols per row.
        Set to ", " by default.

    row_joiner (str): Separator to use for joining each row.
        Only used when `concat_rows=True`.
        Set to "\n" by default.

    pandas_config (dict): Options for the `pandas.read_csv` function call.
        Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
        for more information.
        Set to empty dict by default, this means pandas will try to figure
        out the separators, table head, etc. on its own.

Tr   r"   )r   
col_joiner
row_joinerpandas_configr   r   rA   rB   rC   r   r   Nc                V   > [         TU ]  " U0 UD6  Xl        X l        X0l        X@l        gr   )r   r   r   _col_joiner_row_joiner_pandas_config)r   r   rA   rB   rC   r   r   r   s          r   r   PandasCSVReader.__init__[   s/     	$)&)'%%+r   r   r   fsc                   ^  U(       a<  UR                  U5       n[        R                  " U40 T R                  D6nSSS5        O![        R                  " U40 T R                  D6nWR	                  U 4S jSS9R                  5       nT R                  (       a-  [        T R                  R                  U5      U=(       d    0 S9/$ U Vs/ s H  n[        Xr=(       d    0 S9PM     sn$ ! , (       d  f       N= fs  snf )zParse file.Nc                 |   > TR                   R                  U R                  [        5      R	                  5       5      $ r4   )rE   r+   astypestrtolist)r1   r   s    r   <lambda>+PandasCSVReader.load_data.<locals>.<lambda>y   s'    ))//

30F0F0HIr      )axisr#   )
r(   pdread_csvrG   applyrN   r   r
   rF   r+   )r   r   r   rI   fdfr.   r$   s   `       r   r2   PandasCSVReader.load_datak   s     ![[:d&9&9:  T9T%8%89BHHIPQ  

&( 	 **00;jFVTV  LUKT4d-=2>9 !  s   "C)
C:)
C7)rE   r   rG   rF   NNr5   r6   r7   r8   r9   r   r:   rM   dictr   r   r   r   r   r   r
   r2   r;   r<   r=   s   @r   r?   r?   @   s    : ! ,, , 	,
 , , , 
, ,& &*+/	 TN '(	
 
h r   r?   c                      ^  \ rS rSrSrSSSS0 S.S\S	\S
\S\S\S\SS4U 4S jjjr	  SS\
S\\   S\\   S\\   4S jjrSrU =r$ )PandasExcelReader   a  
Custom Excel parser that includes header names in each row.

Parses Excel files using Pandas' `read_excel` function, but formats
each row to include the header name, for example: "name: joao, position: analyst".
The first row (header) is not included in the generated documents.

Args:
    concat_rows (bool): Determines whether to concatenate all rows into one document.
        If set to False, one Document is created for each row.
        Defaults to True.
    sheet_name (str | int | None): Defaults to None, meaning all sheets.
        Alternatively, pass a string or an integer to specify the sheet to be read.
    field_separator (str): Character or string to separate each field. Default: ", ".
    key_value_separator (str): Character or string to separate the key from the value. Default: ": ".
    pandas_config (dict): Options for the `pandas.read_excel` function call.
        Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
        for more details.
        Defaults to an empty dictionary.

TNr   z: )r   
sheet_namefield_separatorkey_value_separatorrC   r   r   r`   ra   rC   r   r   c                b   > [         TU ]  " U0 UD6  Xl        X l        X0l        X@l        XPl        g)zInitializes the parameters.N)r   r   r   _sheet_name_field_separator_key_value_separatorrG   )	r   r   r_   r`   ra   rC   r   r   r   s	           r   r   PandasExcelReader.__init__   s5     	$)&)'% /$7!+r   r   r   rI   c                 J   [         R                  R                  S5      nUb  O[        S5      eU(       aF  UR	                  U5       n[
        R                  " XPR                  40 U R                  D6nSSS5        O+[
        R                  " XR                  40 U R                  D6n/ n[        W[
        R                  5      (       Ga  UR                  S5      nUR                  R                  5       n	/ n
UR                  5        HU  u  pU R                  R!                  U	 Vs/ s H  nU U R"                   X   < 3PM     sn5      nU
R%                  U5        MW     U R&                  (       a3  UR%                  [)        SR!                  U
5      U=(       d    0 S95        U$ UR+                  U
 Vs/ s H  n[)        X=(       d    0 S9PM     sn5         U$ UR-                  5        GH  nUR                  S5      nUR                  R                  5       n	/ n
UR                  5        HU  u  pU R                  R!                  U	 Vs/ s H  nU U R"                   X   < 3PM     sn5      nU
R%                  U5        MW     U R&                  (       a3  UR%                  [)        SR!                  U
5      U=(       d    0 S95        M  UR+                  U
 Vs/ s H  n[)        X=(       d    0 S9PM     sn5        GM     U$ ! , (       d  f       GNg= fs  snf s  snf s  snf s  snf )zParses the file.openpyxlNz[Please install openpyxl to read Excel files. You can install it with 'pip install openpyxl' r"   r#   )	importlibutil	find_specr'   r(   rS   
read_excelrc   rG   
isinstance	DataFramefillnacolumnsrN   iterrowsrd   r+   re   r*   r   r
   extendvalues)r   r   r   rI   openpyxl_specrV   dfs	documentsrW   headersr.   _r1   headerformatted_rowr$   s                   r   r2   PandasExcelReader.load_data   s    "00<$m 
 !mmA'7'7O4;N;NO  --&6&6N$:M:MNC	 c2<<((BBjj'')G I ++- $ 5 5 : : '.&-F "(4#<#<"=ck_M&-!   / (     $))I"6AQrRL E    %.$-D !d5E2F$-D 5 jjlYYr]**++-	 kkmFA$($9$9$>$> +2*1  &ht'@'@&A#+Q*1%M $$]3 , $$$$ dii	&:ZEUSUV $$ )2(1 %$9IrJ(1' #4 G *s$   ,K?$L
2LLL 
?
L)r   rd   re   rG   rc   rY   rZ   r=   s   @r   r]   r]      s    2 !##' ,, ,
 , !, , , 
, ,* &*+/	TT TNT '(	T
 
hT Tr   r]   )r9   pathlibr   typingr   r   r   r   fsspecr   rj   pandasrS   llama_index.core.readers.baser	   llama_index.core.schemar
   r   r?   r]    r   r   <module>r      sP     , , %   4 ,+R
 +R\Ej EP}
 }r   