
    ij                     r    S r SSKrSSKJr  SSKJr  SSKJrJrJ	r	J
r
Jr  SSKJr  SSKJr   " S S	\5      rg)
z2
Markdown parser.

Contains parser for md files.

    N)AbstractFileSystem)LocalFileSystem)AnyDictListOptionalTuple)
BaseReader)Documentc                      ^  \ rS rSrSrSSSS.S\S\S\S	\S
\SS4U 4S jjjrS\S\	\
\\   \4      4S jrS\S\4S jrS\S\4S jrS\4S jr  SS\S\S\\   S\	\
\\   \4      4S jjr  SS\S\\   S\\   S\	\   4S jjrSrU =r$ )MarkdownReader   z
Markdown parser.

Extract text from markdown files.
Returns dictionary with keys as headers and values as the text between headers.

T )remove_hyperlinksremove_images	separatorargsr   r   r   kwargsreturnNc                J   > [         TU ]  " U0 UD6  Xl        X l        X0l        g)zInit params.N)super__init___remove_hyperlinks_remove_images
_separator)selfr   r   r   r   r   	__class__s         `/home/james-whalen/.local/lib/python3.13/site-packages/llama_index/readers/file/markdown/base.pyr   MarkdownReader.__init__   s(     	$)&)"3+#    markdown_textc           
         / nUR                  S5      n/ nSn0 nU GH  nUR                  S5      (       a  U(       + nU(       a  UR                  U5        M;  UR                  5       nU(       d  MT  [        R
                  " SU5      nU(       Ga  U(       a9  U(       d2  UR                  SSR                  U5      45        UR                  5         [        UR                  S5      5      n	UR                  S5      n
UR                  U	5      (       a  UR                  U R                  R                  UR                  5       5      SR                  U5      45        UR                  5        VVs0 s H  u  pX:  d  M  X_M     nnnUR                  5         XU	'   GM  UR                  U5        GM     U(       d  U(       aI  UR                  U R                  R                  UR                  5       5      SR                  U5      45        U VVs/ s H6  u  pU(       a  UR                  5       OS[        R                  " SS	U5      4PM8     snn$ s  snnf s  snnf )
zGConvert a markdown file to a list of tuples containing header and text.
Fz```z^(#+)\s+(.*)N      z<.*?> )split
startswithappendstriprematchjoinclearlengroupgetr   valuesitemssub)r   r!   markdown_tupslinescurrent_linesin_code_blockheaderslineheader_matchheader_levelcurrent_headerkvkeyvalues                  r   markdown_to_tupsMarkdownReader.markdown_to_tups'   s   9;##D)Du%%$1 1$$T* zz|!xx>$W%,,dDIIm4L-MN%++-#&$**1-$L &2%7%7%:N{{<00%,, $ 4 4W^^5E F $		- 8 5<MMO"XODAqGW414O"X%++-,:L)!((.O T G  %%gnn&67=9QR ,

 ,
  #		xU+ ,
 	
 #Y
s   ,I;I=Icontentc                 6    Sn[         R                  " USU5      $ )z;Remove images in markdown content but keep the description.z![(.?)](.?)\1r+   r4   r   rD   patterns      r   r   MarkdownReader.remove_imagesg   s     vvgug..r    c                 6    Sn[         R                  " USU5      $ )z&Remove hyperlinks in markdown content.z\[(.*?)\]\((.*?)\)rF   rG   rH   s      r   r    MarkdownReader.remove_hyperlinksl   s    'vvgug..r    c                     0 $ )z&Initialize the parser with the config. )r   s    r   _init_parserMarkdownReader._init_parserq   s    	r    filepatherrorsfsc                 `   U=(       d
    [        5       nUR                  USS9 nUR                  5       R                  SS9nSSS5        U R                  (       a  U R                  W5      nU R                  (       a  U R                  W5      nU R                  W5      $ ! , (       d  f       Nc= f)zParse file into tuples.zutf-8)encodingN)	r   openreaddecoder   r   r   r   rB   )r   rQ   rR   rS   frD   s         r   
parse_tupsMarkdownReader.parse_tupsu   s     $?$WWXW0Affhoowo7G 1"",,W5G((1G$$W-- 10s   B
B-file
extra_infoc           	          U R                  XS9n/ nU HS  u  pgUc#  UR                  [        Xr=(       d    0 S95        M+  UR                  [        SU SU 3U=(       d    0 S95        MU     U$ )zParse file into string.)rS   )textmetadataz

r#   )rZ   r)   r   )r   r\   r]   rS   tupsresultsheaderr_   s           r   	load_dataMarkdownReader.load_data   sp     t+ LF~xT<L"MND4&"9JDTRTU	 ! r    )r   r   r   )ignoreN)NN)__name__
__module____qualname____firstlineno____doc__r   boolstrr   r   r	   r   rB   r   r   r   rO   r   rZ   r   rd   __static_attributes____classcell__)r   s   @r   r   r      sC    #'"$$  $ 	$
 $ $ 
$ $>
c >
d5#PSAS;T6U >
@/S /S /
/ / /
d  +/	.. . '(	.
 
eHSM3&'	(.& &*+/	 TN '(	
 
h r    r   )rk   r+   fsspecr   fsspec.implementations.localr   typingr   r   r   r   r	   llama_index.core.readers.baser
   llama_index.core.schemar   r   rN   r    r   <module>ru      s/    
 % 8 3 3 4 ,FZ Fr    