
    i	                     Z    S r SSKJr  SSKJrJrJrJr  SSKJ	r	  SSK
Jr   " S S\	5      rg)	%Read PDF files using PyMuPDF library.    )Path)DictListOptionalUnion)
BaseReader)Documentc            
           \ rS rSrSr  SS\\\4   S\S\	\
   S\\   4S jjr  SS\\\4   S\S\	\
   S\\   4S	 jjrS
rg)PyMuPDFReader
   r   N	file_pathmetadata
extra_inforeturnc                 "    U R                  XUS9$ )zXLoads list of documents from PDF file and also accepts extra information in dict format.)r   r   )load)selfr   r   r   s       `/home/james-whalen/.local/lib/python3.13/site-packages/llama_index/readers/file/pymu_pdf/base.py	load_dataPyMuPDFReader.load_data   s     yy*yMM    c                 v   SSK n[        U[        5      (       d   [        U[        5      (       d  [	        S5      eUR                  U5      nU(       a   [        U[        5      (       d  [	        S5      eU(       aw  U(       d  0 n[        U5      US'   [        U5      US'   U Vs/ s HD  n[        UR                  5       R                  S5      [        U40 SUR                  S	-    0D6S
9PMF     sn$ U Vs/ s H3  n[        UR                  5       R                  S5      U=(       d    0 S
9PM5     sn$ s  snf s  snf )a!  
Loads list of documents from PDF file and also accepts extra information in dict format.

Args:
    file_path (Union[Path, str]): file path of PDF file (accepts string or Path).
    metadata (bool, optional): if metadata to be included or not. Defaults to True.
    extra_info (Optional[Dict], optional): extra information related to each document in dict format. Defaults to None.

Raises:
    TypeError: if extra_info is not a dictionary.
    TypeError: if file_path is not a string or Path.

Returns:
    List[Document]: list of documents.

r   Nz#file_path must be a string or Path.z extra_info must be a dictionary.total_pagesr   zutf-8source   )textr   )fitz
isinstancestrr   	TypeErroropendictlenr
   get_textencodenumber)r   r   r   r   r   docpages          r   r   PyMuPDFReader.load   s<   , 	 )S))*Y2M2MABB ii	" j$// BCC 
(+CJ}%&))nJ{#    D //8#"  %q(9    $  	  D //8ZEUSU  	 s   "AD14:D6 )TN)__name__
__module____qualname____firstlineno____doc__r   r   r    boolr   r   r   r
   r   r   __static_attributes__r+   r   r   r   r   
   s    /
 %)	Ns#N N TN	N
 
hN %)	?s#? ? TN	?
 
h? ?r   r   N)r0   pathlibr   typingr   r   r   r   llama_index.core.readers.baser	   llama_index.core.schemar
   r   r+   r   r   <module>r7      s%    +  . . 4 ,KJ Kr   