
    i                         S r SSKrSSKJr  SSKJs  Jr  SSKJ	r	  SSK
JrJrJr  SSKJr  SSKJr  S\R$                  S\S	\\R$                     4S
 jr " S S\5      rg)zJSON Reader.    N)Path)DictListOptional)
BaseReader)Documentrootlevelreturnc                 2   ^^^ UUU4S jm/ mT" U S5        T$ )z
Get collection of nodes up to certain level including leaf nodes.

Args:
    root (ET.Element): XML Root Element
    level (int): Levels to traverse in the tree

Returns:
    List[ET.Element]: List of target nodes

c                    > [        U 5      S:X  d  TU:X  a  TR                  U 5        g UT:  a  U  H  nT" X!S-   5        M     g g )Nr      )lenappend)current_nodecurrent_levelchildr
   nodestraverses      [/home/james-whalen/.local/lib/python3.13/site-packages/llama_index/readers/file/xml/base.pyr   -_get_leaf_nodes_up_to_level.<locals>.traverse   sG    |!Um%;LL&U"% 12 & #    r    )r	   r
   r   r   s    `@@r   _get_leaf_nodes_up_to_levelr      s    3 ET1Lr   c                      ^  \ rS rSrSrSS\\   SS4U 4S jjjr SS\R                  S\\
   S\\   4S	 jjr SS
\S\\
   S\\   4S jjrSrU =r$ )	XMLReader*   z
XML reader.

Reads XML documents with options to help suss out relationships between nodes.

Args:
    tree_level_split (int): From which level in the xml tree we split documents,
    the default level is the root which is level 0

tree_level_splitr   Nc                 .   > [         TU ]  5         Xl        g)zInitialize with arguments.N)super__init__r   )selfr   	__class__s     r   r!   XMLReader.__init__6   s     0r   r	   
extra_infoc                     [        XR                  5      n/ nU Hp  n[        R                  " USS9R	                  S5      n[
        R                  " SSU5      nUR                  5       nUR                  [        Xb=(       d    0 S95        Mr     U$ )z
Parse the xml object into a list of Documents.

Args:
    root: The XML Element to be converted.
    extra_info (Optional[Dict]): Additional information. Default is None.

Returns:
    Document: The documents.

utf8)encodingzutf-8z	^<\?xml.* )textr%   )
r   r   ETtostringdecoderesubstripr   r   )r"   r	   r%   r   	documentsnodecontents          r   _parse_xmlelt_to_document#XMLReader._parse_xmlelt_to_document;   s{     ,D2G2GH	Dkk$8??HGff\2w7GmmoGX7?ORPQ	  r   filec                     [        U[        5      (       d  [        U5      n[        R                  " U5      nU R	                  UR                  5       U5      $ )z
Load data from the input file.

Args:
    file (Path): Path to the input file.
    extra_info (Optional[Dict]): Additional information. Default is None.

Returns:
    List[Document]: List of documents.

)
isinstancer   r+   parser4   getroot)r"   r6   r%   trees       r   	load_dataXMLReader.load_dataS   s@      $%%:Dxx~--dllnjIIr   )r   )r   )N)__name__
__module____qualname____firstlineno____doc__r   intr!   _XmlETElementr   r   r   r4   r   r<   __static_attributes____classcell__)r#   s   @r   r   r   *   s    	1# 1t 1 1 BFNN08	h6 &*JJ TNJ 
h	J Jr   r   )rB   r.   defusedxml.ElementTreeElementTreer+   xml.etree.ElementTreeetreerD   pathlibr   typingr   r   r   llama_index.core.readers.baser   llama_index.core.schemar   rE   rC   r   r   r   r   r   <module>rP      s[     	 # & &  ' ' 4 ,
..!$	&..:=J
 =Jr   