
    i                         S r SSKrSSKJr  SSKJrJrJrJr  SSK	J
r
  SSKJr  SSKJr  \R                  " \5      r " S S	\5      rg)
z7
Mbox parser.

Contains simple parser for mbox files.

    N)Path)AnyDictListOptional)AbstractFileSystem)
BaseReader)Documentc                      ^  \ rS rSr% SrSr\\S'   S\S.S\S\	S	\S
\SS4
U 4S jjjr
  SS\S\\   S\\   S\\   4S jjrSrU =r$ )
MboxReader   z
Mbox parser.

Extract messages from mailbox files.
Returns string including date, subject, sender, receiver and
content for each message.

zMDate: {_date}
From: {_from}
To: {_to}
Subject: {_subject}
Content: {_content}DEFAULT_MESSAGE_FORMATr   )	max_countmessage_formatargsr   r   kwargsreturnNc                ~   >  SSK Jn  [        TU ]  " U0 UD6  Xl        X l        g! [         a    [        S5      ef = f)zInit params.r   BeautifulSoupz@`beautifulsoup4` package not found: `pip install beautifulsoup4`N)bs4r   ImportErrorsuper__init__r   r   )selfr   r   r   r   r   	__class__s         \/home/james-whalen/.local/lib/python3.13/site-packages/llama_index/readers/file/mbox/base.pyr   MboxReader.__init__%   sK    	) 	$)&)",  	R 	s   & <file
extra_infofsc           	         SSK nSSKJn  SSKJn  SSKJn  U(       a  [        R                  S5        Sn/ n	U" US9R                  n
UR                  XS9n[        U5       GH&  u  p UnUR                  5       (       aa  UR                  5        HL  nUR                  5       n[        UR!                  S	5      5      nUS
:X  d  M5  SU;  d  M=  UR#                  SS9n  O   OUR#                  SS9nU" W5      nSR%                  UR'                  5       R)                  5       5      nU R*                  R-                  US   US   US   US   US9nU	R/                  U5        US-  nU R2                  S:  d  GM  XR2                  :  d  GM'    O   U	 Vs/ s H  n[5        UU=(       d    0 S9PM     sn$ ! [0         a%  n[        R                  SU SU 35         SnAN|SnAff = fs  snf )zParse file into string.r   N)BytesParser)defaultr   zyfs was specified but MboxReader doesn't support loading from fsspec filesystems. Will load from local filesystem instead.)policy)factoryzContent-Dispositionz
text/plain
attachmentT)decode datefromtosubject)_date_from_to_subject_contentzFailed to parse message:
z
 with exception    )textmetadata)mailboxemail.parserr#   email.policyr$   r   r   loggerwarningparsembox	enumerateis_multipartwalkget_content_typestrgetget_payloadjoinget_textsplitr   formatappend	Exceptionr   r
   )r   r   r    r!   r6   r#   r$   r   iresultsbytes_parserr<   __msgmsgpartctypecdispocontentsoupstripped_content
msg_stringeresults                           r   	load_dataMboxReader.load_data8   s    	,(%NNT
 "'288||D|7 !GAY+/##%% #
 $ 5 5 7!$TXX.C%D!E L0\5O&*&6&6d&6&CG! !+ "ooTo:G %W-#&88DMMO,A,A,C#D !0077f+f+D	 ^- 8 
 z*
 FA~~!a>>&9E 'H PWWwVfz/?R@wWW  Y!;D6ASTUSVWXXY Xs,   (AF0F0BF0G"0
G:GG)NN)__name__
__module____qualname____firstlineno____doc__r   rA   __annotations__r   intr   r   r   r   r   r   r
   rY   __static_attributes____classcell__)r   s   @r   r   r      s    	 C  4	-- - 	-
 - 
- -, &*+/	?X?X TN?X '(	?X
 
h?X ?X    r   )r_   loggingpathlibr   typingr   r   r   r   fsspecr   llama_index.core.readers.baser	   llama_index.core.schemar
   	getLoggerr[   r9   r    rd   r   <module>rm      sA      , , % 4 ,			8	$dX dXrd   