
    D_i[+                        S r SSKJr  SSKrSSKrSSKJrJr  SSKJ	r	J
r
  SSKJrJrJrJr  SSKJrJrJr  SSKJr  \(       a  SS	KJr  \\
-  r " S
 S\5      r " S S\5      r " S S\5      rg)aD  Base classes for media and documents.

This module contains core abstractions for **data retrieval and processing workflows**:

- `BaseMedia`: Base class providing `id` and `metadata` fields
- `Blob`: Raw data loading (files, binary data) - used by document loaders
- `Document`: Text content for retrieval (RAG, vector stores, semantic search)

!!! note "Not for LLM chat messages"
    These classes are for data processing pipelines, not LLM I/O. For multimodal
    content in chat messages (images, audio in conversations), see
    `langchain.messages` content blocks instead.
    )annotationsN)BufferedReaderBytesIO)PathPurePath)TYPE_CHECKINGAnyLiteralcast)
ConfigDictFieldmodel_validator)Serializable)	Generatorc                  J    \ rS rSr% Sr\" SSS9rS\S'    \" \S9r	S	\S
'   Sr
g)	BaseMedia!   a-  Base class for content used in retrieval and data processing workflows.

Provides common fields for content that needs to be stored, indexed, or searched.

!!! note
    For multimodal content in **chat messages** (images, audio sent to/from LLMs),
    use `langchain.messages` content blocks instead.
NT)defaultcoerce_numbers_to_str
str | Noneid)default_factorydictmetadata )__name__
__module____qualname____firstlineno____doc__r   r   __annotations__r   r   __static_attributes__r       W/home/james-whalen/.local/lib/python3.13/site-packages/langchain_core/documents/base.pyr   r   !   s3     4tDB
D 40Hd09r#   r   c                  X   \ rS rSr% SrSrS\S'    SrS\S'    SrS	\S
'    Sr	S\S'    \
" SSS9r\SS j5       r\" SS9\SS j5       5       rSS jrSS jr\R(                  S S j5       r\SSSSS.           S!S jj5       r\SSSSS.           S"S jj5       rSS jrSrg)#Blob9   a2  Raw data abstraction for document loading and file processing.

Represents raw bytes or text, either in-memory or by file reference. Used
primarily by document loaders to decouple data loading from parsing.

Inspired by [Mozilla's `Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)

???+ example "Initialize a blob from in-memory data"

    ```python
    from langchain_core.documents import Blob

    blob = Blob.from_data("Hello, world!")

    # Read the blob as a string
    print(blob.as_string())

    # Read the blob as bytes
    print(blob.as_bytes())

    # Read the blob as a byte stream
    with blob.as_bytes_io() as f:
        print(f.read())
    ```

??? example "Load from memory and specify MIME type and metadata"

    ```python
    from langchain_core.documents import Blob

    blob = Blob.from_data(
        data="Hello, world!",
        mime_type="text/plain",
        metadata={"source": "https://example.com"},
    )
    ```

??? example "Load the blob from a file"

    ```python
    from langchain_core.documents import Blob

    blob = Blob.from_path("path/to/file.txt")

    # Read the blob as a string
    print(blob.as_string())

    # Read the blob as bytes
    print(blob.as_bytes())

    # Read the blob as a byte stream
    with blob.as_bytes_io() as f:
        print(f.read())
    ```
Nzbytes | str | Nonedatar   mimetypezutf-8strencodingzPathLike | NonepathT)arbitrary_types_allowedfrozenc                    U R                   (       a)  SU R                   ;   a  [        SU R                   S   5      $ U R                  (       a  [        U R                  5      $ S$ )a  The source location of the blob as string if known otherwise none.

If a path is associated with the `Blob`, it will default to the path location.

Unless explicitly set via a metadata field called `'source'`, in which
case that value will be used instead.
sourcer   N)r   r   r,   r*   selfs    r$   r0   Blob.source   sE     ==X6dmmH&=>>!%s499~44r#   before)modec                8    SU;  a  SU;  a  Sn[        U5      eU$ )z,Verify that either data or path is provided.r(   r,   z$Either data or path must be provided)
ValueError)clsvaluesmsgs      r$   check_blob_is_validBlob.check_blob_is_valid   s'     F&$88CS/!r#   c                   U R                   c=  U R                  (       a,  [        U R                  5      R                  U R                  S9$ [        U R                   [        5      (       a%  U R                   R                  U R                  5      $ [        U R                   [        5      (       a  U R                   $ SU  3n[        U5      e)zRead data as a string.

Raises:
    ValueError: If the blob cannot be represented as a string.

Returns:
    The data as a string.
)r+   zUnable to get string for blob )
r(   r,   r   	read_textr+   
isinstancebytesdecoder*   r7   r2   r:   s     r$   	as_stringBlob.as_string   s     99		?,,dmm,DDdii''99##DMM22dii%%99.tf5or#   c                   [        U R                  [        5      (       a  U R                  $ [        U R                  [        5      (       a%  U R                  R	                  U R
                  5      $ U R                  c4  U R                  (       a#  [        U R                  5      R                  5       $ SU  3n[        U5      e)zzRead data as bytes.

Raises:
    ValueError: If the blob cannot be represented as bytes.

Returns:
    The data as bytes.
zUnable to get bytes for blob )
r?   r(   r@   r*   encoder+   r,   r   
read_bytesr7   rB   s     r$   as_bytesBlob.as_bytes   s     dii''99dii%%99##DMM2299		?--//-dV4or#   c              #  Z  #    [        U R                  [        5      (       a  [        U R                  5      v   gU R                  cC  U R                  (       a2  [        U R                  5      R                  S5       nUv   SSS5        gSU  3n[        U5      e! , (       d  f       g= f7f)zRead data as a byte stream.

Raises:
    NotImplementedError: If the blob cannot be represented as a byte stream.

Yields:
    The data as a byte stream.
NrbzUnable to convert blob )r?   r(   r@   r   r,   r   openNotImplementedError)r2   fr:   s      r$   as_bytes_ioBlob.as_bytes_io   s}      dii''$))$$YY499dii%%d+q ,+ ,D62C%c**	 ,+s   A:B+<BB+
B($B+)r+   	mime_type
guess_typer   c                   Uc*  U(       a#  U(       a  [         R                  " U5      S   OSnOUnU " SUUUUb  US9$ 0 S9$ )a  Load the blob from a path like object.

Args:
    path: Path-like object to file to be read
    encoding: Encoding to use if decoding the bytes into a string
    mime_type: If provided, will be set as the MIME type of the data
    guess_type: If `True`, the MIME type will be guessed from the file
        extension, if a MIME type was not provided
    metadata: Metadata to associate with the `Blob`

Returns:
    `Blob` instance
Nr   r(   r)   r+   r,   r   )	mimetypesrR   )r8   r,   r+   rQ   rR   r   r)   s          r$   	from_pathBlob.from_path   s\    . 8By++D1!4H H !)!5X
 	

 <>
 	
r#   )r+   rQ   r,   r   c               $    U " UUUUUb  US9$ 0 S9$ )a  Initialize the `Blob` from in-memory data.

Args:
    data: The in-memory data associated with the `Blob`
    encoding: Encoding to use if decoding the bytes into a string
    mime_type: If provided, will be set as the MIME type of the data
    path: If provided, will be set as the source from which the data came
    metadata: Metadata to associate with the `Blob`

Returns:
    `Blob` instance
rT   r   )r8   r(   r+   rQ   r,   r   s         r$   	from_dataBlob.from_data   s4    , !)!5X
 	

 <>
 	
r#   c                h    S[        U 5       3nU R                  (       a  USU R                   3-  nU$ )zReturn the blob representation.zBlob  )r   r0   )r2   str_reprs     r$   __repr__Blob.__repr__  s3    2d8*%;;!DKK=))Hr#   r   )returnr   )r9   zdict[str, Any]r`   r	   r`   r*   )r`   r@   )r`   z/Generator[BytesIO | BufferedReader, None, None])r,   PathLiker+   r*   rQ   r   rR   boolr   dict | Noner`   r&   )r(   zstr | bytesr+   r*   rQ   r   r,   r   r   rd   r`   r&   )r   r   r   r   r    r(   r!   r)   r+   r,   r   model_configpropertyr0   r   classmethodr;   rC   rH   
contextlibcontextmanagerrO   rV   rY   r^   r"   r   r#   r$   r&   r&   9   sj   6p  $D
#.Hj>Hc !D/ 8 $L
 
5 
5 (#  $$$ + +$ 
   $ $"
"
 	"

 "
 "
 "
 
"
 "
H 
   $ $

 	

 
 
 
 

 
:r#   r&   c                  ~   ^  \ rS rSr% SrS\S'    S rS\S'   SU 4S jjr\SS j5       r	\SS	 j5       r
SS
 jrSrU =r$ )Documenti  a  Class for storing a piece of text and associated metadata.

!!! note
    `Document` is for **retrieval workflows**, not chat I/O. For sending text
    to an LLM in a conversation, use message types from `langchain.messages`.

Example:
    ```python
    from langchain_core.documents import Document

    document = Document(
        page_content="Hello, world!", metadata={"source": "https://example.com"}
    )
    ```
r*   page_contentzLiteral['Document']typec                *   > [         TU ]  " SSU0UD6  g)z0Pass page_content in as positional or named arg.rl   Nr   )super__init__)r2   rl   kwargs	__class__s      r$   rp   Document.__init__0  s     	=l=f=r#   c                    g)z,Return `True` as this class is serializable.Tr   r8   s    r$   is_lc_serializableDocument.is_lc_serializable6  s     r#   c                
    / SQ$ )z]Get the namespace of the LangChain object.

Returns:
    ["langchain", "schema", "document"]
)	langchainschemadocumentr   ru   s    r$   get_lc_namespaceDocument.get_lc_namespace;  s
     32r#   c                |    U R                   (       a  SU R                   SU R                    3$ SU R                   S3$ )zyOverride `__str__` to restrict it to page_content and metadata.

Returns:
    A string representation of the `Document`.
zpage_content='z' metadata=')r   rl   r1   s    r$   __str__Document.__str__D  s?     ==#D$5$5#6k$--QQ 1 12!44r#   r   )rl   r*   rq   r	   r`   None)r`   rc   )r`   z	list[str]ra   )r   r   r   r   r    r!   rm   rp   rg   rv   r|   r   r"   __classcell__)rr   s   @r$   rk   rk     sU       *D
*>   3 35 5r#   rk   )r    
__future__r   rh   rU   ior   r   pathlibr   r   typingr   r	   r
   r   pydanticr   r   r    langchain_core.load.serializabler   collections.abcr   r*   rb   r   r&   rk   r   r#   r$   <module>r      s`    #   & " 4 4 7 7 9)>: :0_9 _D95y 95r#   