
    i.                     T   S SK r S SKrS SKJr  SSKJr  SSKJrJ	r	  SSK
Jr  SSKJr  SSKJr  SS	KJr  SS
KJr   S SKr\R*                  R-                  S5      (       a  \eS SKJr  \ R4                  " \ R6                  S5      r " S S5      r " S S5      rg! \ a    S SKr N>f = f)    N)JSONDecodeError   )ApiError)AsyncClientWrapperSyncClientWrapper)jsonable_encoder)remove_none_from_dict)UnprocessableEntityError)ClassifyResponse)HttpValidationErrorz1..c                      \ rS rSrS\4S jrSS\\\\S.S\R                  \	   S\R                  \	   S\	S	\R                  \R                  \	      S
\R                  \	   S\R                  \   S\R                  \   S\4S jjrSrg)ClassifierClient   client_wrapperc                    Xl         g N_client_wrapperselfr   s     a/home/james-whalen/.local/lib/python3.13/site-packages/llama_cloud/resources/classifier/client.py__init__ClassifierClient.__init__       -    N
project_idorganization_idfilesfile_idsmatching_thresholdenable_metadata_heuristicr   r   
rules_jsonr   r    r!   r"   returnc          	      8   SU0nU[         La  XHS'   U[         La  XXS'   U[         La  XhS'   U[         La  XxS'   U R                  R                  R                  S[        R
                  R                  U R                  R                  5        S3S5      [        XS	.5      [        U5      U R                  R                  5       S
S9n	SU	R                  s=::  a  S:  a,  O  O)[        R                  " [        U	R                  5       5      $ U	R                  S:X  a2  [!        [        R                  " ["        U	R                  5       5      5      e U	R                  5       n
['        U	R                  U
S9e! [$         a    ['        U	R                  U	R(                  S9ef = f)x  
**[BETA]** Classify documents based on provided rules - simplified classification system.

**This is a Beta feature** - API may change based on user feedback.

This endpoint supports:

- Classifying new uploaded files
- Classifying existing files by ID
- Both new files and existing file IDs in one request

## v0 Features:

- **Simplified Rules**: Only `type` and `description` fields needed
- **Matching Threshold**: Confidence-based classification with configurable threshold
- **Smart Classification**: Filename heuristics + LLM content analysis
- **Document Type Filtering**: Automatically filters out non-document file types
- **Fast Processing**: Uses LlamaParse fast mode + GPT-4.1-nano
- **Optimized Performance**: Parses each file only once for all rules

## Simplified Scoring Logic:

1. **Evaluate All Rules**: Compare document against all classification rules
2. **Best Match Selection**: Return the highest scoring rule above matching_threshold
3. **Unknown Classification**: Return as "unknown" if no rules score above threshold

This ensures optimal classification by:

- Finding the best possible match among all rules
- Avoiding false positives with confidence thresholds
- Maximizing performance with single-pass file parsing

## Rule Format:

```json
[
  {
    "type": "invoice",
    "description": "contains invoice number, line items, and total amount"
  },
  {
    "type": "receipt",
    "description": "purchase receipt with transaction details and payment info"
  }
]
```

## Classification Process:

1. **Metadata Heuristics** (configurable via API):
   - **Document Type Filter**: Only process document file types (PDF, DOC, DOCX, RTF, TXT, ODT, Pages, HTML, XML, Markdown)
   - **Filename Heuristics**: Check if rule type appears in filename
   - **Content Analysis**: Parse document content once and use LLM for semantic matching against all rules
2. **Result**: Returns type, confidence score, and matched rule information

## API Parameters:

- `matching_threshold` (0.1-0.99, default: 0.6): Minimum confidence threshold for acceptable matches
- `enable_metadata_heuristic` (boolean, default: true): Enable metadata-based features

## Supported Document Types:

**Text Documents**: pdf, doc, docx, rtf, txt, odt, pages
**Web Documents**: html, htm, xml
**Markup**: md, markdown

## Limits (Beta):

- Maximum 100 files per request
- Maximum 10 rules per request
- Rule descriptions: 10-500 characters
- Document types: 1-50 characters (alphanumeric, hyphens, underscores)

**Beta Notice**: This API is subject to change. Please provide feedback!

Parameters:
    - project_id: typing.Optional[str].

    - organization_id: typing.Optional[str].

    - rules_json: str. JSON string containing classifier rules

    - files: typing.Optional[typing.List[str]].

    - file_ids: typing.Optional[str].

    - matching_threshold: typing.Optional[float].

    - enable_metadata_heuristic: typing.Optional[bool].
r#   r   r    r!   r"   POST/api/v1/classifier/classifyr   r   <   paramsjsonheaderstimeout   ,    status_codebodyOMITr   httpx_clientrequesturllibparseurljoinget_base_urlr	   r   get_headersr5   pydanticparse_obj_asr   r.   r
   r   r   r   textr   r   r   r#   r   r    r!   r"   _request	_response_response_jsons              r   classify_documents#ClassifierClient.classify_documents   s{   J 3?
1K %W4#+Z T)-?)*$D04M01((55==LL  D$8$8$E$E$G#H!JLhi(
)gh!(+((446 > 
	 )''-#-(()99>>;KLL  C'*8+@+@ATV_VdVdVf+ghh	S&^^-N 9#8#8~NN  	Sy'<'<9>>RR	Ss   E0 0)Fr   )__name__
__module____qualname____firstlineno__r   r   r8   typingOptionalstrListfloatboolr   rG   __static_attributes__ r   r   r   r      s    .*; . ,00437)-59;?~O OOC(~O  -	~O
 ~O v{{3/0~O //#&~O #OOE2~O $*??4#8~O 
~O ~Or   r   c                      \ rS rSrS\4S jrSS\\\\S.S\R                  \	   S\R                  \	   S\	S	\R                  \R                  \	      S
\R                  \	   S\R                  \   S\R                  \   S\4S jjrSrg)AsyncClassifierClient   r   c                    Xl         g r   r   r   s     r   r   AsyncClassifierClient.__init__   r   r   Nr   r   r   r#   r   r    r!   r"   r$   c          	      T  #    SU0nU[         La  XHS'   U[         La  XXS'   U[         La  XhS'   U[         La  XxS'   U R                  R                  R                  S[        R
                  R                  U R                  R                  5        S3S5      [        XS	.5      [        U5      U R                  R                  5       S
S9I Sh  vN n	SU	R                  s=::  a  S:  a,  O  O)[        R                  " [        U	R                  5       5      $ U	R                  S:X  a2  [!        [        R                  " ["        U	R                  5       5      5      e U	R                  5       n
['        U	R                  U
S9e N! [$         a    ['        U	R                  U	R(                  S9ef = f7f)r&   r#   r   r    r!   r"   r'   r(   r)   r*   r+   r,   Nr1   r2   r3   r4   r7   rC   s              r   rG   (AsyncClassifierClient.classify_documents   s    J 3?
1K %W4#+Z T)-?)*$D04M01..;;CCLL  D$8$8$E$E$G#H!JLhi(
)gh!(+((446 D 
 
	 )''-#-(()99>>;KLL  C'*8+@+@ATV_VdVdVf+ghh	S&^^-N 9#8#8~NN!
  	Sy'<'<9>>RR	Ss+   C	F(E:B	F(E< &F(<)F%%F(r   )rI   rJ   rK   rL   r   r   r8   rM   rN   rO   rP   rQ   rR   r   rG   rS   rT   r   r   rV   rV      s    .*< . ,00437)-59;?~O OOC(~O  -	~O
 ~O v{{3/0~O //#&~O #OOE2~O $*??4#8~O 
~O ~Or   rV   )rM   urllib.parser;   json.decoderr   core.api_errorr   core.client_wrapperr   r   core.jsonable_encoderr   core.remove_none_from_dictr	   !errors.unprocessable_entity_errorr
   types.classify_responser   types.http_validation_errorr   r@   __version__
startswithImportErrorpydantic.v1v1castAnyr8   r   rV   rT   r   r   <module>rl      s      ( & H 5 ? I 7 >&&t,,"
 {{6::s#BO BOJBO BOY  s   ,B 
B'&B'