# This file was auto-generated by Fern from our API Definition.

import typing
import urllib.parse
from json.decoder import JSONDecodeError

from ...core.api_error import ApiError
from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from ...core.jsonable_encoder import jsonable_encoder
from ...core.remove_none_from_dict import remove_none_from_dict
from ...errors.unprocessable_entity_error import UnprocessableEntityError
from ...types.classify_response import ClassifyResponse
from ...types.http_validation_error import HttpValidationError

# Bind the name `pydantic` to a v1-style API regardless of the installed
# release: when the installed version string does not start with "1.", the
# `pydantic.v1` submodule is used; otherwise (or when that import fails) the
# top-level package is used as-is.
try:
    import pydantic
    if pydantic.__version__.startswith("1."):
        # Already a 1.x release: jump to the plain import in the except branch.
        raise ImportError
    import pydantic.v1 as pydantic  # type: ignore
except ImportError:
    import pydantic  # type: ignore

# this is used as the default value for optional parameters
# It is the Ellipsis object cast to Any; methods compare against it by
# identity (`is not OMIT`), so an explicitly passed None is still treated as a
# provided value and is added to the request body.
OMIT = typing.cast(typing.Any, ...)


class ClassifierClient:
    """Synchronous client for the `api/v1/classifier/classify` endpoint."""

    def __init__(self, *, client_wrapper: SyncClientWrapper):
        # The wrapper supplies the base URL, the request headers and the
        # httpx client used to issue requests.
        self._client_wrapper = client_wrapper

    def classify_documents(
        self,
        *,
        project_id: typing.Optional[str] = None,
        organization_id: typing.Optional[str] = None,
        rules_json: str,
        files: typing.Optional[typing.List[str]] = OMIT,
        file_ids: typing.Optional[str] = OMIT,
        matching_threshold: typing.Optional[float] = OMIT,
        enable_metadata_heuristic: typing.Optional[bool] = OMIT,
    ) -> ClassifyResponse:
        """
        **[BETA]** Classify documents based on provided rules - simplified classification system.

        **This is a Beta feature** - API may change based on user feedback.

        This endpoint supports:

        - Classifying new uploaded files
        - Classifying existing files by ID
        - Both new files and existing file IDs in one request

        ## v0 Features:

        - **Simplified Rules**: Only `type` and `description` fields needed
        - **Matching Threshold**: Confidence-based classification with configurable threshold
        - **Smart Classification**: Filename heuristics + LLM content analysis
        - **Document Type Filtering**: Automatically filters out non-document file types
        - **Fast Processing**: Uses LlamaParse fast mode + GPT-4.1-nano
        - **Optimized Performance**: Parses each file only once for all rules

        ## Simplified Scoring Logic:

        1. **Evaluate All Rules**: Compare document against all classification rules
        2. **Best Match Selection**: Return the highest scoring rule above matching_threshold
        3. **Unknown Classification**: Return as "unknown" if no rules score above threshold

        This ensures optimal classification by:

        - Finding the best possible match among all rules
        - Avoiding false positives with confidence thresholds
        - Maximizing performance with single-pass file parsing

        ## Rule Format:

        ```json
        [
          {
            "type": "invoice",
            "description": "contains invoice number, line items, and total amount"
          },
          {
            "type": "receipt",
            "description": "purchase receipt with transaction details and payment info"
          }
        ]
        ```

        ## Classification Process:

        1. **Metadata Heuristics** (configurable via API):
           - **Document Type Filter**: Only process document file types (PDF, DOC, DOCX, RTF, TXT, ODT, Pages, HTML, XML, Markdown)
           - **Filename Heuristics**: Check if rule type appears in filename
           - **Content Analysis**: Parse document content once and use LLM for semantic matching against all rules
        2. **Result**: Returns type, confidence score, and matched rule information

        ## API Parameters:

        - `matching_threshold` (0.1-0.99, default: 0.6): Minimum confidence threshold for acceptable matches
        - `enable_metadata_heuristic` (boolean, default: true): Enable metadata-based features

        ## Supported Document Types:

        **Text Documents**: pdf, doc, docx, rtf, txt, odt, pages
        **Web Documents**: html, htm, xml
        **Markup**: md, markdown

        ## Limits (Beta):

        - Maximum 100 files per request
        - Maximum 10 rules per request
        - Rule descriptions: 10-500 characters
        - Document types: 1-50 characters (alphanumeric, hyphens, underscores)

        **Beta Notice**: This API is subject to change. Please provide feedback!

        Parameters:
            - project_id: typing.Optional[str]. Sent as the `project_id` query parameter after passing through `remove_none_from_dict`.

            - organization_id: typing.Optional[str]. Sent as the `organization_id` query parameter after passing through `remove_none_from_dict`.

            - rules_json: str. JSON string containing classifier rules

            - files: typing.Optional[typing.List[str]]. Added to the JSON request body only when passed.

            - file_ids: typing.Optional[str]. Added to the JSON request body only when passed.

            - matching_threshold: typing.Optional[float]. Added to the JSON request body only when passed.

            - enable_metadata_heuristic: typing.Optional[bool]. Added to the JSON request body only when passed.

        Returns:
            ClassifyResponse parsed from the JSON body of a 2xx response.

        Raises:
            UnprocessableEntityError: the server answered 422; carries the body parsed as HttpValidationError.
            ApiError: any other non-2xx status, or a response (of any status) whose body is not valid JSON;
                in the latter case `body` holds the raw response text.
        """
        _request: typing.Dict[str, typing.Any] = {"rules_json": rules_json}
        # Only forward the optional body fields the caller actually supplied;
        # an explicit None is still forwarded because it is not the OMIT sentinel.
        for _field, _value in (
            ("files", files),
            ("file_ids", file_ids),
            ("matching_threshold", matching_threshold),
            ("enable_metadata_heuristic", enable_metadata_heuristic),
        ):
            if _value is not OMIT:
                _request[_field] = _value
        _response = self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/classifier/classify"),
            params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
            json=jsonable_encoder(_request),
            headers=self._client_wrapper.get_headers(),
            timeout=60,
        )
        # Decode the body once, up front, so that a non-JSON payload is reported
        # as ApiError for every status code instead of leaking a raw
        # JSONDecodeError from the 2xx / 422 branches.
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(ClassifyResponse, _response_json)  # type: ignore
        if _response.status_code == 422:
            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response_json))  # type: ignore
        raise ApiError(status_code=_response.status_code, body=_response_json)


class AsyncClassifierClient:
    """Asynchronous client for the `api/v1/classifier/classify` endpoint."""

    def __init__(self, *, client_wrapper: AsyncClientWrapper):
        # The wrapper supplies the base URL, the request headers and the
        # async httpx client used to issue requests.
        self._client_wrapper = client_wrapper

    async def classify_documents(
        self,
        *,
        project_id: typing.Optional[str] = None,
        organization_id: typing.Optional[str] = None,
        rules_json: str,
        files: typing.Optional[typing.List[str]] = OMIT,
        file_ids: typing.Optional[str] = OMIT,
        matching_threshold: typing.Optional[float] = OMIT,
        enable_metadata_heuristic: typing.Optional[bool] = OMIT,
    ) -> ClassifyResponse:
        """
        **[BETA]** Classify documents based on provided rules - simplified classification system.

        **This is a Beta feature** - API may change based on user feedback.

        This endpoint supports:

        - Classifying new uploaded files
        - Classifying existing files by ID
        - Both new files and existing file IDs in one request

        ## v0 Features:

        - **Simplified Rules**: Only `type` and `description` fields needed
        - **Matching Threshold**: Confidence-based classification with configurable threshold
        - **Smart Classification**: Filename heuristics + LLM content analysis
        - **Document Type Filtering**: Automatically filters out non-document file types
        - **Fast Processing**: Uses LlamaParse fast mode + GPT-4.1-nano
        - **Optimized Performance**: Parses each file only once for all rules

        ## Simplified Scoring Logic:

        1. **Evaluate All Rules**: Compare document against all classification rules
        2. **Best Match Selection**: Return the highest scoring rule above matching_threshold
        3. **Unknown Classification**: Return as "unknown" if no rules score above threshold

        This ensures optimal classification by:

        - Finding the best possible match among all rules
        - Avoiding false positives with confidence thresholds
        - Maximizing performance with single-pass file parsing

        ## Rule Format:

        ```json
        [
          {
            "type": "invoice",
            "description": "contains invoice number, line items, and total amount"
          },
          {
            "type": "receipt",
            "description": "purchase receipt with transaction details and payment info"
          }
        ]
        ```

        ## Classification Process:

        1. **Metadata Heuristics** (configurable via API):
           - **Document Type Filter**: Only process document file types (PDF, DOC, DOCX, RTF, TXT, ODT, Pages, HTML, XML, Markdown)
           - **Filename Heuristics**: Check if rule type appears in filename
           - **Content Analysis**: Parse document content once and use LLM for semantic matching against all rules
        2. **Result**: Returns type, confidence score, and matched rule information

        ## API Parameters:

        - `matching_threshold` (0.1-0.99, default: 0.6): Minimum confidence threshold for acceptable matches
        - `enable_metadata_heuristic` (boolean, default: true): Enable metadata-based features

        ## Supported Document Types:

        **Text Documents**: pdf, doc, docx, rtf, txt, odt, pages
        **Web Documents**: html, htm, xml
        **Markup**: md, markdown

        ## Limits (Beta):

        - Maximum 100 files per request
        - Maximum 10 rules per request
        - Rule descriptions: 10-500 characters
        - Document types: 1-50 characters (alphanumeric, hyphens, underscores)

        **Beta Notice**: This API is subject to change. Please provide feedback!

        Parameters:
            - project_id: typing.Optional[str]. Sent as the `project_id` query parameter after passing through `remove_none_from_dict`.

            - organization_id: typing.Optional[str]. Sent as the `organization_id` query parameter after passing through `remove_none_from_dict`.

            - rules_json: str. JSON string containing classifier rules

            - files: typing.Optional[typing.List[str]]. Added to the JSON request body only when passed.

            - file_ids: typing.Optional[str]. Added to the JSON request body only when passed.

            - matching_threshold: typing.Optional[float]. Added to the JSON request body only when passed.

            - enable_metadata_heuristic: typing.Optional[bool]. Added to the JSON request body only when passed.

        Returns:
            ClassifyResponse parsed from the JSON body of a 2xx response.

        Raises:
            UnprocessableEntityError: the server answered 422; carries the body parsed as HttpValidationError.
            ApiError: any other non-2xx status, or a response (of any status) whose body is not valid JSON;
                in the latter case `body` holds the raw response text.
        """
        _request: typing.Dict[str, typing.Any] = {"rules_json": rules_json}
        # Only forward the optional body fields the caller actually supplied;
        # an explicit None is still forwarded because it is not the OMIT sentinel.
        for _field, _value in (
            ("files", files),
            ("file_ids", file_ids),
            ("matching_threshold", matching_threshold),
            ("enable_metadata_heuristic", enable_metadata_heuristic),
        ):
            if _value is not OMIT:
                _request[_field] = _value
        _response = await self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/classifier/classify"),
            params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
            json=jsonable_encoder(_request),
            headers=self._client_wrapper.get_headers(),
            timeout=60,
        )
        # Decode the body once, up front, so that a non-JSON payload is reported
        # as ApiError for every status code instead of leaking a raw
        # JSONDecodeError from the 2xx / 422 branches.
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(ClassifyResponse, _response_json)  # type: ignore
        if _response.status_code == 422:
            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response_json))  # type: ignore
        raise ApiError(status_code=_response.status_code, body=_response_json)
