Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,10 @@ Also see [architecture](docs/ARCHITECTURE.md).
| Provider | Status | Provider | Status |
|----------|--------|----------|--------|
| OpenAI | ✅ | Azure OpenAI | ✅ |
| OpenAI Compatible | ✅ | Anthropic Claude | ✅ |
| AWS Bedrock | ✅ | Google Gemini | ✅ |
| Ollama (local) | ✅ | Mistral AI | ✅ |
| Anyscale | ✅ | | |

### Vector Databases

Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
40 changes: 34 additions & 6 deletions unstract/sdk1/src/unstract/sdk1/adapters/base1.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
from abc import ABC, abstractmethod
from importlib import import_module
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, ClassVar

if TYPE_CHECKING:
from typing import Any
Expand Down Expand Up @@ -76,6 +76,8 @@ def register_adapters(adapters: dict[str, dict[str, "Any"]], adapter_type: str)
class BaseAdapter(ABC):
"""Adapter base class for compatibility with all SDK v1 providers."""

SCHEMA_PATH: ClassVar[str | None] = None

@staticmethod
@abstractmethod
def get_id() -> str:
Expand Down Expand Up @@ -103,11 +105,13 @@ def get_icon() -> str:

@classmethod
def get_json_schema(cls) -> str:
    """Return the adapter's JSON schema file contents as a string.

    Honors a class-level ``SCHEMA_PATH`` override when set (used by
    adapters whose schema filename does not match their provider name);
    otherwise derives the default path from the adapter type and provider.

    Returns:
        The raw JSON schema text.

    Raises:
        OSError: If the schema file cannot be opened or read.
    """
    schema_path = cls.SCHEMA_PATH
    if schema_path is None:
        # Default layout: <this dir>/<adapter_type>1/static/<provider>.json
        schema_path = (
            f"{os.path.dirname(__file__)}/"
            f"{cls.get_adapter_type().name.lower()}1/static/"
            f"{cls.get_provider()}.json"
        )
    with open(schema_path) as f:
        return f.read()

Expand Down Expand Up @@ -225,6 +229,30 @@ def validate_model(adapter_metadata: dict[str, "Any"]) -> str:
return f"openai/{model}"


class OpenAICompatibleLLMParameters(BaseChatCompletionParameters):
    """Completion parameters for OpenAI-compatible endpoints.

    See https://docs.litellm.ai/docs/providers/openai_compatible/.
    """

    # Some self-hosted endpoints require no key; None lets the client skip auth.
    api_key: str | None = None
    # Base URL of the endpoint, e.g. https://gateway.example.com/v1.
    api_base: str

    @staticmethod
    def validate(adapter_metadata: dict[str, "Any"]) -> dict[str, "Any"]:
        """Normalize *adapter_metadata* in place and return the validated dict.

        Ensures the model carries the ``custom_openai/`` route prefix and
        maps a blank API key to ``None`` so optional auth works.
        """
        adapter_metadata["model"] = OpenAICompatibleLLMParameters.validate_model(
            adapter_metadata
        )
        api_key = adapter_metadata.get("api_key")
        if isinstance(api_key, str) and not api_key.strip():
            adapter_metadata["api_key"] = None
        return OpenAICompatibleLLMParameters(**adapter_metadata).model_dump()

    @staticmethod
    def validate_model(adapter_metadata: dict[str, "Any"]) -> str:
        """Return the model name prefixed for LiteLLM's custom_openai route."""
        # Use `or ""` rather than a get() default: the default only applies
        # when the key is absent, not when it is present with value None,
        # and None.startswith() would raise AttributeError.
        model = adapter_metadata.get("model") or ""
        if model.startswith("custom_openai/"):
            return model
        return f"custom_openai/{model}"


class AzureOpenAILLMParameters(BaseChatCompletionParameters):
"""See https://docs.litellm.ai/docs/providers/azure/#completion---using-azure_ad_token-api_base-api_version."""

Expand Down
2 changes: 2 additions & 0 deletions unstract/sdk1/src/unstract/sdk1/adapters/llm1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from unstract.sdk1.adapters.llm1.bedrock import AWSBedrockLLMAdapter
from unstract.sdk1.adapters.llm1.ollama import OllamaLLMAdapter
from unstract.sdk1.adapters.llm1.openai import OpenAILLMAdapter
from unstract.sdk1.adapters.llm1.openai_compatible import OpenAICompatibleLLMAdapter
from unstract.sdk1.adapters.llm1.vertexai import VertexAILLMAdapter

adapters: dict[str, dict[str, Any]] = {}
Expand All @@ -22,5 +23,6 @@
"AzureOpenAILLMAdapter",
"OllamaLLMAdapter",
"OpenAILLMAdapter",
"OpenAICompatibleLLMAdapter",
"VertexAILLMAdapter",
]
51 changes: 51 additions & 0 deletions unstract/sdk1/src/unstract/sdk1/adapters/llm1/openai_compatible.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import os
from typing import Any, ClassVar

from unstract.sdk1.adapters.base1 import BaseAdapter, OpenAICompatibleLLMParameters
from unstract.sdk1.adapters.enums import AdapterTypes

# User-facing description surfaced via get_description()/get_metadata().
DESCRIPTION = (
    "Adapter for servers that implement the OpenAI Chat Completions API "
    "(vLLM, LM Studio, self-hosted gateways, and third-party providers). "
    "Use OpenAI for the official OpenAI service."
)


class OpenAICompatibleLLMAdapter(OpenAICompatibleLLMParameters, BaseAdapter):
    """LLM adapter for any server exposing the OpenAI Chat Completions API."""

    # Explicit schema path override: the provider name ("custom_openai") does
    # not match the schema filename ("openai_compatible.json"), so the default
    # path derivation in BaseAdapter.get_json_schema cannot be used here.
    SCHEMA_PATH: ClassVar[str] = os.path.join(
        os.path.dirname(__file__), "static", "openai_compatible.json"
    )

    @staticmethod
    def get_id() -> str:
        """Stable unique identifier in ``<name>|<uuid>`` form."""
        return "openaicompatible|b6d10f33-2c41-49fc-a8c2-58d2b247fc09"

    @staticmethod
    def get_metadata() -> dict[str, Any]:
        """Registration metadata for the adapter registry.

        Reuses get_name()/get_description() instead of repeating the
        literals, so the values cannot drift apart.
        """
        return {
            "name": OpenAICompatibleLLMAdapter.get_name(),
            "version": "1.0.0",
            "adapter": OpenAICompatibleLLMAdapter,
            "description": OpenAICompatibleLLMAdapter.get_description(),
            "is_active": True,
        }

    @staticmethod
    def get_name() -> str:
        """Human-readable adapter name."""
        return "OpenAI Compatible"

    @staticmethod
    def get_description() -> str:
        """Human-readable adapter description."""
        return DESCRIPTION

    @staticmethod
    def get_provider() -> str:
        """LiteLLM provider/route prefix for this adapter."""
        return "custom_openai"

    @staticmethod
    def get_icon() -> str:
        """Static path to the adapter's UI icon."""
        return "/icons/adapter-icons/OpenAICompatible.png"

    @staticmethod
    def get_adapter_type() -> AdapterTypes:
        """This adapter handles LLM (chat completion) workloads."""
        return AdapterTypes.LLM
Comment thread
hari-kuriakose marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"title": "OpenAI Compatible",
"type": "object",
"required": [
"adapter_name",
"api_base"
],
"properties": {
"adapter_name": {
"type": "string",
"title": "Name",
"default": "",
"description": "Provide a unique name for this adapter instance. Example: compatible-gateway-1"
},
"api_key": {
"type": [
"string",
"null"
],
"title": "API Key",
"format": "password",
"description": "API key for your OpenAI-compatible endpoint. Leave empty if the endpoint does not require one."
},
"model": {
"type": "string",
"title": "Model",
"description": "The model name expected by your OpenAI-compatible endpoint. Examples: gateway-model, gpt-4o-mini, openai/gpt-4o"
},
"api_base": {
"type": "string",
"format": "url",
"title": "API Base",
"description": "Base URL for the OpenAI-compatible endpoint. Examples: https://gateway.example.com/v1, https://llm.example.net/openai/v1"
},
"max_tokens": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Maximum Output Tokens",
"default": 4096,
"description": "Maximum number of output tokens to limit LLM replies. Leave it empty to use the provider default."
},
"max_retries": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Max Retries",
"default": 5,
"description": "The maximum number of times to retry a request if it fails."
},
"timeout": {
"type": "number",
"minimum": 0,
"multipleOf": 1,
"title": "Timeout",
"default": 900,
"description": "Timeout in seconds."
}
}
}
16 changes: 14 additions & 2 deletions unstract/sdk1/src/unstract/sdk1/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,10 +652,22 @@ def _record_usage(
usage: Mapping[str, int] | None,
llm_api: str,
) -> None:
prompt_tokens = token_counter(model=model, messages=messages)
usage_data: Mapping[str, int] = usage or {}
prompt_tokens = usage_data.get("prompt_tokens")
if prompt_tokens is None:
try:
prompt_tokens = token_counter(model=model, messages=messages)
except Exception as e:
prompt_tokens = 0
logger.warning(
"[sdk1][LLM][%s][%s] Failed to estimate prompt tokens; "
"recording 0 prompt tokens for usage audit: %s",
model,
llm_api,
e,
)
Comment on lines +656 to +668
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pk-zipstack @johnyrahul is this a safe change?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kept this scoped to usage accounting only. It still uses provider-reported prompt tokens when available, only estimates when they are missing, and the fallback paths are covered by tests now.

all_tokens = TokenCounterCompat(
prompt_tokens=usage_data.get("prompt_tokens", 0),
prompt_tokens=prompt_tokens or 0,
completion_tokens=usage_data.get("completion_tokens", 0),
total_tokens=usage_data.get("total_tokens", 0),
)
Comment on lines +656 to 673
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Silent zero-token recording risks corrupting billing/usage audit data.

When token_counter raises (e.g., unmapped custom models in LiteLLM's metadata), the code records prompt_tokens=0 into Audit().push_usage_data. Per unstract/sdk1/src/unstract/sdk1/utils/common.py:114-145 and unstract/sdk1/src/unstract/sdk1/audit.py:85-98, that zero flows directly to the platform's usage record with no sentinel/flag distinguishing "unknown" from "actually zero." For long-running workloads against an OpenAI-compatible endpoint that doesn't return usage.prompt_tokens, this could silently understate prompt-token consumption in cost attribution and analytics.

Consider one of:

  • Tagging the audit payload with an estimation_failed / prompt_tokens_source flag so downstream consumers can distinguish missing data from genuinely zero usage.
  • Narrowing the except (e.g., except (KeyError, ValueError, litellm.exceptions.*)) so truly unexpected errors still propagate instead of being swallowed.
  • Emitting a metric/counter when this fallback triggers so ops can detect silent drift.

A warning log alone is easy to miss in aggregated usage reports. This answers the question raised in the prior review thread on this range.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@unstract/sdk1/src/unstract/sdk1/llm.py` around lines 543 - 560, The current
catch in the prompt token estimation around token_counter (used when building
TokenCounterCompat) silently sets prompt_tokens=0; update this to (1) narrow the
except to only expected errors from the estimator (e.g., KeyError/ValueError and
litellm-specific exceptions raised by token_counter) so unexpected errors still
propagate, and (2) add a sentinel field to the usage payload (e.g.,
prompt_tokens_source or estimation_failed) before calling
Audit().push_usage_data to mark that prompt tokens were estimated/failed, and/or
increment an ops metric/counter when the fallback path occurs; reference the
token_counter call, TokenCounterCompat construction, Audit().push_usage_data,
and the existing logger to emit a clear warning and metric.

Expand Down
Loading