Add a dedicated OpenAI-compatible LLM adapter #1895
base: main
Changes from all commits
8b97716
5090773
f1d6dff
b4d0af1
d3e1cad
2cb187a
f6a2a7d
d9dcdfa
37e8afb
633112b
8e78618
New file (+51 lines), the adapter implementation:

```python
import os
from typing import Any, ClassVar

from unstract.sdk1.adapters.base1 import BaseAdapter, OpenAICompatibleLLMParameters
from unstract.sdk1.adapters.enums import AdapterTypes

DESCRIPTION = (
    "Adapter for servers that implement the OpenAI Chat Completions API "
    "(vLLM, LM Studio, self-hosted gateways, and third-party providers). "
    "Use OpenAI for the official OpenAI service."
)


class OpenAICompatibleLLMAdapter(OpenAICompatibleLLMParameters, BaseAdapter):
    SCHEMA_PATH: ClassVar[str] = os.path.join(
        os.path.dirname(__file__), "static", "openai_compatible.json"
    )

    @staticmethod
    def get_id() -> str:
        return "openaicompatible|b6d10f33-2c41-49fc-a8c2-58d2b247fc09"

    @staticmethod
    def get_metadata() -> dict[str, Any]:
        return {
            "name": "OpenAI Compatible",
            "version": "1.0.0",
            "adapter": OpenAICompatibleLLMAdapter,
            "description": DESCRIPTION,
            "is_active": True,
        }

    @staticmethod
    def get_name() -> str:
        return "OpenAI Compatible"

    @staticmethod
    def get_description() -> str:
        return DESCRIPTION

    @staticmethod
    def get_provider() -> str:
        return "custom_openai"

    @staticmethod
    def get_icon() -> str:
        return "/icons/adapter-icons/OpenAICompatible.png"

    @staticmethod
    def get_adapter_type() -> AdapterTypes:
        return AdapterTypes.LLM
```
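For orientation, here is a minimal sketch of how a caller could consume the static metadata API this class exposes. The `registry` dict and `register` helper are hypothetical illustrations, not part of this PR or the SDK:

```python
from unstract.sdk1.adapters.enums import AdapterTypes

# Hypothetical registry keyed by the adapter's stable ID, so stored
# configurations can resolve back to the implementing class.
registry: dict[str, dict] = {}


def register(adapter_cls: type) -> None:
    metadata = adapter_cls.get_metadata()
    if metadata["is_active"] and adapter_cls.get_adapter_type() is AdapterTypes.LLM:
        registry[adapter_cls.get_id()] = metadata


register(OpenAICompatibleLLMAdapter)
assert "openaicompatible|b6d10f33-2c41-49fc-a8c2-58d2b247fc09" in registry
```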
New file (+60 lines), the adapter's settings schema at `static/openai_compatible.json`:

```json
{
  "title": "OpenAI Compatible",
  "type": "object",
  "required": [
    "adapter_name",
    "api_base"
  ],
  "properties": {
    "adapter_name": {
      "type": "string",
      "title": "Name",
      "default": "",
      "description": "Provide a unique name for this adapter instance. Example: compatible-gateway-1"
    },
    "api_key": {
      "type": [
        "string",
        "null"
      ],
      "title": "API Key",
      "format": "password",
      "description": "API key for your OpenAI-compatible endpoint. Leave empty if the endpoint does not require one."
    },
    "model": {
      "type": "string",
      "title": "Model",
      "description": "The model name expected by your OpenAI-compatible endpoint. Examples: gateway-model, gpt-4o-mini, openai/gpt-4o"
    },
    "api_base": {
      "type": "string",
      "format": "url",
      "title": "API Base",
      "description": "Base URL for the OpenAI-compatible endpoint. Examples: https://gateway.example.com/v1, https://llm.example.net/openai/v1"
    },
    "max_tokens": {
      "type": "number",
      "minimum": 0,
      "multipleOf": 1,
      "title": "Maximum Output Tokens",
      "default": 4096,
      "description": "Maximum number of output tokens to limit LLM replies. Leave it empty to use the provider default."
    },
    "max_retries": {
      "type": "number",
      "minimum": 0,
      "multipleOf": 1,
      "title": "Max Retries",
      "default": 5,
      "description": "The maximum number of times to retry a request if it fails."
    },
    "timeout": {
      "type": "number",
      "minimum": 0,
      "multipleOf": 1,
      "title": "Timeout",
      "default": 900,
      "description": "Timeout in seconds."
    }
  }
}
```
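As a quick sanity check, a configuration like the one below should validate against this schema. The endpoint URL and model name are illustrative, and the third-party `jsonschema` package is assumed here only for demonstration:

```python
import json

from jsonschema import validate  # third-party, assumed for this sketch

with open("static/openai_compatible.json") as f:
    schema = json.load(f)

config = {
    "adapter_name": "compatible-gateway-1",        # required
    "api_base": "https://gateway.example.com/v1",  # required
    "api_key": None,         # endpoint without auth; schema allows null
    "model": "gpt-4o-mini",
    "max_tokens": 4096,
    "max_retries": 5,
    "timeout": 900,
}

validate(instance=config, schema=schema)  # raises ValidationError on mismatch
```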
Changes to `_record_usage` (`@@ -652,10 +652,22 @@`):

```diff
@@ -652,10 +652,22 @@ def _record_usage(
     usage: Mapping[str, int] | None,
     llm_api: str,
 ) -> None:
-    prompt_tokens = token_counter(model=model, messages=messages)
     usage_data: Mapping[str, int] = usage or {}
+    prompt_tokens = usage_data.get("prompt_tokens")
+    if prompt_tokens is None:
+        try:
+            prompt_tokens = token_counter(model=model, messages=messages)
+        except Exception as e:
+            prompt_tokens = 0
+            logger.warning(
+                "[sdk1][LLM][%s][%s] Failed to estimate prompt tokens; "
+                "recording 0 prompt tokens for usage audit: %s",
+                model,
+                llm_api,
+                e,
+            )
     all_tokens = TokenCounterCompat(
-        prompt_tokens=usage_data.get("prompt_tokens", 0),
+        prompt_tokens=prompt_tokens or 0,
         completion_tokens=usage_data.get("completion_tokens", 0),
         total_tokens=usage_data.get("total_tokens", 0),
     )
```

Comment on lines +656 to +668

Contributor: @pk-zipstack @johnyrahul is this a safe change?

Author: Kept this scoped to usage accounting only. It still uses provider-reported prompt tokens when available, only estimates when they are missing, and the fallback paths are covered by tests now.
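To make the three accounting paths concrete, here is a self-contained sketch of the logic this hunk implements: provider-reported usage wins, local estimation is the fallback, and zero is the last resort. The `token_counter` stub and the `resolve_prompt_tokens` wrapper are illustrative stand-ins, not the SDK's actual code:

```python
import logging
from collections.abc import Mapping

logger = logging.getLogger("sdk1.llm")


def token_counter(model: str, messages: list[dict]) -> int:
    # Stub estimator (~4 characters per token); the real counter may raise
    # for models it does not recognize.
    return sum(len(m.get("content", "")) // 4 for m in messages)


def resolve_prompt_tokens(
    usage: Mapping[str, int] | None,
    model: str,
    messages: list[dict],
    llm_api: str,
) -> int:
    usage_data: Mapping[str, int] = usage or {}
    prompt_tokens = usage_data.get("prompt_tokens")
    if prompt_tokens is None:  # provider omitted usage: estimate locally
        try:
            prompt_tokens = token_counter(model=model, messages=messages)
        except Exception as e:
            prompt_tokens = 0  # last resort: record zero, but warn
            logger.warning(
                "[sdk1][LLM][%s][%s] Failed to estimate prompt tokens: %s",
                model, llm_api, e,
            )
    return prompt_tokens or 0


# Path 1: provider-reported usage wins.
assert resolve_prompt_tokens({"prompt_tokens": 42}, "m", [], "chat") == 42
# Path 2: missing usage falls back to local estimation ("hello world!" -> 3).
assert resolve_prompt_tokens(None, "m", [{"content": "hello world!"}], "chat") == 3
# Path 3: an estimator failure records zero and emits the warning above.
```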
Comment on lines +656 to 673

Contributor: Silent zero-token recording risks corrupting billing/usage audit data. Consider one of: […] A warning log alone is easy to miss in aggregated usage reports. This answers the question raised in the prior review thread on this range.