Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,12 @@ These endpoints mimic the OpenAI API structure.
| Endpoint | Method | Description |
| --------------------------- | ------ | --------------------------------------------------------- |
| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. |
| `POST /v1/responses` | `POST` | Creates a model response using the Responses API format. |
| `GET /v1/models` | `GET` | Lists the currently available models. |
| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. |

For GPT-family models, `/v1/chat/completions` is a compatibility layer. If you need native Responses features and the best chance of preserving model-specific reasoning metadata, prefer `POST /v1/responses`.

### Anthropic Compatible Endpoints

These endpoints are designed to be compatible with the Anthropic Messages API.
Expand Down
52 changes: 52 additions & 0 deletions src/lib/model-level.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/** Ordered reasoning-effort levels a model name may carry as a "(level)" suffix. */
export const MODEL_LEVELS = ["low", "medium", "high", "xhigh"] as const

export type ModelLevel = (typeof MODEL_LEVELS)[number]

// Claude models that expose level variants, mapped to the subset of levels each
// supports. `satisfies` validates the values while keeping the literal keys.
const CLAUDE_MODEL_LEVEL_VARIANTS = {
  "claude-opus-4.6": ["low", "medium", "high"],
  "claude-opus-4.6-fast": ["low", "medium", "high"],
  "claude-sonnet-4.6": ["low", "medium", "high"],
} as const satisfies Record<string, ReadonlyArray<ModelLevel>>

// Built from MODEL_LEVELS so the recognized suffixes can never drift out of
// sync with the canonical level list (the alternation was previously
// hard-coded as a second copy of the levels).
const MODEL_LEVEL_SUFFIX_RE = new RegExp(
  `^(.+)\\((${MODEL_LEVELS.join("|")})\\)$`,
)

/**
 * Splits a model identifier of the form "base(level)" into its parts.
 *
 * @param model - Raw model identifier, e.g. "gpt-5(high)".
 * @returns The base model name and the parsed level; when no recognized
 *          "(level)" suffix is present, the input is returned unchanged with
 *          `level: undefined`.
 */
export const parseModelNameWithLevel = (
  model: string,
): {
  baseModel: string
  level: ModelLevel | undefined
} => {
  const match = MODEL_LEVEL_SUFFIX_RE.exec(model)
  if (!match) {
    return {
      baseModel: model,
      level: undefined,
    }
  }

  return {
    baseModel: match[1],
    level: match[2] as ModelLevel,
  }
}

// True when the model should be routed through the Responses API (GPT family).
export const isGptResponsesModel = (model: string): boolean => {
  return model.slice(0, 4) === "gpt-"
}

// True when the model accepts a reasoning-effort setting (GPT-5 family).
export const supportsGptReasoningEffort = (model: string): boolean => {
  return model.slice(0, 5) === "gpt-5"
}

/**
 * Returns the set of levels selectable for `model`, or `undefined` when the
 * model has no level variants.
 */
export const getModelLevelsForModel = (
  model: string,
): ReadonlyArray<ModelLevel> | undefined => {
  // GPT-5 family models support every defined level via reasoning effort.
  if (supportsGptReasoningEffort(model)) {
    return MODEL_LEVELS
  }

  // Otherwise consult the per-model Claude variant table (absent → undefined).
  const claudeLevels =
    CLAUDE_MODEL_LEVEL_VARIANTS[
      model as keyof typeof CLAUDE_MODEL_LEVEL_VARIANTS
    ]
  return claudeLevels
}

// Claude models treated as "thinking" models.
const CLAUDE_THINKING_MODELS: ReadonlySet<string> = new Set([
  "claude-opus-4.6",
  "claude-opus-4.6-fast",
  "claude-sonnet-4.6",
])

export const isClaudeThinkingModel = (model: string): boolean =>
  CLAUDE_THINKING_MODELS.has(model)
47 changes: 45 additions & 2 deletions src/routes/chat-completions/handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import consola from "consola"
import { streamSSE, type SSEMessage } from "hono/streaming"

import { awaitApproval } from "~/lib/approval"
import { isGptResponsesModel, parseModelNameWithLevel } from "~/lib/model-level"
import { checkRateLimit } from "~/lib/rate-limit"
import { state } from "~/lib/state"
import { getTokenCount } from "~/lib/tokenizer"
Expand All @@ -12,17 +13,29 @@ import {
createChatCompletions,
type ChatCompletionResponse,
type ChatCompletionsPayload,
normalizeChatCompletionsPayloadModel,
} from "~/services/copilot/create-chat-completions"
import {
createResponses,
type ResponsesApiResponse,
} from "~/services/copilot/create-responses"

import {
translateChatCompletionsToResponses,
translateResponsesStreamToChatStream,
translateResponsesToChatCompletions,
} from "./responses-translation"

export async function handleCompletion(c: Context) {
await checkRateLimit(state)

let payload = await c.req.json<ChatCompletionsPayload>()
const { baseModel } = parseModelNameWithLevel(payload.model)
consola.debug("Request payload:", JSON.stringify(payload).slice(-400))

// Find the selected model
const selectedModel = state.models?.data.find(
(model) => model.id === payload.model,
(model) => model.id === baseModel,
)

// Calculate and display token count
Expand All @@ -47,7 +60,33 @@ export async function handleCompletion(c: Context) {
consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
}

const response = await createChatCompletions(payload)
const normalizedPayload = normalizeChatCompletionsPayloadModel(payload)

if (isGptResponsesModel(baseModel)) {
const responsesPayload =
translateChatCompletionsToResponses(normalizedPayload)
const responses = await createResponses(responsesPayload)

if (isNonStreamingResponse(responses)) {
const completionResponse = translateResponsesToChatCompletions(responses)
consola.debug(
"GPT translated response:",
JSON.stringify(completionResponse).slice(-400),
)
return c.json(completionResponse)
}

return streamSSE(c, async (stream) => {
for await (const chunk of translateResponsesStreamToChatStream(
responses,
normalizedPayload.model,
)) {
await stream.writeSSE(chunk)
}
})
}

const response = await createChatCompletions(normalizedPayload)

if (isNonStreaming(response)) {
consola.debug("Non-streaming response:", JSON.stringify(response))
Expand All @@ -63,6 +102,10 @@ export async function handleCompletion(c: Context) {
})
}

// Narrows a Responses API result: a value without an async iterator is the
// complete (non-streaming) response object rather than an SSE stream.
const isNonStreamingResponse = (
  response: Awaited<ReturnType<typeof createResponses>>,
): response is ResponsesApiResponse => {
  const isStream = Symbol.asyncIterator in response
  return !isStream
}

// Narrows a chat-completions result: only the complete (non-streaming)
// response carries an own `choices` property.
const isNonStreaming = (
  response: Awaited<ReturnType<typeof createChatCompletions>>,
): response is ChatCompletionResponse => {
  return Object.hasOwn(response, "choices")
}
Loading