Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,12 @@ These endpoints mimic the OpenAI API structure.
| Endpoint | Method | Description |
| --------------------------- | ------ | --------------------------------------------------------- |
| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. |
| `POST /v1/responses` | `POST` | Creates a model response using the Responses API format. |
| `GET /v1/models` | `GET` | Lists the currently available models. |
| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. |

For GPT-family models, `/v1/chat/completions` is a compatibility layer. If you need native Responses features and the best chance of preserving model-specific reasoning metadata, prefer `POST /v1/responses`.

### Anthropic Compatible Endpoints

These endpoints are designed to be compatible with the Anthropic Messages API.
Expand Down
52 changes: 52 additions & 0 deletions src/lib/model-level.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/** Ordered reasoning-effort levels a model name may carry as a "(level)" suffix. */
export const MODEL_LEVELS = ["low", "medium", "high", "xhigh"] as const

export type ModelLevel = (typeof MODEL_LEVELS)[number]

// Claude models that expose level variants, mapped to the subset of levels each
// supports. `satisfies` validates the values while keeping the literal keys.
const CLAUDE_MODEL_LEVEL_VARIANTS = {
  "claude-opus-4.6": ["low", "medium", "high"],
  "claude-opus-4.6-fast": ["low", "medium", "high"],
  "claude-sonnet-4.6": ["low", "medium", "high"],
} as const satisfies Record<string, ReadonlyArray<ModelLevel>>

// Built from MODEL_LEVELS so the recognized suffixes can never drift out of
// sync with the canonical level list (the alternation was previously
// hard-coded as a second copy of the levels).
const MODEL_LEVEL_SUFFIX_RE = new RegExp(
  `^(.+)\\((${MODEL_LEVELS.join("|")})\\)$`,
)

/**
 * Splits a model identifier of the form "base(level)" into its parts.
 *
 * @param model - Raw model identifier, e.g. "gpt-5(high)".
 * @returns The base model name and the parsed level; when no recognized
 *          "(level)" suffix is present, the input is returned unchanged with
 *          `level: undefined`.
 */
export const parseModelNameWithLevel = (
  model: string,
): {
  baseModel: string
  level: ModelLevel | undefined
} => {
  const match = MODEL_LEVEL_SUFFIX_RE.exec(model)
  if (!match) {
    return {
      baseModel: model,
      level: undefined,
    }
  }

  return {
    baseModel: match[1],
    level: match[2] as ModelLevel,
  }
}

// True when the model should be routed through the Responses API (GPT family).
export const isGptResponsesModel = (model: string): boolean => {
  return model.slice(0, 4) === "gpt-"
}

// True when the model accepts a reasoning-effort setting (GPT-5 family).
export const supportsGptReasoningEffort = (model: string): boolean => {
  return model.slice(0, 5) === "gpt-5"
}

/**
 * Returns the set of levels selectable for `model`, or `undefined` when the
 * model has no level variants.
 */
export const getModelLevelsForModel = (
  model: string,
): ReadonlyArray<ModelLevel> | undefined => {
  // GPT-5 family models support every defined level via reasoning effort.
  if (supportsGptReasoningEffort(model)) {
    return MODEL_LEVELS
  }

  // Otherwise consult the per-model Claude variant table (absent → undefined).
  const claudeLevels =
    CLAUDE_MODEL_LEVEL_VARIANTS[
      model as keyof typeof CLAUDE_MODEL_LEVEL_VARIANTS
    ]
  return claudeLevels
}

// Claude models treated as "thinking" models.
const CLAUDE_THINKING_MODELS: ReadonlySet<string> = new Set([
  "claude-opus-4.6",
  "claude-opus-4.6-fast",
  "claude-sonnet-4.6",
])

export const isClaudeThinkingModel = (model: string): boolean =>
  CLAUDE_THINKING_MODELS.has(model)
47 changes: 45 additions & 2 deletions src/routes/chat-completions/handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import consola from "consola"
import { streamSSE, type SSEMessage } from "hono/streaming"

import { awaitApproval } from "~/lib/approval"
import { isGptResponsesModel, parseModelNameWithLevel } from "~/lib/model-level"
import { checkRateLimit } from "~/lib/rate-limit"
import { state } from "~/lib/state"
import { getTokenCount } from "~/lib/tokenizer"
Expand All @@ -12,17 +13,29 @@ import {
createChatCompletions,
type ChatCompletionResponse,
type ChatCompletionsPayload,
normalizeChatCompletionsPayloadModel,
} from "~/services/copilot/create-chat-completions"
import {
createResponses,
type ResponsesApiResponse,
} from "~/services/copilot/create-responses"

import {
translateChatCompletionsToResponses,
translateResponsesStreamToChatStream,
translateResponsesToChatCompletions,
} from "./responses-translation"

export async function handleCompletion(c: Context) {
await checkRateLimit(state)

let payload = await c.req.json<ChatCompletionsPayload>()
const { baseModel } = parseModelNameWithLevel(payload.model)
consola.debug("Request payload:", JSON.stringify(payload).slice(-400))

// Find the selected model
const selectedModel = state.models?.data.find(
(model) => model.id === payload.model,
(model) => model.id === baseModel,
)

// Calculate and display token count
Expand All @@ -47,7 +60,33 @@ export async function handleCompletion(c: Context) {
consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
}

const response = await createChatCompletions(payload)
const normalizedPayload = normalizeChatCompletionsPayloadModel(payload)

if (isGptResponsesModel(baseModel)) {
const responsesPayload =
translateChatCompletionsToResponses(normalizedPayload)
const responses = await createResponses(responsesPayload)

if (isNonStreamingResponse(responses)) {
const completionResponse = translateResponsesToChatCompletions(responses)
consola.debug(
"GPT translated response:",
JSON.stringify(completionResponse).slice(-400),
)
return c.json(completionResponse)
}

return streamSSE(c, async (stream) => {
for await (const chunk of translateResponsesStreamToChatStream(
responses,
normalizedPayload.model,
)) {
await stream.writeSSE(chunk)
}
})
}

const response = await createChatCompletions(normalizedPayload)

if (isNonStreaming(response)) {
consola.debug("Non-streaming response:", JSON.stringify(response))
Expand All @@ -63,6 +102,10 @@ export async function handleCompletion(c: Context) {
})
}

// Narrows a Responses API result: a value without an async iterator is the
// complete (non-streaming) response object rather than an SSE stream.
const isNonStreamingResponse = (
  response: Awaited<ReturnType<typeof createResponses>>,
): response is ResponsesApiResponse => {
  const isStream = Symbol.asyncIterator in response
  return !isStream
}

// Narrows a chat-completions result: only the complete (non-streaming)
// response carries an own `choices` property.
const isNonStreaming = (
  response: Awaited<ReturnType<typeof createChatCompletions>>,
): response is ChatCompletionResponse => {
  return Object.hasOwn(response, "choices")
}
Loading