293 changes: 162 additions & 131 deletions src/api/providers/claude-code.ts
@@ -122,153 +122,184 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler {
		// Reset per-request state that we persist into apiConversationHistory
		this.lastThinkingSignature = undefined

-		// Get access token from OAuth manager
-		const accessToken = await claudeCodeOAuthManager.getAccessToken()
-
-		if (!accessToken) {
-			throw new Error(
+		const buildNotAuthenticatedError = () =>
+			new Error(
				t("common:errors.claudeCode.notAuthenticated", {
					defaultValue:
						"Not authenticated with Claude Code. Please sign in using the Claude Code OAuth flow.",
				}),
			)
-		}

-		// Get user email for generating user_id metadata
-		const email = await claudeCodeOAuthManager.getEmail()
-
-		const model = this.getModel()
-
-		// Validate that the model ID is a valid ClaudeCodeModelId
-		const modelId = Object.hasOwn(claudeCodeModels, model.id)
-			? (model.id as ClaudeCodeModelId)
-			: claudeCodeDefaultModelId
-
-		// Generate user_id metadata in the format required by Claude Code API
-		const userId = generateUserId(email || undefined)
-
-		// Convert OpenAI tools to Anthropic format if provided and protocol is native
-		// Exclude tools when tool_choice is "none" since that means "don't use tools"
-		const shouldIncludeNativeTools =
-			metadata?.tools &&
-			metadata.tools.length > 0 &&
-			metadata?.toolProtocol !== "xml" &&
-			metadata?.tool_choice !== "none"
-
-		const anthropicTools = shouldIncludeNativeTools ? convertOpenAIToolsToAnthropic(metadata.tools!) : undefined
-
-		const anthropicToolChoice = shouldIncludeNativeTools
-			? convertOpenAIToolChoice(metadata.tool_choice, metadata.parallelToolCalls)
-			: undefined
-
-		// Determine reasoning effort and thinking configuration
-		const reasoningLevel = this.getReasoningEffort(model.info)
-
-		let thinking: ThinkingConfig
-		// With interleaved thinking (enabled via beta header), budget_tokens can exceed max_tokens
-		// as the token limit becomes the entire context window. We use the model's maxTokens.
-		// See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#interleaved-thinking
-		const maxTokens = model.info.maxTokens ?? 16384
-
-		if (reasoningLevel) {
-			// Use thinking mode with budget_tokens from config
-			const config = claudeCodeReasoningConfig[reasoningLevel]
-			thinking = {
-				type: "enabled",
-				budget_tokens: config.budgetTokens,
-			}
-		} else {
-			// Explicitly disable thinking
-			thinking = { type: "disabled" }
-		}
-
-		// Create streaming request using OAuth
-		const stream = createStreamingMessage({
-			accessToken,
-			model: modelId,
-			systemPrompt,
-			messages,
-			maxTokens,
-			thinking,
-			tools: anthropicTools,
-			toolChoice: anthropicToolChoice,
-			metadata: {
-				user_id: userId,
-			},
-		})
-
-		// Track usage for cost calculation
-		let inputTokens = 0
-		let outputTokens = 0
-		let cacheReadTokens = 0
-		let cacheWriteTokens = 0
-
-		for await (const chunk of stream) {
-			switch (chunk.type) {
-				case "text":
-					yield {
-						type: "text",
-						text: chunk.text,
-					}
-					break
-
-				case "reasoning":
-					yield {
-						type: "reasoning",
-						text: chunk.text,
-					}
-					break
-
-				case "thinking_complete":
-					// Capture the signature for persistence in api_conversation_history
-					// This enables tool use continuations where thinking blocks must be passed back
-					if (chunk.signature) {
-						this.lastThinkingSignature = chunk.signature
-					}
-					// Emit a complete thinking block with signature
-					// This is critical for interleaved thinking with tool use
-					// The signature must be included when passing thinking blocks back to the API
-					yield {
-						type: "reasoning",
-						text: chunk.thinking,
-						signature: chunk.signature,
-					}
-					break
-
-				case "tool_call_partial":
-					yield {
-						type: "tool_call_partial",
-						index: chunk.index,
-						id: chunk.id,
-						name: chunk.name,
-						arguments: chunk.arguments,
-					}
-					break
-
-				case "usage": {
-					inputTokens = chunk.inputTokens
-					outputTokens = chunk.outputTokens
-					cacheReadTokens = chunk.cacheReadTokens || 0
-					cacheWriteTokens = chunk.cacheWriteTokens || 0
-
-					// Claude Code is subscription-based, no per-token cost
-					const usageChunk: ApiStreamUsageChunk = {
-						type: "usage",
-						inputTokens,
-						outputTokens,
-						cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
-						cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
-						totalCost: 0,
-					}
-
-					yield usageChunk
-					break
-				}
-
-				case "error":
-					throw new Error(chunk.error)
-			}
-		}
+		async function* streamOnce(this: ClaudeCodeHandler, accessToken: string): ApiStream {
+			// Get user email for generating user_id metadata
+			const email = await claudeCodeOAuthManager.getEmail()
+
+			const model = this.getModel()
+
+			// Validate that the model ID is a valid ClaudeCodeModelId
+			const modelId = Object.hasOwn(claudeCodeModels, model.id)
+				? (model.id as ClaudeCodeModelId)
+				: claudeCodeDefaultModelId
+
+			// Generate user_id metadata in the format required by Claude Code API
+			const userId = generateUserId(email || undefined)
+
+			// Convert OpenAI tools to Anthropic format if provided and protocol is native
+			// Exclude tools when tool_choice is "none" since that means "don't use tools"
+			const shouldIncludeNativeTools =
+				metadata?.tools &&
+				metadata.tools.length > 0 &&
+				metadata?.toolProtocol !== "xml" &&
+				metadata?.tool_choice !== "none"
+
+			const anthropicTools = shouldIncludeNativeTools ? convertOpenAIToolsToAnthropic(metadata.tools!) : undefined
+
+			const anthropicToolChoice = shouldIncludeNativeTools
+				? convertOpenAIToolChoice(metadata.tool_choice, metadata.parallelToolCalls)
+				: undefined
+
+			// Determine reasoning effort and thinking configuration
+			const reasoningLevel = this.getReasoningEffort(model.info)
+
+			let thinking: ThinkingConfig
+			// With interleaved thinking (enabled via beta header), budget_tokens can exceed max_tokens
+			// as the token limit becomes the entire context window. We use the model's maxTokens.
+			// See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#interleaved-thinking
+			const maxTokens = model.info.maxTokens ?? 16384
+
+			if (reasoningLevel) {
+				// Use thinking mode with budget_tokens from config
+				const config = claudeCodeReasoningConfig[reasoningLevel]
+				thinking = {
+					type: "enabled",
+					budget_tokens: config.budgetTokens,
+				}
+			} else {
+				// Explicitly disable thinking
+				thinking = { type: "disabled" }
+			}
+
+			// Create streaming request using OAuth
+			const stream = createStreamingMessage({
+				accessToken,
+				model: modelId,
+				systemPrompt,
+				messages,
+				maxTokens,
+				thinking,
+				tools: anthropicTools,
+				toolChoice: anthropicToolChoice,
+				metadata: {
+					user_id: userId,
+				},
+			})
+
+			// Track usage for cost calculation
+			let inputTokens = 0
+			let outputTokens = 0
+			let cacheReadTokens = 0
+			let cacheWriteTokens = 0
+
+			for await (const chunk of stream) {
+				switch (chunk.type) {
+					case "text":
+						yield {
+							type: "text",
+							text: chunk.text,
+						}
+						break
+
+					case "reasoning":
+						yield {
+							type: "reasoning",
+							text: chunk.text,
+						}
+						break
+
+					case "thinking_complete":
+						// Capture the signature for persistence in api_conversation_history
+						// This enables tool use continuations where thinking blocks must be passed back
+						if (chunk.signature) {
+							this.lastThinkingSignature = chunk.signature
+						}
+						// Emit a complete thinking block with signature
+						// This is critical for interleaved thinking with tool use
+						// The signature must be included when passing thinking blocks back to the API
+						yield {
+							type: "reasoning",
+							text: chunk.thinking,
+							signature: chunk.signature,
+						}
+						break
+
+					case "tool_call_partial":
+						yield {
+							type: "tool_call_partial",
+							index: chunk.index,
+							id: chunk.id,
+							name: chunk.name,
+							arguments: chunk.arguments,
+						}
+						break
+
+					case "usage": {
+						inputTokens = chunk.inputTokens
+						outputTokens = chunk.outputTokens
+						cacheReadTokens = chunk.cacheReadTokens || 0
+						cacheWriteTokens = chunk.cacheWriteTokens || 0
+
+						// Claude Code is subscription-based, no per-token cost
+						const usageChunk: ApiStreamUsageChunk = {
+							type: "usage",
+							inputTokens,
+							outputTokens,
+							cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined,
+							cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined,
+							totalCost: 0,
+						}
+
+						yield usageChunk
+						break
+					}
+
+					case "error":
+						throw new Error(chunk.error)
+				}
+			}
+		}
+
+		// Get access token from OAuth manager
+		let accessToken = await claudeCodeOAuthManager.getAccessToken()
+		if (!accessToken) {
+			throw buildNotAuthenticatedError()
+		}
+
+		// Try the request with at most one force-refresh retry on auth failure
+		for (let attempt = 0; attempt < 2; attempt++) {
+			try {
+				yield* streamOnce.call(this, accessToken)
+				return
+			} catch (error) {
+				const message = error instanceof Error ? error.message : String(error)
+				const isAuthFailure = /unauthorized|invalid token|not authenticated|authentication/i.test(message)
+
+				// Only retry on auth failure during first attempt
+				const canRetry = attempt === 0 && isAuthFailure
+				if (!canRetry) {
+					throw error
+				}
+
+				// Force refresh the token for retry
+				const refreshed = await claudeCodeOAuthManager.forceRefreshAccessToken()
+				if (!refreshed) {
+					throw buildNotAuthenticatedError()
+				}
+				accessToken = refreshed
+			}
+		}
+
+		// Unreachable: loop always returns on success or throws on failure
+		throw buildNotAuthenticatedError()
	}

	getModel(): { id: string; info: ModelInfo } {
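Summarizing the hunk above: the request body is wrapped in a local streamOnce generator, and the handler now retries exactly once, after a forced token refresh, when the first attempt fails with an authentication-style error. A minimal standalone sketch of that control flow follows; the manager and stream types are reduced to stand-ins, and only getAccessToken, forceRefreshAccessToken, and the retry shape are taken from the diff.

// Minimal sketch, not the shipped file: one streaming attempt, plus a single
// forced-refresh retry when the failure looks like an authentication error.
async function* createMessageSketch(
	manager: {
		getAccessToken(): Promise<string | undefined>
		forceRefreshAccessToken(): Promise<string | undefined>
	},
	streamOnce: (token: string) => AsyncGenerator<unknown>,
): AsyncGenerator<unknown> {
	let token = await manager.getAccessToken()
	if (!token) throw new Error("Not authenticated with Claude Code.")

	for (let attempt = 0; attempt < 2; attempt++) {
		try {
			yield* streamOnce(token)
			return
		} catch (error) {
			const message = error instanceof Error ? error.message : String(error)
			const isAuthFailure = /unauthorized|invalid token|not authenticated|authentication/i.test(message)
			// Only the first attempt may retry, and only on an auth-style failure.
			if (attempt > 0 || !isAuthFailure) throw error

			const refreshed = await manager.forceRefreshAccessToken()
			if (!refreshed) throw new Error("Not authenticated with Claude Code.")
			token = refreshed
		}
	}
}

Bounding the loop at two attempts keeps a persistently failing token from retrying forever, and routing both the missing-token and failed-refresh paths through the same not-authenticated error keeps the user-facing message consistent.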
2 changes: 1 addition & 1 deletion src/extension.ts
@@ -96,7 +96,7 @@ export async function activate(context: vscode.ExtensionContext) {
	TerminalRegistry.initialize()

	// Initialize Claude Code OAuth manager for direct API access.
-	claudeCodeOAuthManager.initialize(context)
+	claudeCodeOAuthManager.initialize(context, (message) => outputChannel.appendLine(message))

	// Get default commands from configuration.
	const defaultCommands = vscode.workspace.getConfiguration(Package.name).get<string[]>("allowedCommands") || []
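The one-line change above threads the extension's output channel into the OAuth manager as a logging sink. A hypothetical sketch of the receiving side, assuming the manager simply stores an optional callback and defaults to a no-op; the real initialize signature and its context type live in the OAuth manager and may differ.

type ClaudeCodeLogFn = (message: string) => void

// Hypothetical shape of the manager's initialize; the actual implementation may differ.
class ClaudeCodeOAuthManagerSketch {
	private log: ClaudeCodeLogFn = () => {}

	initialize(context: unknown, log?: ClaudeCodeLogFn): void {
		if (log) {
			this.log = log
		}
		// Token refreshes and auth errors can now surface in the extension's output channel.
		this.log("Claude Code OAuth manager initialized")
	}
}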
37 changes: 37 additions & 0 deletions src/integrations/claude-code/__tests__/oauth.spec.ts
@@ -195,4 +195,41 @@ describe("Claude Code OAuth", () => {
			expect(CLAUDE_CODE_OAUTH_CONFIG.callbackPort).toBe(54545)
		})
	})
+
+	describe("refresh token behavior", () => {
+		afterEach(() => {
+			vi.unstubAllGlobals()
+		})
+
+		test("refresh responses may omit refresh_token (should be tolerated)", async () => {
+			const { refreshAccessToken } = await import("../oauth")
+
+			// Mock fetch to return a refresh response with no refresh_token
+			const mockFetch = vi.fn().mockResolvedValue(
+				new Response(
+					JSON.stringify({
+						access_token: "new-access",
+						expires_in: 3600,
+						// refresh_token intentionally omitted
+					}),
+					{ status: 200, headers: { "Content-Type": "application/json" } },
+				),
+			)
+
+			vi.stubGlobal("fetch", mockFetch)
+
+			const creds: ClaudeCodeCredentials = {
+				type: "claude" as const,
+				access_token: "old-access",
+				refresh_token: "old-refresh",
+				expired: new Date(Date.now() - 1000).toISOString(),
+				email: "test@example.com",
+			}
+
+			const refreshed = await refreshAccessToken(creds)
+			expect(refreshed.access_token).toBe("new-access")
+			expect(refreshed.refresh_token).toBe("old-refresh")
+			expect(refreshed.email).toBe("test@example.com")
+		})
+	})
})
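The new test pins down one refresh behavior: when the token endpoint's response omits refresh_token, the previously stored refresh token (and the stored email) must survive the merge. A small sketch of that merge follows; the credential shape is inferred from the test fixture, and the expiry arithmetic is an illustrative assumption rather than the real oauth.ts logic.

interface Credentials {
	type: "claude"
	access_token: string
	refresh_token: string
	expired: string
	email?: string
}

interface RefreshResponse {
	access_token: string
	refresh_token?: string // may be omitted by the server
	expires_in: number
}

// Merge a refresh response into existing credentials, tolerating a missing refresh_token.
function mergeRefreshedCredentials(current: Credentials, response: RefreshResponse): Credentials {
	return {
		...current,
		access_token: response.access_token,
		// Fall back to the previously stored refresh token when the response omits one.
		refresh_token: response.refresh_token ?? current.refresh_token,
		expired: new Date(Date.now() + response.expires_in * 1000).toISOString(),
	}
}

Tolerating an omitted refresh_token matches RFC 6749, which allows the authorization server to skip issuing a new refresh token during a refresh grant.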