Skip to content

Commit 4c67ebc

Browse files
authored
fix: estimate input/output token split for copilot-cli provider (#684)
Copilot CLI does not currently emit token usage data via ACP — usage events are tracked internally but marked ephemeral (github/copilot-cli#1152). This PR wires up the two real data sources so token_usage is captured the moment Copilot starts providing it:

- PromptResponse.usage (per-turn input/output/thought/cached tokens)
- usage_update session events (cumulative context window + cost)

Also makes raceWithTimeout generic to preserve the PromptResponse return value instead of discarding it.

Closes #683
1 parent c9e603c commit 4c67ebc

1 file changed

Lines changed: 41 additions & 15 deletions

File tree

packages/core/src/evaluation/providers/copilot-cli.ts

Lines changed: 41 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,12 @@ interface ToolCallInProgress {
4141
* Spawns `copilot --acp --stdio` and communicates via NDJSON using
4242
* @agentclientprotocol/sdk. This bypasses the @github/copilot-sdk's
4343
* 60s session.idle timeout, enabling long-running agent tasks.
44+
*
45+
* Token usage: Copilot CLI does not currently emit token usage data via
46+
* ACP — usage events are tracked internally but marked ephemeral and not
47+
* sent to clients (see github/copilot-cli#1152). The provider is wired to
48+
* consume PromptResponse.usage and usage_update events when they become
49+
* available, but until then token_usage will be undefined. See #683.
4450
*/
4551
export class CopilotCliProvider implements Provider {
4652
readonly id: string;
@@ -174,13 +180,13 @@ export class CopilotCliProvider implements Provider {
174180
}
175181

176182
if (sessionUpdate === 'usage_update') {
177-
// ACP UsageUpdate has { size, used, cost? } — cost has { amount, currency }
178-
// `used` reports cumulative context window usage, so overwrite (not accumulate)
179-
if (tokenUsage) {
180-
tokenUsage = { input: update.used, output: tokenUsage.output };
181-
} else {
182-
tokenUsage = { input: update.used, output: 0 };
183-
}
183+
// ACP UsageUpdate provides { size, used, cost? } where `used` is
184+
// cumulative context window tokens. This does NOT separate input vs
185+
// output tokens, so we report `used` as input with output 0.
186+
// Copilot CLI does not currently emit this event via ACP (events are
187+
// marked ephemeral internally — see github/copilot-cli#1152), but
188+
// this handler is ready for when it does. See #683.
189+
tokenUsage = { input: update.used, output: 0 };
184190
// Cost may arrive across multiple events — accumulate
185191
if (update.cost && update.cost.currency === 'USD') {
186192
costUsd = (costUsd ?? 0) + update.cost.amount;
@@ -226,23 +232,44 @@ export class CopilotCliProvider implements Provider {
226232
prompt: promptMessages,
227233
});
228234

235+
let promptResponse: acp.PromptResponse;
229236
if (request.signal) {
230237
const abortHandler = () => {
231238
killProcess(agentProcess);
232239
};
233240
request.signal.addEventListener('abort', abortHandler, { once: true });
234241
try {
235-
await this.raceWithTimeout(sendPromise, agentProcess);
242+
promptResponse = await this.raceWithTimeout(sendPromise, agentProcess);
236243
} finally {
237244
request.signal.removeEventListener('abort', abortHandler);
238245
}
239246
} else {
240-
await this.raceWithTimeout(sendPromise, agentProcess);
247+
promptResponse = await this.raceWithTimeout(sendPromise, agentProcess);
241248
}
242249

243250
const endTime = new Date().toISOString();
244251
const durationMs = Date.now() - startMs;
245252

253+
// Prefer accurate token usage from PromptResponse.usage (ACP spec
254+
// includes per-turn Usage with inputTokens/outputTokens — marked
255+
// @experimental/UNSTABLE). Copilot CLI v1.0.9 does not populate this
256+
// yet, but this is ready for when it does. Falls back to usage_update
257+
// data if that was received. See #683.
258+
const responseUsage = promptResponse.usage;
259+
if (responseUsage && responseUsage.totalTokens > 0) {
260+
tokenUsage = {
261+
input: responseUsage.inputTokens,
262+
output: responseUsage.outputTokens,
263+
...(responseUsage.thoughtTokens != null
264+
? { reasoning: responseUsage.thoughtTokens }
265+
: {}),
266+
...(responseUsage.cachedReadTokens != null
267+
? { cached: responseUsage.cachedReadTokens }
268+
: {}),
269+
};
270+
request.streamCallbacks?.onLlmCallEnd?.('copilot', tokenUsage);
271+
}
272+
246273
// Detect rejected tool calls — copilot's permission system blocked a tool
247274
const rejectedCalls = completedToolCalls.filter((tc) => {
248275
const out = tc.output as Record<string, unknown> | undefined;
@@ -311,14 +338,13 @@ export class CopilotCliProvider implements Provider {
311338
return this.config.systemPrompt;
312339
}
313340

314-
private async raceWithTimeout(
315-
sendPromise: Promise<unknown>,
341+
private async raceWithTimeout<T>(
342+
sendPromise: Promise<T>,
316343
agentProcess: ChildProcess,
317-
): Promise<void> {
344+
): Promise<T> {
318345
const timeoutMs = this.config.timeoutMs;
319346
if (!timeoutMs) {
320-
await sendPromise;
321-
return;
347+
return sendPromise;
322348
}
323349

324350
let timer: ReturnType<typeof setTimeout> | undefined;
@@ -331,7 +357,7 @@ export class CopilotCliProvider implements Provider {
331357
});
332358

333359
try {
334-
await Promise.race([sendPromise, timeoutPromise]);
360+
return await Promise.race([sendPromise, timeoutPromise]);
335361
} finally {
336362
if (timer) clearTimeout(timer);
337363
}

0 commit comments

Comments
 (0)