Skip to content

Commit 8dfad5a

Browse files
committed
A bunch of improvements around extracting AI data
Adds a new model admin dashboard: test model strings, add and edit models, and view missing models and easily add them. Also extracts cost data from AI gateway provider response metadata and improves enrichment.
1 parent 7d3aac9 commit 8dfad5a

17 files changed

+3747
-42
lines changed

apps/webapp/app/components/runs/v3/ai/AIModelSummary.tsx

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ export function AITagsRow({ aiData }: { aiData: AISpanData }) {
66
<div className="flex flex-wrap items-center gap-1.5 py-2.5">
77
<Pill>{aiData.model}</Pill>
88
{aiData.provider !== "unknown" && <Pill variant="dimmed">{aiData.provider}</Pill>}
9+
{aiData.resolvedProvider && (
10+
<Pill variant="dimmed">via {aiData.resolvedProvider}</Pill>
11+
)}
912
{aiData.finishReason && <Pill variant="dimmed">{aiData.finishReason}</Pill>}
1013
{aiData.serviceTier && <Pill variant="dimmed">tier: {aiData.serviceTier}</Pill>}
1114
{aiData.toolChoice && <Pill variant="dimmed">tools: {aiData.toolChoice}</Pill>}
@@ -38,7 +41,14 @@ export function AIStatsSummary({ aiData }: { aiData: AISpanData }) {
3841
<MetricRow label="Input" value={aiData.inputTokens.toLocaleString()} unit="tokens" />
3942
<MetricRow label="Output" value={aiData.outputTokens.toLocaleString()} unit="tokens" />
4043
{aiData.cachedTokens != null && aiData.cachedTokens > 0 && (
41-
<MetricRow label="Cached" value={aiData.cachedTokens.toLocaleString()} unit="tokens" />
44+
<MetricRow label="Cache read" value={aiData.cachedTokens.toLocaleString()} unit="tokens" />
45+
)}
46+
{aiData.cacheCreationTokens != null && aiData.cacheCreationTokens > 0 && (
47+
<MetricRow
48+
label="Cache write"
49+
value={aiData.cacheCreationTokens.toLocaleString()}
50+
unit="tokens"
51+
/>
4252
)}
4353
{aiData.reasoningTokens != null && aiData.reasoningTokens > 0 && (
4454
<MetricRow

apps/webapp/app/components/runs/v3/ai/extractAISpanData.ts

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,12 @@ export function extractAISpanData(
3434
const model = str(gResponse.model) ?? str(gRequest.model) ?? str(aiModel.id);
3535
if (!model) return undefined;
3636

37-
// Prefer ai.usage (richer) over gen_ai.usage
38-
const inputTokens = num(aiUsage.inputTokens) ?? num(gUsage.input_tokens) ?? 0;
39-
const outputTokens = num(aiUsage.outputTokens) ?? num(gUsage.output_tokens) ?? 0;
37+
// Prefer ai.usage (richer) over gen_ai.usage.
38+
// Gateway/some providers emit promptTokens/completionTokens instead of inputTokens/outputTokens.
39+
const inputTokens =
40+
num(aiUsage.inputTokens) ?? num(aiUsage.promptTokens) ?? num(gUsage.input_tokens) ?? 0;
41+
const outputTokens =
42+
num(aiUsage.outputTokens) ?? num(aiUsage.completionTokens) ?? num(gUsage.output_tokens) ?? 0;
4043
const totalTokens = num(aiUsage.totalTokens) ?? inputTokens + outputTokens;
4144

4245
const tokensPerSecond =
@@ -56,6 +59,7 @@ export function extractAISpanData(
5659
operationName: str(gOperation.name) ?? str(ai.operationId) ?? "",
5760
finishReason: str(aiResponse.finishReason),
5861
serviceTier: providerMeta?.serviceTier,
62+
resolvedProvider: providerMeta?.resolvedProvider,
5963
toolChoice: parseToolChoice(aiPrompt.toolChoice),
6064
toolCount: toolDefs?.length,
6165
messageCount: countMessages(aiPrompt.messages),
@@ -64,14 +68,16 @@ export function extractAISpanData(
6468
outputTokens,
6569
totalTokens,
6670
cachedTokens: num(aiUsage.cachedInputTokens) ?? num(gUsage.cache_read_input_tokens),
71+
cacheCreationTokens:
72+
num(aiUsage.cacheCreationInputTokens) ?? num(gUsage.cache_creation_input_tokens),
6773
reasoningTokens: num(aiUsage.reasoningTokens) ?? num(gUsage.reasoning_tokens),
6874
tokensPerSecond,
6975
msToFirstChunk: num(aiResponse.msToFirstChunk),
7076
durationMs,
7177
inputCost: num(triggerLlm.input_cost),
7278
outputCost: num(triggerLlm.output_cost),
7379
totalCost: num(triggerLlm.total_cost),
74-
responseText: str(aiResponse.text) || undefined,
80+
responseText: str(aiResponse.text) || str(aiResponse.object) || undefined,
7581
toolDefinitions: toolDefs,
7682
items: buildDisplayItems(aiPrompt.messages, aiResponse.toolCalls, toolDefs),
7783
};
@@ -417,18 +423,38 @@ function parseToolDefinitions(raw: unknown): ToolDefinition[] | undefined {
417423

418424
function parseProviderMetadata(
419425
raw: unknown
420-
): { serviceTier?: string } | undefined {
426+
): { serviceTier?: string; resolvedProvider?: string; gatewayCost?: string } | undefined {
421427
if (typeof raw !== "string") return undefined;
422428
try {
423429
const parsed = JSON.parse(raw);
424430
if (!parsed || typeof parsed !== "object") return undefined;
425431

432+
let serviceTier: string | undefined;
433+
let resolvedProvider: string | undefined;
434+
let gatewayCost: string | undefined;
435+
426436
// Anthropic: { anthropic: { usage: { service_tier: "standard" } } }
427-
const anthropic = rec(parsed.anthropic ?? parsed);
428-
const usage = rec(anthropic.usage);
429-
const serviceTier = str(usage.service_tier);
437+
const anthropic = rec(parsed.anthropic);
438+
serviceTier = str(rec(anthropic.usage).service_tier);
439+
440+
// Azure/OpenAI: { azure: { serviceTier: "default" } } or { openai: { serviceTier: "..." } }
441+
if (!serviceTier) {
442+
serviceTier = str(rec(parsed.azure).serviceTier) ?? str(rec(parsed.openai).serviceTier);
443+
}
444+
445+
// Gateway: { gateway: { routing: { finalProvider, resolvedProvider }, cost } }
446+
const gateway = rec(parsed.gateway);
447+
const routing = rec(gateway.routing);
448+
resolvedProvider = str(routing.finalProvider) ?? str(routing.resolvedProvider);
449+
gatewayCost = str(gateway.cost);
450+
451+
// OpenRouter: { openrouter: { provider: "xAI" } }
452+
if (!resolvedProvider) {
453+
resolvedProvider = str(rec(parsed.openrouter).provider);
454+
}
430455

431-
return serviceTier ? { serviceTier } : undefined;
456+
if (!serviceTier && !resolvedProvider && !gatewayCost) return undefined;
457+
return { serviceTier, resolvedProvider, gatewayCost };
432458
} catch {
433459
return undefined;
434460
}

apps/webapp/app/components/runs/v3/ai/types.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ export type AISpanData = {
6666
// Categorical tags
6767
finishReason?: string;
6868
serviceTier?: string;
69+
/** Resolved downstream provider for gateway/openrouter spans (e.g. "xAI", "mistral") */
70+
resolvedProvider?: string;
6971
toolChoice?: string;
7072
toolCount?: number;
7173
messageCount?: number;
@@ -77,6 +79,7 @@ export type AISpanData = {
7779
outputTokens: number;
7880
totalTokens: number;
7981
cachedTokens?: number;
82+
cacheCreationTokens?: number;
8083
reasoningTokens?: number;
8184

8285
// Performance

apps/webapp/app/env.server.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,12 @@ const EnvironmentSchema = z
12471247
// Metric widget concurrency limits
12481248
METRIC_WIDGET_DEFAULT_ORG_CONCURRENCY_LIMIT: z.coerce.number().int().default(30),
12491249

1250+
// Admin ClickHouse URL (for admin dashboard queries like missing models)
1251+
ADMIN_CLICKHOUSE_URL: z
1252+
.string()
1253+
.optional()
1254+
.transform((v) => v ?? process.env.CLICKHOUSE_URL),
1255+
12501256
EVENTS_CLICKHOUSE_URL: z
12511257
.string()
12521258
.optional()
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import { type LoaderFunctionArgs, json } from "@remix-run/server-runtime";
2+
import { prisma } from "~/db.server";
3+
import { authenticateApiRequestWithPersonalAccessToken } from "~/services/personalAccessToken.server";
4+
import { getMissingLlmModels } from "~/services/admin/missingLlmModels.server";
5+
6+
/**
 * Authenticates the request with a personal access token and checks that the
 * token's user is flagged as an admin.
 *
 * @param request - Incoming request carrying the personal access token.
 * @returns The user record loaded via Prisma for the authenticated user id.
 * @throws A 401 JSON response when authentication yields no result.
 * @throws A 403 JSON response when the user is not found or is not an admin.
 */
async function requireAdmin(request: Request) {
  const tokenAuth = await authenticateApiRequestWithPersonalAccessToken(request);
  if (!tokenAuth) {
    throw json({ error: "Invalid or Missing API key" }, { status: 401 });
  }

  const where = { id: tokenAuth.userId };
  const candidate = await prisma.user.findUnique({ where });

  // Only a user that exists and has the admin flag set is let through.
  if (candidate?.admin) {
    return candidate;
  }

  throw json({ error: "You must be an admin to perform this action" }, { status: 403 });
}
19+
20+
/**
 * Admin-only loader that returns the result of `getMissingLlmModels` for the
 * lookback window given by the `lookbackHours` query parameter.
 *
 * `lookbackHours` defaults to 24 when the parameter is absent. It must be a
 * whole number of hours between 1 and 720, written as plain decimal digits;
 * any other value is answered with a 400 JSON error.
 *
 * @param args - Remix loader arguments; only `request` is used.
 * @returns A JSON response with `models` and the effective `lookbackHours`,
 *   or a 400 JSON error when `lookbackHours` is invalid.
 * @throws Whatever `requireAdmin` throws (401/403 JSON responses).
 */
export async function loader({ request }: LoaderFunctionArgs) {
  await requireAdmin(request);

  const rawLookbackHours =
    new URL(request.url).searchParams.get("lookbackHours")?.trim() ?? "24";

  // parseInt() alone would accept "24abc" as 24 and truncate "1.9" to 1, so
  // require the entire value to be decimal digits before converting it.
  const lookbackHours = /^\d+$/.test(rawLookbackHours) ? Number(rawLookbackHours) : NaN;

  if (Number.isNaN(lookbackHours) || lookbackHours < 1 || lookbackHours > 720) {
    return json({ error: "lookbackHours must be between 1 and 720" }, { status: 400 });
  }

  const models = await getMissingLlmModels({ lookbackHours });

  return json({ models, lookbackHours });
}

0 commit comments

Comments
 (0)