Skip to content

Commit 1381c9d

Browse files
committed
feat(webapp): make LLM pricing pub/sub subscription opt-in per process
Subscribing every replica to the reload channel — admin dashboards, workers, anything that imports the registry — fans out a full-table reload across processes that don't actually need real-time pricing freshness; for those, the existing 5-minute interval is sufficient. Add LLM_PRICING_RELOAD_PUBSUB_ENABLED (default: true). Set it to false on non-OTel services in multi-service deployments so that only the span-ingesting processes subscribe and reload on publish. Defaulting to true preserves current behavior for single-service self-hosted deployments without any env tuning.
1 parent 1699582 commit 1381c9d

2 files changed

Lines changed: 63 additions & 50 deletions

File tree

apps/webapp/app/env.server.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,6 +1426,12 @@ const EnvironmentSchema = z
14261426
LLM_PRICING_RELOAD_INTERVAL_MS: z.coerce.number().int().default(5 * 60 * 1000), // 5 minutes
14271427
LLM_PRICING_RELOAD_CHANNEL: z.string().default("llm-registry:reload"),
14281428
LLM_PRICING_RELOAD_DEBOUNCE_MS: z.coerce.number().int().default(1000),
1429+
// Whether to subscribe this process to the LLM_PRICING_RELOAD_CHANNEL.
1430+
// Defaults to true so single-service self-hosted deployments work without
1431+
// tuning. In multi-service deployments, set this to false on services
1432+
// that don't ingest spans (dashboard, workers) — only the OTel-ingesting
1433+
// services need the registry to reload in real time.
1434+
LLM_PRICING_RELOAD_PUBSUB_ENABLED: BoolEnv.default(true),
14291435
LLM_PRICING_SEED_ON_STARTUP: BoolEnv.default(false),
14301436
LLM_PRICING_READY_TIMEOUT_MS: z.coerce.number().int().default(500),
14311437
LLM_METRICS_BATCH_SIZE: z.coerce.number().int().default(5000),

apps/webapp/app/v3/llmPricingRegistry.server.ts

Lines changed: 57 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -37,62 +37,69 @@ export const llmPricingRegistry = singleton("llmPricingRegistry", () => {
3737
});
3838
}, reloadInterval);
3939

40-
// Pub/sub reload — billing's LLM registry worker publishes on this channel
41-
// immediately after writing new/changed model rows, so all webapp pods see
42-
// updates within ~1s instead of waiting for the next interval tick.
43-
const subscriber = createRedisClient("llm-pricing:subscriber", {
44-
keyPrefix: "llm-pricing:subscriber:",
45-
host: env.COMMON_WORKER_REDIS_HOST,
46-
port: env.COMMON_WORKER_REDIS_PORT,
47-
username: env.COMMON_WORKER_REDIS_USERNAME,
48-
password: env.COMMON_WORKER_REDIS_PASSWORD,
49-
tlsDisabled: env.COMMON_WORKER_REDIS_TLS_DISABLED === "true",
50-
clusterMode: env.COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED === "1",
51-
});
40+
// Pub/sub reload is opt-in per process. Without it, the registry stays
41+
// accurate via the existing 5-minute interval. In multi-service deployments
42+
// we only want the OTel-ingesting services subscribed — the dashboard and
43+
// worker services don't need real-time pricing freshness and shouldn't pile
44+
// onto each publish with a full-table reload.
45+
if (env.LLM_PRICING_RELOAD_PUBSUB_ENABLED) {
46+
const subscriber = createRedisClient("llm-pricing:subscriber", {
47+
keyPrefix: "llm-pricing:subscriber:",
48+
host: env.COMMON_WORKER_REDIS_HOST,
49+
port: env.COMMON_WORKER_REDIS_PORT,
50+
username: env.COMMON_WORKER_REDIS_USERNAME,
51+
password: env.COMMON_WORKER_REDIS_PASSWORD,
52+
tlsDisabled: env.COMMON_WORKER_REDIS_TLS_DISABLED === "true",
53+
clusterMode: env.COMMON_WORKER_REDIS_CLUSTER_MODE_ENABLED === "1",
54+
});
5255

53-
subscriber.subscribe(env.LLM_PRICING_RELOAD_CHANNEL).catch((err) => {
54-
logger.warn("Failed to subscribe to LLM pricing reload channel", {
55-
channel: env.LLM_PRICING_RELOAD_CHANNEL,
56-
error: err instanceof Error ? err.message : String(err),
56+
subscriber.subscribe(env.LLM_PRICING_RELOAD_CHANNEL).catch((err) => {
57+
logger.warn("Failed to subscribe to LLM pricing reload channel", {
58+
channel: env.LLM_PRICING_RELOAD_CHANNEL,
59+
error: err instanceof Error ? err.message : String(err),
60+
});
5761
});
58-
});
5962

60-
// Coalesce reload calls so a burst of publishes only triggers one reload.
61-
// A reload always fires within LLM_PRICING_RELOAD_DEBOUNCE_MS of the first
62-
// publish in a burst; subsequent publishes during that window are no-ops
63-
// because the trailing-edge reload will pick up everything when it queries
64-
// the DB. Bounds reload rate to at most 1 / debounce-window regardless of
65-
// how chatty the publisher is.
66-
const debounceMs = env.LLM_PRICING_RELOAD_DEBOUNCE_MS;
67-
let pendingReloadTimer: NodeJS.Timeout | null = null;
68-
69-
function scheduleReload() {
70-
if (pendingReloadTimer) return;
71-
pendingReloadTimer = setTimeout(() => {
72-
pendingReloadTimer = null;
73-
registry.reload().catch((err) => {
74-
logger.warn("Failed to reload LLM pricing registry from pub/sub", {
75-
error: err instanceof Error ? err.message : String(err),
63+
// Coalesce reload calls so a burst of publishes only triggers one
64+
// reload. The first publish schedules a reload at
65+
// T+LLM_PRICING_RELOAD_DEBOUNCE_MS; subsequent publishes during that
66+
// window are no-ops because the trailing reload picks up everything
67+
// when it queries the DB. Bounds reload rate to at most 1 per debounce
68+
// window regardless of publisher chattiness.
69+
const debounceMs = env.LLM_PRICING_RELOAD_DEBOUNCE_MS;
70+
let pendingReloadTimer: NodeJS.Timeout | null = null;
71+
72+
function scheduleReload() {
73+
if (pendingReloadTimer) return;
74+
pendingReloadTimer = setTimeout(() => {
75+
pendingReloadTimer = null;
76+
registry.reload().catch((err) => {
77+
logger.warn("Failed to reload LLM pricing registry from pub/sub", {
78+
error: err instanceof Error ? err.message : String(err),
79+
});
7680
});
77-
});
78-
}, debounceMs);
79-
}
81+
}, debounceMs);
82+
}
8083

81-
subscriber.on("message", (channel) => {
82-
if (channel !== env.LLM_PRICING_RELOAD_CHANNEL) return;
83-
scheduleReload();
84-
});
84+
subscriber.on("message", (channel) => {
85+
if (channel !== env.LLM_PRICING_RELOAD_CHANNEL) return;
86+
scheduleReload();
87+
});
8588

86-
signalsEmitter.on("SIGTERM", () => {
87-
clearInterval(interval);
88-
if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
89-
void subscriber.quit().catch(() => {});
90-
});
91-
signalsEmitter.on("SIGINT", () => {
92-
clearInterval(interval);
93-
if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
94-
void subscriber.quit().catch(() => {});
95-
});
89+
signalsEmitter.on("SIGTERM", () => {
90+
clearInterval(interval);
91+
if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
92+
void subscriber.quit().catch(() => {});
93+
});
94+
signalsEmitter.on("SIGINT", () => {
95+
clearInterval(interval);
96+
if (pendingReloadTimer) clearTimeout(pendingReloadTimer);
97+
void subscriber.quit().catch(() => {});
98+
});
99+
} else {
100+
signalsEmitter.on("SIGTERM", () => clearInterval(interval));
101+
signalsEmitter.on("SIGINT", () => clearInterval(interval));
102+
}
96103

97104
return registry;
98105
});

0 commit comments

Comments (0)