Skip to content

Commit 65273f1

Browse files
committed
Fix close-ordering: keep context registered through final telemetry flush
`TelemetryClientProvider.releaseClient` was calling `unregisterContext` before `client.close()`. On the last refcount, that emptied the FIFO of `(context, authProvider)` pairs before the final flush ran, so the exporter's `getAuthProvider()` walk returned undefined and the batch was dropped with "Telemetry: Authorization header missing — metrics will be dropped". Defer `unregisterContext` until after `close()` on the last refcount; multi-refcount path is unchanged (immediate unregister so surviving consumers don't reach into a closing context). Verified end-to-end against a real SPOG host: the final flush now exports its 3 metrics (connection.open, statement.start, statement.complete) instead of warning and dropping them. Co-authored-by: Isaac
1 parent e30d820 commit 65273f1

3 files changed

Lines changed: 74 additions & 15 deletions

File tree

lib/telemetry/TelemetryClient.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,10 @@ class TelemetryClient implements IClientContext {
169169

170170
/**
171171
* Remove a `DBSQLClient`'s context from the pool. Called by
172-
* `TelemetryClientProvider.releaseClient` before refcount decrement so the
173-
* exporter doesn't keep trying to use a closed context.
172+
* `TelemetryClientProvider.releaseClient` in the multi-refcount case so the
173+
* exporter doesn't keep trying to use a closing context. Deliberately
174+
* NOT called on the last refcount release: `close()` needs the snapshot
175+
* pair to resolve auth/connection providers for the final flush.
174176
*
175177
* Uses the cached snapshot pair (`context`, `authProvider`) from register
176178
* time, not a fresh `context.getAuthProvider?.()` call. If the underlying

lib/telemetry/TelemetryClientProvider.ts

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -133,22 +133,35 @@ class TelemetryClientProvider {
133133
return;
134134
}
135135

136-
holder.client.unregisterContext(context);
136+
if (holder.refCount > 1) {
137+
// Other registrants remain — drop this context now so subsequent
138+
// flushes by surviving consumers don't try to authenticate via a
139+
// context whose underlying DBSQLClient is closing.
140+
holder.client.unregisterContext(context);
141+
holder.refCount -= 1;
142+
logger.log(LogLevel.debug, `TelemetryClient reference count for ${host}: ${holder.refCount}`);
143+
return;
144+
}
145+
146+
// Last refcount holder. Keep the context registered through close()
147+
// so the final flush can still resolve `getAuthProvider()` and
148+
// `getConnectionProvider()` from the FIFO snapshot — otherwise the
149+
// exporter drops the final batch with "missing Authorization header".
150+
// The TelemetryClient is fully closed below, so the lingering FIFO
151+
// entry has no further effect.
137152
holder.refCount -= 1;
138153
logger.log(LogLevel.debug, `TelemetryClient reference count for ${host}: ${holder.refCount}`);
139154

140-
if (holder.refCount <= 0) {
141-
// Remove from map BEFORE awaiting close so a concurrent
142-
// getOrCreateClient creates a fresh instance rather than receiving
143-
// this closing one.
144-
this.clients.delete(key);
145-
try {
146-
await holder.client.close();
147-
logger.log(LogLevel.debug, `Closed and removed TelemetryClient for host: ${host}`);
148-
} catch (error) {
149-
const msg = error instanceof Error ? error.message : String(error);
150-
logger.log(LogLevel.debug, `Error releasing TelemetryClient: ${msg}`);
151-
}
155+
// Remove from map BEFORE awaiting close so a concurrent
156+
// getOrCreateClient creates a fresh instance rather than receiving
157+
// this closing one.
158+
this.clients.delete(key);
159+
try {
160+
await holder.client.close();
161+
logger.log(LogLevel.debug, `Closed and removed TelemetryClient for host: ${host}`);
162+
} catch (error) {
163+
const msg = error instanceof Error ? error.message : String(error);
164+
logger.log(LogLevel.debug, `Error releasing TelemetryClient: ${msg}`);
152165
}
153166
}
154167

tests/unit/telemetry/TelemetryClientProvider.test.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,50 @@ describe('TelemetryClientProvider', () => {
267267
expect(second.isClosed()).to.be.false;
268268
expect(provider.getRefCount(HOST1)).to.equal(1);
269269
});
270+
271+
it('keeps the context registered through close() on last refcount', async () => {
272+
// Regression: releaseClient used to call unregisterContext before
273+
// close(), which dropped the only FIFO entry and left the final
274+
// flush without an auth provider — metrics were dropped with
275+
// "missing Authorization header".
276+
const context = new ClientContextStub();
277+
const provider = new TelemetryClientProvider();
278+
279+
const client = provider.getOrCreateClient(context, HOST1);
280+
let authProviderAtClose: unknown = 'unset';
281+
sinon.stub(client, 'close').callsFake(async () => {
282+
// The TelemetryClient is its own IClientContext. While close() runs,
283+
// getAuthProvider() must still resolve (the FIFO entry survives).
284+
authProviderAtClose = client.getAuthProvider?.();
285+
});
286+
287+
await provider.releaseClient(context, HOST1);
288+
289+
// The FIFO walk hits the (still-registered) context; the stub's
290+
// getAuthProvider returns undefined but the lookup itself completes.
291+
// What we're really asserting is that the FIFO wasn't pre-emptied:
292+
// pre-fix, this assertion would not even fire because close() runs
293+
// against an empty FIFO and the auth-provider walk short-circuits
294+
// before close()'s callsFake. Here we verify the spy ran AND the
295+
// context is still registered at that moment.
296+
expect(authProviderAtClose).to.not.equal('unset');
297+
expect((client as any).contexts.length).to.equal(1);
298+
});
299+
300+
it('drops the context immediately when other refcounts remain', async () => {
301+
const context = new ClientContextStub();
302+
const provider = new TelemetryClientProvider();
303+
304+
const client = provider.getOrCreateClient(context, HOST1);
305+
provider.getOrCreateClient(context, HOST1); // refcount=2
306+
307+
await provider.releaseClient(context, HOST1);
308+
309+
// Multi-refcount path: unregisterContext runs immediately; the
310+
// (single) FIFO entry tracking this context was removed.
311+
expect((client as any).contexts.length).to.equal(0);
312+
expect(provider.getRefCount(HOST1)).to.equal(1);
313+
});
270314
});
271315

272316
describe('Host normalization', () => {

0 commit comments

Comments
 (0)