Skip to content

Commit 3d21c11

Browse files
committed
Address review feedback: pure header builder, SPOG logs, all-purpose path form
- buildCustomHeaders(httpPath, userHeaders) and extractWorkspaceId(httpPath): SPOG-routing inputs now in the signature instead of read off this.httpPath. extractWorkspaceId is static. Drops the (client as any).httpPath test workaround. Fixes the hidden ordering dependency flagged in PR review [M1]. - Log SPOG header injection (debug), caller-supplied override (debug), and non-numeric workspace ID (warn). Closes JDBC parity gap so oncall has a grep handle when SPOG routing misbehaves [H2]. - extractWorkspaceId now also matches the all-purpose cluster path form /o/<wsId>/<cluster-id> in addition to the warehouse query form ?o=<wsId>. Verified end-to-end against a real all-purpose cluster on a SPOG host. Co-authored-by: Isaac Signed-off-by: samikshya-chand_data <samikshya.chand@databricks.com>
1 parent 66ce012 commit 3d21c11

2 files changed

Lines changed: 167 additions & 53 deletions

File tree

lib/DBSQLClient.ts

Lines changed: 69 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -298,9 +298,9 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
298298
/**
299299
* Extract the numeric workspace ID for telemetry.
300300
*
301-
* The only reliable carrier in the connection params today is the `?o=N`
302-
* query parameter on `httpPath` — Databricks SQL warehouses are typically
303-
* connected to via paths like `/sql/1.0/warehouses/<id>?o=12345678901234`.
301+
* Two URL shapes carry the workspace ID today:
302+
* - Warehouse, query form: `/sql/1.0/warehouses/<id>?o=<wsId>`
303+
* - All-purpose cluster, path form: `sql/protocolv1/o/<wsId>/<cluster-id>`
304304
*
305305
* Host-based extraction was tried previously but produced confidently-wrong
306306
* values:
@@ -313,39 +313,84 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
313313
* Returns `undefined` when no workspace ID can be derived. Server-side
314314
* attribution is better off seeing a missing field than a wrong value.
315315
*/
316-
private extractWorkspaceId(): string | undefined {
317-
const { httpPath } = this;
316+
private static extractWorkspaceId(httpPath: string | undefined): string | undefined {
318317
if (!httpPath) {
319318
return undefined;
320319
}
321320
const queryIdx = httpPath.indexOf('?');
322-
if (queryIdx < 0) {
323-
return undefined;
321+
// Warehouse form: `?o=<digits>` in the query string.
322+
if (queryIdx >= 0) {
323+
const query = httpPath.slice(queryIdx + 1);
324+
// Match `o=<digits>` as the first param, an inner `&o=<digits>`, etc.
325+
// Workspace IDs are decimal integers; reject anything else so a stray
326+
// `o=tenant_42` doesn't ship as a workspace ID.
327+
const queryMatch = query.match(/(?:^|&)o=(\d+)(?:&|$)/);
328+
if (queryMatch) {
329+
return queryMatch[1];
330+
}
324331
}
325-
const query = httpPath.slice(queryIdx + 1);
326-
// Match `o=<digits>` as the first param, an inner `&o=<digits>`, etc.
327-
// Workspace IDs are decimal integers; reject anything else so a stray
328-
// `o=tenant_42` doesn't ship as a workspace ID.
329-
const match = query.match(/(?:^|&)o=(\d+)(?:&|$)/);
330-
return match ? match[1] : undefined;
332+
// All-purpose cluster form: `/o/<digits>/<cluster-id>` as a path segment.
333+
const pathOnly = queryIdx >= 0 ? httpPath.slice(0, queryIdx) : httpPath;
334+
const pathMatch = pathOnly.match(/(?:^|\/)o\/(\d+)(?:\/|$)/);
335+
return pathMatch ? pathMatch[1] : undefined;
336+
}
337+
338+
// Detects an `o=<value>` or `/o/<value>` where `<value>` is present but
339+
// non-numeric, so the caller can warn instead of silently dropping a
340+
// malformed workspace param.
341+
private static hasMalformedOrgParam(httpPath: string | undefined): boolean {
342+
if (!httpPath) {
343+
return false;
344+
}
345+
const queryIdx = httpPath.indexOf('?');
346+
if (queryIdx >= 0) {
347+
const query = httpPath.slice(queryIdx + 1);
348+
const hasOrg = /(?:^|&)o=/.test(query);
349+
const hasNumericOrg = /(?:^|&)o=\d+(?:&|$)/.test(query);
350+
if (hasOrg && !hasNumericOrg) {
351+
return true;
352+
}
353+
}
354+
const pathOnly = queryIdx >= 0 ? httpPath.slice(0, queryIdx) : httpPath;
355+
const hasPathOrg = /(?:^|\/)o\/[^/]+/.test(pathOnly);
356+
const hasNumericPathOrg = /(?:^|\/)o\/\d+(?:\/|$)/.test(pathOnly);
357+
return hasPathOrg && !hasNumericPathOrg;
331358
}
332359

333360
/**
334361
* Build the customHeaders map applied to telemetry POSTs and feature-flag
335-
* GETs (SPOG / Single Panel of Glass support). When `httpPath` carries
336-
* `?o=<workspaceId>` — account-level vanity URL routing — endpoints that
337-
* don't include the workspace in their path need the workspace conveyed via
362+
* GETs (SPOG / Single Panel of Glass support). When `httpPath` carries a
363+
* workspace ID — either as a `?o=<wsId>` query (warehouse) or a
364+
* `/o/<wsId>/<cluster-id>` path segment (all-purpose cluster) — endpoints
365+
* that don't include the workspace in their URL path need it conveyed via
338366
* the `x-databricks-org-id` header instead. A user-supplied value in
339-
* `options.customHeaders` (case-insensitively keyed) wins over the parsed
340-
* value.
367+
* `userHeaders` (case-insensitively keyed) wins over the parsed value.
368+
*
369+
* `httpPath` is passed explicitly (rather than read off `this.httpPath`) so
370+
* the SPOG-routing dependency is visible in the signature — a future
371+
* refactor that reorders connect() can't silently break injection.
341372
*/
342-
private buildCustomHeaders(options: ConnectionOptions): Record<string, string> | undefined {
343-
const merged: Record<string, string> = { ...(options.customHeaders ?? {}) };
373+
private buildCustomHeaders(
374+
httpPath: string | undefined,
375+
userHeaders: Record<string, string> | undefined,
376+
): Record<string, string> | undefined {
377+
const merged: Record<string, string> = { ...(userHeaders ?? {}) };
344378
const hasOrgIdAlready = Object.keys(merged).some((k) => k.toLowerCase() === 'x-databricks-org-id');
345-
if (!hasOrgIdAlready) {
346-
const orgId = this.extractWorkspaceId();
379+
if (hasOrgIdAlready) {
380+
this.logger.log(
381+
LogLevel.debug,
382+
'SPOG: x-databricks-org-id supplied by caller; not extracting from httpPath',
383+
);
384+
} else {
385+
const orgId = DBSQLClient.extractWorkspaceId(httpPath);
347386
if (orgId) {
348387
merged['x-databricks-org-id'] = orgId;
388+
this.logger.log(LogLevel.debug, `SPOG: injecting x-databricks-org-id=${orgId} (extracted from httpPath)`);
389+
} else if (DBSQLClient.hasMalformedOrgParam(httpPath)) {
390+
this.logger.log(
391+
LogLevel.warn,
392+
'SPOG: httpPath contains non-numeric workspace ID; x-databricks-org-id not injected',
393+
);
349394
}
350395
}
351396
return Object.keys(merged).length > 0 ? merged : undefined;
@@ -585,7 +630,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
585630
// SPOG: parse `?o=<workspaceId>` out of httpPath and stash it as
586631
// `x-databricks-org-id` for the telemetry + feature-flag clients, which
587632
// hit endpoints that don't carry the workspace in their URL path.
588-
this.config.customHeaders = this.buildCustomHeaders(options);
633+
this.config.customHeaders = this.buildCustomHeaders(options.path, options.customHeaders);
589634

590635
this.authProvider = this.createAuthProvider(options, authProvider);
591636

@@ -703,7 +748,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
703748
safeEmit(this, (emitter) => {
704749
if (!this.host) return;
705750
const latencyMs = Date.now() - startTime;
706-
const workspaceId = this.extractWorkspaceId();
751+
const workspaceId = DBSQLClient.extractWorkspaceId(this.httpPath);
707752
const driverConfig = this.driverConfigShipped ? undefined : this.buildDriverConfiguration();
708753
if (driverConfig) {
709754
this.driverConfigShipped = true;

tests/unit/DBSQLClient.test.ts

Lines changed: 98 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -767,77 +767,146 @@ describe('DBSQLClient telemetry paths', () => {
767767

768768
describe('extractWorkspaceId', () => {
769769
it('returns the numeric o= param from httpPath', () => {
770-
const client = new DBSQLClient();
771-
(client as any).httpPath = '/sql/1.0/warehouses/abc?o=12345678901234';
772-
const id = (client as any).extractWorkspaceId();
770+
const id = (DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc?o=12345678901234');
773771
expect(id).to.equal('12345678901234');
774772
});
775773

776774
it('returns undefined when no query string', () => {
777-
const client = new DBSQLClient();
778-
(client as any).httpPath = '/sql/1.0/warehouses/abc';
779-
expect((client as any).extractWorkspaceId()).to.be.undefined;
775+
expect((DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc')).to.be.undefined;
780776
});
781777

782778
it('returns undefined when o= is not numeric', () => {
783-
const client = new DBSQLClient();
784-
(client as any).httpPath = '/sql/1.0/warehouses/abc?o=tenant_xyz';
785-
expect((client as any).extractWorkspaceId()).to.be.undefined;
779+
expect((DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc?o=tenant_xyz')).to.be.undefined;
786780
});
787781

788782
it('handles o= as a non-first param', () => {
789-
const client = new DBSQLClient();
790-
(client as any).httpPath = '/sql/1.0/warehouses/abc?foo=bar&o=42&baz=qux';
791-
expect((client as any).extractWorkspaceId()).to.equal('42');
783+
expect((DBSQLClient as any).extractWorkspaceId('/sql/1.0/warehouses/abc?foo=bar&o=42&baz=qux')).to.equal('42');
792784
});
793785

794786
it('returns undefined when httpPath is unset', () => {
795-
const client = new DBSQLClient();
796-
expect((client as any).extractWorkspaceId()).to.be.undefined;
787+
expect((DBSQLClient as any).extractWorkspaceId(undefined)).to.be.undefined;
788+
});
789+
790+
it('returns the numeric workspace id from all-purpose cluster path form', () => {
791+
expect(
792+
(DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/99999999999999/0101-000000-aaaaaaaa'),
793+
).to.equal('99999999999999');
794+
});
795+
796+
it('returns the numeric workspace id from all-purpose cluster path with leading slash', () => {
797+
expect(
798+
(DBSQLClient as any).extractWorkspaceId('/sql/protocolv1/o/12345/0101-000000-aaaaaaaa'),
799+
).to.equal('12345');
800+
});
801+
802+
it('returns undefined when all-purpose cluster path has non-numeric workspace segment', () => {
803+
expect(
804+
(DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/tenant_xyz/0101-000000-aaaaaaaa'),
805+
).to.be.undefined;
806+
});
807+
808+
it('prefers ?o= query form over /o/ path form when both are present', () => {
809+
expect(
810+
(DBSQLClient as any).extractWorkspaceId('sql/protocolv1/o/111/cluster?o=222'),
811+
).to.equal('222');
797812
});
798813
});
799814

800815
describe('buildCustomHeaders (SPOG)', () => {
801816
it('injects x-databricks-org-id from ?o= in httpPath', () => {
802817
const client = new DBSQLClient();
803-
(client as any).httpPath = '/sql/1.0/warehouses/abc?o=12345678901234';
804-
const headers = (client as any).buildCustomHeaders({ path: '/sql/1.0/warehouses/abc?o=12345678901234' });
818+
const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=12345678901234', undefined);
805819
expect(headers).to.deep.equal({ 'x-databricks-org-id': '12345678901234' });
806820
});
807821

808822
it('returns undefined when no ?o= and no user-supplied customHeaders', () => {
809823
const client = new DBSQLClient();
810-
(client as any).httpPath = '/sql/1.0/warehouses/abc';
811-
const headers = (client as any).buildCustomHeaders({ path: '/sql/1.0/warehouses/abc' });
824+
const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc', undefined);
812825
expect(headers).to.be.undefined;
813826
});
814827

815828
it('preserves user-supplied customHeaders alongside parsed org-id', () => {
816829
const client = new DBSQLClient();
817-
(client as any).httpPath = '/sql/1.0/warehouses/abc?o=42';
818-
const headers = (client as any).buildCustomHeaders({
819-
path: '/sql/1.0/warehouses/abc?o=42',
820-
customHeaders: { 'x-trace-id': 'tid-001' },
821-
});
830+
const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', { 'x-trace-id': 'tid-001' });
822831
expect(headers).to.deep.equal({ 'x-trace-id': 'tid-001', 'x-databricks-org-id': '42' });
823832
});
824833

825834
it('user-supplied x-databricks-org-id wins over ?o= parsed value (case-insensitive)', () => {
826835
const client = new DBSQLClient();
827-
(client as any).httpPath = '/sql/1.0/warehouses/abc?o=42';
828-
const headers = (client as any).buildCustomHeaders({
829-
path: '/sql/1.0/warehouses/abc?o=42',
830-
customHeaders: { 'X-Databricks-Org-Id': '999' },
836+
const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', {
837+
'X-Databricks-Org-Id': '999',
831838
});
832839
expect(headers).to.deep.equal({ 'X-Databricks-Org-Id': '999' });
833840
});
834841

835842
it('does not inject org-id when ?o= value is non-numeric', () => {
836843
const client = new DBSQLClient();
837-
(client as any).httpPath = '/sql/1.0/warehouses/abc?o=tenant_xyz';
838-
const headers = (client as any).buildCustomHeaders({ path: '/sql/1.0/warehouses/abc?o=tenant_xyz' });
844+
const headers = (client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=tenant_xyz', undefined);
839845
expect(headers).to.be.undefined;
840846
});
847+
848+
it('injects x-databricks-org-id from all-purpose cluster path form', () => {
849+
const client = new DBSQLClient();
850+
const headers = (client as any).buildCustomHeaders(
851+
'sql/protocolv1/o/99999999999999/0101-000000-aaaaaaaa',
852+
undefined,
853+
);
854+
expect(headers).to.deep.equal({ 'x-databricks-org-id': '99999999999999' });
855+
});
856+
857+
it('logs a warning when workspace ID segment is non-numeric (path form)', () => {
858+
const client = new DBSQLClient();
859+
const logSpy = sinon.spy((client as any).logger, 'log');
860+
try {
861+
(client as any).buildCustomHeaders('sql/protocolv1/o/tenant_xyz/cluster', undefined);
862+
const warnCalls = logSpy.getCalls().filter((c) => c.args[0] === LogLevel.warn);
863+
expect(warnCalls).to.have.lengthOf(1);
864+
expect(warnCalls[0].args[1]).to.match(/non-numeric workspace ID/);
865+
} finally {
866+
logSpy.restore();
867+
}
868+
});
869+
870+
it('logs a warning when ?o= is present but non-numeric', () => {
871+
const client = new DBSQLClient();
872+
const logSpy = sinon.spy((client as any).logger, 'log');
873+
try {
874+
(client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=tenant_xyz', undefined);
875+
const warnCalls = logSpy.getCalls().filter((c) => c.args[0] === LogLevel.warn);
876+
expect(warnCalls).to.have.lengthOf(1);
877+
expect(warnCalls[0].args[1]).to.match(/non-numeric workspace ID/);
878+
} finally {
879+
logSpy.restore();
880+
}
881+
});
882+
883+
it('logs a debug line when injecting org-id from httpPath', () => {
884+
const client = new DBSQLClient();
885+
const logSpy = sinon.spy((client as any).logger, 'log');
886+
try {
887+
(client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', undefined);
888+
const injectLog = logSpy
889+
.getCalls()
890+
.find((c) => c.args[0] === LogLevel.debug && /injecting x-databricks-org-id=42/.test(String(c.args[1])));
891+
expect(injectLog, 'expected SPOG inject debug log').to.exist;
892+
} finally {
893+
logSpy.restore();
894+
}
895+
});
896+
897+
it('logs a debug line when caller supplies x-databricks-org-id', () => {
898+
const client = new DBSQLClient();
899+
const logSpy = sinon.spy((client as any).logger, 'log');
900+
try {
901+
(client as any).buildCustomHeaders('/sql/1.0/warehouses/abc?o=42', { 'x-databricks-org-id': '999' });
902+
const callerLog = logSpy
903+
.getCalls()
904+
.find((c) => c.args[0] === LogLevel.debug && /supplied by caller/.test(String(c.args[1])));
905+
expect(callerLog, 'expected SPOG caller-supplied debug log').to.exist;
906+
} finally {
907+
logSpy.restore();
908+
}
909+
});
841910
});
842911

843912
describe('telemetry refcount on reconnect', () => {

0 commit comments

Comments
 (0)