Skip to content

Commit 3e7852f

Browse files
committed
test(sea): lock complex types as native Arrow default — pecotesting e2e
Adds an end-to-end test against pecotesting (gated by `DATABRICKS_PECOTESTING_*` env vars; skipped when absent) that confirms ARRAY / MAP / STRUCT and nested ARRAY<STRUCT> come back as **native Arrow** shapes (List / Map / Struct) — not Utf8 JSON strings. The kernel's `ResultConfig::complex_types_as_json` defaults to `false` (Arrow-native), and the SEA wire request hardcodes `format = ARROW_STREAM`, so this is the existing default. The test locks the contract: a regression that flips the default (or an upstream change that wraps the result post-processor in the JSON pass) would fail this assertion immediately. Matches the NodeJS Thrift backend's `complexTypesAsArrow=true` default — see `DBSQLSession.getArrowOptions` where `useArrowNativeTypes=true` propagates to `complexTypesAsArrow`. Mirrors the kernel-side e2e at `tests/v0_execute_e2e.rs::complex_types_as_json_flag_stringifies_complex_columns`, which exercises both the Arrow-native and JSON-string paths. Decision — no opt-in toggle exposed at the JS layer: neither Python `use_sea` nor NodeJS Thrift exposes a `complexTypesAsJson` knob to end users; the kernel's `ResultConfig::complex_types_as_json` remains internal until a consumer needs it. Adding the toggle now would invite drift from the kernel; we revisit when a consumer asks. Matrix row 11 of section 3 stays as "implemented — native Arrow default matches Thrift behaviour". Co-authored-by: Isaac Signed-off-by: Madhavendra Rathore <madhavendra.rathore@databricks.com>
1 parent b4095a6 commit 3e7852f

1 file changed

Lines changed: 139 additions & 0 deletions

File tree

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
// Copyright (c) 2026 Databricks, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
/**
16+
* End-to-end check that complex types (ARRAY / MAP / STRUCT and nested
17+
* combinations) flow through the SEA path as **native Arrow** shapes —
18+
* not JSON strings.
19+
*
20+
* Kernel default is `ResultConfig::complex_types_as_json = false`
21+
* (Arrow-native is the default). The kernel-side equivalent of this
22+
* test lives at
23+
* `tests/v0_execute_e2e.rs::complex_types_as_json_flag_stringifies_complex_columns`
24+
* and asserts the dual: both `false` (Arrow) and `true` (Utf8 JSON)
25+
* paths produce the expected shape.
26+
*
27+
* Matrix parity: this matches the NodeJS Thrift backend's behaviour
28+
* when `useArrowNativeTypes: true` (the default — see
29+
* `DBSQLSession.getArrowOptions` setting `complexTypesAsArrow: true`).
30+
*
31+
* Skipped when DATABRICKS_PECOTESTING_* env vars are absent. Pulls
32+
* credentials from the standard pecotesting set (see
33+
* `tests/e2e/sea/operation-lifecycle-e2e.test.ts` for the same gate).
34+
*/
35+
36+
import { expect } from 'chai';
37+
import { tableFromIPC } from 'apache-arrow';
38+
import { getSeaNative } from '../../../lib/sea/SeaNativeLoader';
39+
40+
/**
 * Structural view of the native SEA binding, limited to the single entry
 * point this test calls. The value returned by `getSeaNative()` is cast to
 * this shape.
 */
interface NativeBinding {
  /**
   * Opens a session from workspace credentials.
   *
   * @param options - host name, HTTP path and access token of the workspace.
   * @returns a promise for the connection handle.
   */
  openSession(options: { hostName: string; httpPath: string; token: string }): Promise<NativeConnection>;
}
47+
48+
/**
 * Connection handle resolved by `NativeBinding.openSession`, limited to the
 * members this test uses.
 */
interface NativeConnection {
  /** Closes the connection; the test awaits this in its `finally` block. */
  close(): Promise<void>;

  /**
   * Submits a SQL statement.
   *
   * @param sqlText - the statement text to execute.
   * @returns a promise for the statement handle used to fetch results.
   */
  executeStatement(sqlText: string): Promise<NativeStatement>;
}
52+
53+
/** Payload wrapper shared by batch and schema results: Arrow IPC bytes. */
type NativeIpcEnvelope = { ipcBytes: Buffer };

/**
 * Statement handle resolved by `NativeConnection.executeStatement`.
 */
interface NativeStatement {
  /** Resolves to the next result batch as Arrow IPC bytes, or `null`. */
  fetchNextBatch(): Promise<NativeIpcEnvelope | null>;

  /** Resolves to the result schema as Arrow IPC bytes. */
  schema(): Promise<NativeIpcEnvelope>;

  /** Cancels the statement. */
  cancel(): Promise<void>;

  /** Closes the statement; the test treats this as best-effort cleanup. */
  close(): Promise<void>;
}
59+
60+
/**
 * Live end-to-end suite: runs one SELECT of complex-typed literals through
 * the native SEA binding and asserts on the Arrow schema decoded from the
 * first result batch. Skipped entirely when no workspace credentials are
 * configured in the environment.
 */
describe('SEA complex types — native Arrow default', function suite() {
  this.timeout(120_000);

  // `||` (not `??`) is deliberate: a variable that is set but empty falls
  // through to the E2E_* fallback and, failing that, to the skip gate below.
  const hostName =
    process.env.DATABRICKS_PECOTESTING_SERVER_HOSTNAME || process.env.E2E_HOST;
  const httpPath =
    process.env.DATABRICKS_PECOTESTING_HTTP_PATH || process.env.E2E_PATH;
  const token =
    process.env.DATABRICKS_PECOTESTING_TOKEN || process.env.E2E_ACCESS_TOKEN;

  before(function gate() {
    if (!hostName || !httpPath || !token) {
      // eslint-disable-next-line no-invalid-this
      this.skip();
    }
  });

  it('ARRAY / MAP / STRUCT come back as native Arrow shapes', async () => {
    const binding = getSeaNative() as unknown as NativeBinding;
    // The `before` gate guarantees all three credentials are non-empty here.
    const connection = await binding.openSession({
      hostName: hostName as string,
      httpPath: httpPath as string,
      token: token as string,
    });

    let statement: NativeStatement | null = null;
    try {
      const sql = `SELECT
        ARRAY(1, 2, 3) AS c_arr,
        MAP('k1', 'v1', 'k2', 'v2') AS c_map,
        NAMED_STRUCT('a', 'foo', 'b', 1) AS c_struct,
        ARRAY(NAMED_STRUCT('a', 'x', 'b', 1),
              NAMED_STRUCT('a', 'y', 'b', 2)) AS c_arr_struct`;

      statement = await connection.executeStatement(sql);
      const batchEnvelope = await statement.fetchNextBatch();
      // One explicit guard fails the test and narrows the type, replacing the
      // `expect(...)` + non-null `!` pair.
      if (batchEnvelope === null) {
        throw new Error('fetchNextBatch() returned null; expected a result batch');
      }

      const { schema } = tableFromIPC(batchEnvelope.ipcBytes);

      // Looks a column up by name and returns its Arrow type rendered by
      // `toString()`; a missing column fails the test with a clear message.
      const typeNameOf = (column: string): string => {
        const field = schema.fields.find((f) => f.name === column);
        if (field === undefined) {
          throw new Error(`${column} field missing from result schema`);
        }
        return field.type.toString();
      };

      // Resolve every column first so a missing one is reported before any
      // type-shape assertion runs.
      const arrType = typeNameOf('c_arr');
      const mapType = typeNameOf('c_map');
      const structType = typeNameOf('c_struct');
      const arrStructType = typeNameOf('c_arr_struct');

      // Arrow type *names* as rendered by arrow-js `toString()` — these are
      // the structural checks that distinguish "native Arrow" from "JSON
      // Utf8". The patterns are anchored at the start so a nested type name
      // (e.g. the `Struct` inside `List<Struct<...>>`) cannot satisfy a
      // check meant for the column's top-level type.
      expect(arrType).to.match(/^List/i, 'c_arr should be List');
      expect(mapType).to.match(/^(Map|List)/i, 'c_map should be Map (or List of Struct of key/value)');
      expect(structType).to.match(/^Struct/i, 'c_struct should be Struct');
      // Require the Struct element type too: a bare List check would also
      // accept a List of JSON strings.
      expect(arrStructType).to.match(/^List.*Struct/i, 'c_arr_struct should be List of Struct');

      // Sanity-check: NONE of the complex columns should be Utf8 — that
      // would indicate complex_types_as_json was inadvertently enabled.
      const complexColumns: Array<[string, string]> = [
        ['c_arr', arrType],
        ['c_map', mapType],
        ['c_struct', structType],
        ['c_arr_struct', arrStructType],
      ];
      for (const [column, typeName] of complexColumns) {
        expect(typeName).to.not.match(/^Utf8$/, `${column} must not be a JSON string column`);
      }
    } finally {
      if (statement !== null) {
        try {
          await statement.close();
        } catch {
          // best-effort cleanup
        }
      }
      // Not best-effort: a failing connection close should fail the test.
      await connection.close();
    }
  });
});

0 commit comments

Comments
 (0)