@@ -23,6 +23,143 @@ const { isArrowBigNumSymbol, bigNumToBigInt } = arrowUtils;
2323type ArrowSchema = Schema < TypeMap > ;
2424type ArrowSchemaField = Field < DataType < Type , TypeMap > > ;
2525
26+ /**
27+ * Metadata key carrying the original Arrow `Duration` time unit on
28+ * fields that were rewritten to `Int64` by the SEA IPC pre-processor
29+ * (`lib/sea/SeaArrowIpcDurationFix.ts`). We re-declare the constant
30+ * here (rather than importing it) so the converter has no compile-time
31+ * dependency on the SEA module — it's reused unchanged by the
32+ * thrift-path which has no SEA awareness.
33+ */
34+ const DURATION_UNIT_METADATA_KEY = 'databricks.arrow.duration_unit' ;
35+
/**
 * Render a native Arrow `Interval` cell as the string the thrift path
 * produces for the same SQL value:
 *
 *   YEAR-MONTH → `"Y-M"`                    (1 year 2 months → `"1-2"`)
 *   DAY-TIME   → `"D HH:mm:ss.fffffffff"`   (1 day 02:03:04 → `"1 02:03:04.000000000"`)
 *
 * The cell is read as a two-element integer array:
 *   YEAR-MONTH: `[years, months]`
 *   DAY-TIME:   `[days, milliseconds]`
 *
 * A negative interval is printed with one leading `-` in front of the
 * whole string; the individual fields are unsigned.
 *
 * Only `unit === 0` selects the YEAR-MONTH format. Every other unit
 * value (including a missing one) is formatted as DAY-TIME from the
 * first two array elements.
 *
 * @param value      array-like holding the two interval components
 * @param valueType  Arrow data type of the cell; only `unit` is read
 */
function formatArrowInterval(value: any, valueType: any): string {
  const first = Number(value[0]);
  const second = Number(value[1]);

  // Arrow IntervalUnit: 0 = YEAR_MONTH, 1 = DAY_TIME, 2 = MONTH_DAY_NANO.
  const isYearMonth = valueType?.unit === 0;
  if (isYearMonth) {
    return formatYearMonth(first, second);
  }

  // DAY_TIME: fold (days, milliseconds) into one signed millisecond
  // count so that mixed-sign components are normalised, then widen to
  // nanoseconds for the shared day-time formatter. The fractional part
  // never carries more than millisecond precision on this path.
  const MS_PER_DAY = BigInt(86_400_000);
  const NS_PER_MS = BigInt(1_000_000);
  const totalMillis = BigInt(first) * MS_PER_DAY + BigInt(second);
  return formatDayTimeFromTotal(totalMillis * NS_PER_MS, 'NANOSECOND');
}
69+
/**
 * Format a (years, months) pair as `"Y-M"`, or `"-Y-M"` when the
 * interval as a whole is negative.
 *
 * The pair is first collapsed into a signed total number of months and
 * then split again, so the output is always normalised: the month field
 * is in `0..11`, both fields are unsigned, and the sign appears exactly
 * once at the front. For the same-sign, already-normalised pairs that
 * Arrow produces this yields exactly the digits that were passed in;
 * for mixed-sign or overflowing input (e.g. `(1, -2)` or `(0, 14)`) it
 * avoids malformed output such as `"1--2"` or `"0-14"`.
 *
 * @param years   year component (may be negative)
 * @param months  month component (may be negative)
 * @returns the interval in `"Y-M"` / `"-Y-M"` form
 */
function formatYearMonth(years: number, months: number): string {
  const totalMonths = years * 12 + months;
  const sign = totalMonths < 0 ? '-' : '';
  const magnitude = Math.abs(totalMonths);
  const wholeYears = Math.trunc(magnitude / 12);
  const leftoverMonths = magnitude % 12;
  return `${sign}${wholeYears}-${leftoverMonths}`;
}
88+
/**
 * Turn a duration count into the INTERVAL DAY-TIME string
 * (`"D HH:mm:ss.fffffffff"`).
 *
 * The count is scaled to nanoseconds according to `unit` and then
 * handed to the shared day-time formatter.
 *
 * @param value  signed duration count; a `number` is converted with
 *               `BigInt(...)` first
 * @param unit   time unit of `value`: `SECOND`, `MILLISECOND`,
 *               `MICROSECOND` or `NANOSECOND`
 * @returns the formatted interval string
 */
function formatDurationToIntervalDayTime(value: bigint | number, unit: string): string {
  let count: bigint;
  if (typeof value === 'bigint') {
    count = value;
  } else {
    count = BigInt(value);
  }
  return formatDayTimeFromTotal(toNanoseconds(count, unit), unit);
}
104+
/**
 * Convert a duration count expressed in `unit` into nanoseconds.
 *
 *   SECOND       → value × 1_000_000_000
 *   MILLISECOND  → value ×     1_000_000
 *   MICROSECOND  → value ×         1_000
 *   NANOSECOND   → value (unchanged)
 *
 * Any unit string other than the four above is treated like
 * `NANOSECOND`: the value is returned as is.
 *
 * @param value  signed duration count
 * @param unit   unit name (exact, upper-case match)
 * @returns the duration in nanoseconds
 */
function toNanoseconds(value: bigint, unit: string): bigint {
  if (unit === 'SECOND') {
    return value * BigInt(1_000_000_000);
  }
  if (unit === 'MILLISECOND') {
    return value * BigInt(1_000_000);
  }
  if (unit === 'MICROSECOND') {
    return value * BigInt(1_000);
  }
  // 'NANOSECOND' and every unrecognised unit: already nanoseconds.
  return value;
}
126+
/**
 * Render a signed nanosecond count as `"D HH:mm:ss.fffffffff"`.
 *
 * Hours, minutes and seconds are zero-padded to two digits and the
 * fraction is always nine digits wide, whatever precision the value
 * originally had. A negative count is printed as the absolute value
 * with a single `-` in front.
 *
 * @param totalNanos  signed duration in nanoseconds
 * @param _unit       not used by the formatting; accepted so callers
 *                    can pass the original unit through
 * @returns the formatted interval string
 */
function formatDayTimeFromTotal(totalNanos: bigint, _unit: string): string {
  const NANOS_PER_SECOND = BigInt(1_000_000_000);
  const SIXTY = BigInt(60);
  const TWENTY_FOUR = BigInt(24);

  const isNegative = totalNanos < BigInt(0);
  const magnitude = isNegative ? -totalNanos : totalNanos;

  // Peel the fields off from the smallest unit upwards.
  const fractionNanos = magnitude % NANOS_PER_SECOND;
  const wholeSeconds = magnitude / NANOS_PER_SECOND;
  const ss = wholeSeconds % SIXTY;
  const wholeMinutes = wholeSeconds / SIXTY;
  const mm = wholeMinutes % SIXTY;
  const wholeHours = wholeMinutes / SIXTY;
  const hh = wholeHours % TWENTY_FOUR;
  const dd = wholeHours / TWENTY_FOUR;

  const twoDigits = (part: bigint): string => part.toString().padStart(2, '0');
  const clock = [hh, mm, ss].map(twoDigits).join(':');
  const fraction = fractionNanos.toString().padStart(9, '0');

  return `${isNegative ? '-' : ''}${dd.toString()} ${clock}.${fraction}`;
}
162+
26163export default class ArrowResultConverter implements IResultsProvider < Array < any > > {
27164 private readonly context : IClientContext ;
28165
@@ -142,37 +279,52 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
142279 private getRows ( schema : ArrowSchema , rows : Array < StructRow | MapRow > ) : Array < any > {
143280 return rows . map ( ( row ) => {
144281 // First, convert native Arrow values to corresponding plain JS objects
145- const record = this . convertArrowTypes ( row , undefined , schema . fields ) ;
282+ const record = this . convertArrowTypes ( row , undefined , schema . fields , undefined ) ;
146283 // Second, cast all the values to original Thrift types
147284 return this . convertThriftTypes ( record ) ;
148285 } ) ;
149286 }
150287
151- private convertArrowTypes ( value : any , valueType : DataType | undefined , fields : Array < ArrowSchemaField > = [ ] ) : any {
288+ private convertArrowTypes (
289+ value : any ,
290+ valueType : DataType | undefined ,
291+ fields : Array < ArrowSchemaField > = [ ] ,
292+ field ?: ArrowSchemaField ,
293+ ) : any {
152294 if ( value === null ) {
153295 return value ;
154296 }
155297
156298 const fieldsMap : Record < string , ArrowSchemaField > = { } ;
157- for ( const field of fields ) {
158- fieldsMap [ field . name ] = field ;
299+ for ( const f of fields ) {
300+ fieldsMap [ f . name ] = f ;
159301 }
160302
161303 // Convert structures to plain JS object and process all its fields recursively
162304 if ( value instanceof StructRow ) {
163305 const result = value . toJSON ( ) ;
164306 for ( const key of Object . keys ( result ) ) {
165- const field : ArrowSchemaField | undefined = fieldsMap [ key ] ;
166- result [ key ] = this . convertArrowTypes ( result [ key ] , field ?. type , field ?. type . children || [ ] ) ;
307+ const childField : ArrowSchemaField | undefined = fieldsMap [ key ] ;
308+ result [ key ] = this . convertArrowTypes (
309+ result [ key ] ,
310+ childField ?. type ,
311+ childField ?. type . children || [ ] ,
312+ childField ,
313+ ) ;
167314 }
168315 return result ;
169316 }
170317 if ( value instanceof MapRow ) {
171318 const result = value . toJSON ( ) ;
172319 // Map type consists of its key and value types. We need only value type here, key will be cast to string anyway
173- const field = fieldsMap . entries ?. type . children . find ( ( item ) => item . name === 'value' ) ;
320+ const valueField = fieldsMap . entries ?. type . children . find ( ( item ) => item . name === 'value' ) ;
174321 for ( const key of Object . keys ( result ) ) {
175- result [ key ] = this . convertArrowTypes ( result [ key ] , field ?. type , field ?. type . children || [ ] ) ;
322+ result [ key ] = this . convertArrowTypes (
323+ result [ key ] ,
324+ valueField ?. type ,
325+ valueField ?. type . children || [ ] ,
326+ valueField ,
327+ ) ;
176328 }
177329 return result ;
178330 }
@@ -181,31 +333,67 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
181333 if ( value instanceof Vector ) {
182334 const result = value . toJSON ( ) ;
183335 // Array type contains the only child which defines a type of each array's element
184- const field = fieldsMap . element ;
185- return result . map ( ( item ) => this . convertArrowTypes ( item , field ?. type , field ?. type . children || [ ] ) ) ;
336+ const elementField = fieldsMap . element ;
337+ return result . map ( ( item ) =>
338+ this . convertArrowTypes ( item , elementField ?. type , elementField ?. type . children || [ ] , elementField ) ,
339+ ) ;
186340 }
187341
188342 if ( DataType . isTimestamp ( valueType ) ) {
189343 return new Date ( value ) ;
190344 }
191345
346+ // INTERVAL — Spark/Databricks SEA emits two flavours: native Arrow
347+ // `Interval[YearMonth]` / `Interval[DayTime]` (handled here) and
348+ // `Duration` (transparently rewritten to `Int64` upstream by
349+ // `SeaArrowIpcDurationFix.ts`; handled in the bigint/Int64 branch
350+ // below). In every case we coerce to the canonical thrift string
351+ // form so the SEA path is byte-identical with the thrift path:
352+ // YEAR-MONTH → `"Y-M"`
353+ // DAY-TIME → `"D HH:mm:ss.fffffffff"`
354+ if ( DataType . isInterval ( valueType ) ) {
355+ return formatArrowInterval ( value , valueType ) ;
356+ }
357+
192358 // Convert big number values to BigInt
193359 // Decimals are also represented as big numbers in Arrow, so additionally process them (convert to float)
194360 if ( value instanceof Object && value [ isArrowBigNumSymbol ] ) {
195361 const result = bigNumToBigInt ( value ) ;
196362 if ( DataType . isDecimal ( valueType ) ) {
197363 return Number ( result ) / 10 ** valueType . scale ;
198364 }
365+ // Duration columns rewritten to Int64 — detect via metadata.
366+ const durationUnit = field ?. metadata . get ( DURATION_UNIT_METADATA_KEY ) ;
367+ if ( durationUnit ) {
368+ return formatDurationToIntervalDayTime ( result , durationUnit ) ;
369+ }
199370 return result ;
200371 }
201372
202373 // Convert binary data to Buffer
203374 if ( value instanceof Uint8Array ) {
      // NOTE: `value instanceof Uint8Array` matches only Uint8Array and
      // its subclasses (for example Node's Buffer). It is NOT true for
      // other typed arrays such as Int32Array, so an interval that
      // apache-arrow surfaced as an Int32Array of size 2 never reaches
      // this branch. Intervals are formatted only by the
      // `DataType.isInterval` branch above; no interval-specific
      // fallback is implemented here.
204382 return Buffer . from ( value ) ;
205383 }
206384
385+ // Bigint fallback — for raw bigints (not BigNum wrappers), the
386+ // duration_unit metadata also gates the INTERVAL DAY-TIME format.
387+ if ( typeof value === 'bigint' ) {
388+ const durationUnit = field ?. metadata . get ( DURATION_UNIT_METADATA_KEY ) ;
389+ if ( durationUnit ) {
390+ return formatDurationToIntervalDayTime ( value , durationUnit ) ;
391+ }
392+ return Number ( value ) ;
393+ }
394+
207395 // Return other values as is
208- return typeof value === 'bigint' ? Number ( value ) : value ;
396+ return value ;
209397 }
210398
211399 private convertThriftTypes ( record : Record < string , any > ) : any {
0 commit comments