Skip to content

Commit bf21af2

Browse files
committed
Added support for custom timestamp format
1 parent 36fb376 commit bf21af2

File tree

8 files changed

+121
-3
lines changed

8 files changed

+121
-3
lines changed

src/databricks/sql/backend/sea/result_set.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def _convert_json_types(self, row: List[str]) -> List[Any]:
104104
column_name=column_name,
105105
precision=precision,
106106
scale=scale,
107+
timestamp_format=self.connection.non_arrow_timestamp_format,
107108
)
108109
converted_row.append(converted_value)
109110

src/databricks/sql/backend/sea/utils/conversion.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from dateutil import parser
1212
from typing import Callable, Dict, Optional
1313

14+
from databricks.sql.utils import parse_timestamp
15+
1416
logger = logging.getLogger(__name__)
1517

1618

@@ -162,6 +164,9 @@ def convert_value(
162164
precision = kwargs.get("precision", None)
163165
scale = kwargs.get("scale", None)
164166
return converter_func(value, precision, scale)
167+
elif sql_type == SqlType.TIMESTAMP:
168+
timestamp_format = kwargs.get("timestamp_format", None)
169+
return parse_timestamp(value, timestamp_format)
165170
else:
166171
return converter_func(value)
167172
except Exception as e:

src/databricks/sql/backend/thrift_backend.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1296,6 +1296,7 @@ def fetch_results(
12961296
description,
12971297
chunk_id: int,
12981298
use_cloud_fetch=True,
1299+
timestamp_format=None,
12991300
):
13001301
thrift_handle = command_id.to_thrift_handle()
13011302
if not thrift_handle:
@@ -1336,6 +1337,7 @@ def fetch_results(
13361337
statement_id=command_id.to_hex_guid(),
13371338
chunk_id=chunk_id,
13381339
http_client=self._http_client,
1340+
timestamp_format=timestamp_format,
13391341
)
13401342

13411343
return (

src/databricks/sql/client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ def read(self) -> Optional[OAuthToken]:
295295
self.disable_pandas = kwargs.get("_disable_pandas", False)
296296
self.lz4_compression = kwargs.get("enable_query_result_lz4_compression", True)
297297
self.use_cloud_fetch = kwargs.get("use_cloud_fetch", True)
298+
self.non_arrow_timestamp_format = kwargs.get("non_arrow_timestamp_format", None)
298299
self._cursors = [] # type: List[Cursor]
299300
self.telemetry_batch_size = kwargs.get(
300301
"telemetry_batch_size", TelemetryClientFactory.DEFAULT_BATCH_SIZE

src/databricks/sql/result_set.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ def __init__(
245245
statement_id=execute_response.command_id.to_hex_guid(),
246246
chunk_id=self.num_chunks,
247247
http_client=connection.http_client,
248+
timestamp_format=connection.non_arrow_timestamp_format,
248249
)
249250
if t_row_set.resultLinks:
250251
self.num_chunks += len(t_row_set.resultLinks)
@@ -281,6 +282,7 @@ def _fill_results_buffer(self):
281282
description=self.description,
282283
use_cloud_fetch=self._use_cloud_fetch,
283284
chunk_id=self.num_chunks,
285+
timestamp_format=self.connection.non_arrow_timestamp_format,
284286
)
285287
self.results = results
286288
self.has_more_rows = has_more_rows

src/databricks/sql/utils.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from decimal import Decimal
1111
from enum import Enum
1212
import re
13+
import pytz
1314

1415
import lz4.frame
1516

@@ -53,6 +54,32 @@ def get_session_config_value(
5354
return None
5455

5556

57+
def parse_timestamp(
    value: str, timestamp_format: Optional[str] = None
) -> datetime.datetime:
    """Parse a timestamp string into a datetime object.

    If timestamp_format is provided, tries strptime first and falls back to
    dateutil.parser.parse on ValueError. If timestamp_format is None, uses
    dateutil.parser.parse directly.

    Both paths return a naive datetime unless the input string itself carries
    an offset that dateutil picks up.

    Args:
        value: The timestamp string to parse.
        timestamp_format: An optional strptime-compatible format string.

    Returns:
        A datetime.datetime object.
    """
    if timestamp_format is not None:
        try:
            # NOTE(review): an earlier revision attached pytz.UTC here, which
            # made this path tz-aware while the dateutil fallback stayed
            # naive; aware == naive is always False, so equality checks (and
            # the accompanying unit tests, which expect naive datetimes)
            # broke. Keep the strptime result naive for consistency.
            return datetime.datetime.strptime(value, timestamp_format)
        except ValueError:
            # Format did not match; fall back to dateutil's flexible parser.
            return parser.parse(value)
    return parser.parse(value)
81+
82+
5683
class ResultSetQueue(ABC):
5784
@abstractmethod
5885
def next_n_rows(self, num_rows: int):
@@ -81,6 +108,7 @@ def build_queue(
81108
http_client,
82109
lz4_compressed: bool = True,
83110
description: List[Tuple] = [],
111+
timestamp_format: Optional[str] = None,
84112
) -> ResultSetQueue:
85113
"""
86114
Factory method to build a result set queue for Thrift backend.
@@ -93,6 +121,7 @@ def build_queue(
93121
description (List[List[Any]]): Hive table schema description.
94122
max_download_threads (int): Maximum number of downloader thread pool threads.
95123
ssl_options (SSLOptions): SSLOptions object for CloudFetchQueue
124+
timestamp_format: Optional strptime-compatible format for timestamp parsing.
96125
97126
Returns:
98127
ResultSetQueue
@@ -112,7 +141,7 @@ def build_queue(
112141
)
113142

114143
converted_column_table = convert_to_assigned_datatypes_in_column_table(
115-
column_table, description
144+
column_table, description, timestamp_format=timestamp_format
116145
)
117146

118147
return ColumnQueue(ColumnTable(converted_column_table, column_names))
@@ -760,7 +789,9 @@ def convert_decimals_in_arrow_table(table, description) -> "pyarrow.Table":
760789
return pyarrow.Table.from_arrays(new_columns, schema=new_schema)
761790

762791

763-
def convert_to_assigned_datatypes_in_column_table(column_table, description):
792+
def convert_to_assigned_datatypes_in_column_table(
793+
column_table, description, timestamp_format=None
794+
):
764795

765796
converted_column_table = []
766797
for i, col in enumerate(column_table):
@@ -774,7 +805,10 @@ def convert_to_assigned_datatypes_in_column_table(column_table, description):
774805
)
775806
elif description[i][1] == "timestamp":
776807
converted_column_table.append(
777-
tuple((v if v is None else parser.parse(v)) for v in col)
808+
tuple(
809+
(v if v is None else parse_timestamp(v, timestamp_format))
810+
for v in col
811+
)
778812
)
779813
else:
780814
converted_column_table.append(col)

tests/unit/test_sea_conversion.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,37 @@ def test_convert_unsupported_type(self):
147147
SqlTypeConverter.convert_value("complex_value", SqlType.STRUCT, None)
148148
== "complex_value"
149149
)
150+
151+
def test_convert_timestamp_with_format(self):
    """An explicit strptime format should be honored for TIMESTAMP values."""
    explicit_format = "%Y-%m-%d %H:%M:%S.%f"
    converted = SqlTypeConverter.convert_value(
        "2023-12-31 12:30:00.123000",
        SqlType.TIMESTAMP,
        None,
        timestamp_format=explicit_format,
    )
    expected = datetime.datetime(2023, 12, 31, 12, 30, 0, 123000)
    assert isinstance(converted, datetime.datetime)
    assert converted == expected
162+
163+
def test_convert_timestamp_with_format_fallback(self):
    """A value not matching the explicit format should be parsed by dateutil."""
    non_matching_format = "%Y-%m-%d %H:%M:%S.%f"
    converted = SqlTypeConverter.convert_value(
        "08-Mar-2024 14:30:15",
        SqlType.TIMESTAMP,
        None,
        timestamp_format=non_matching_format,
    )
    assert isinstance(converted, datetime.datetime)
    assert converted == datetime.datetime(2024, 3, 8, 14, 30, 15)
174+
175+
def test_convert_timestamp_without_format(self):
    """Without an explicit format, TIMESTAMP parsing falls through to dateutil."""
    converted = SqlTypeConverter.convert_value(
        "2023-01-15T12:30:45",
        SqlType.TIMESTAMP,
        None,
    )
    assert isinstance(converted, datetime.datetime)
    assert converted == datetime.datetime(2023, 1, 15, 12, 30, 45)

tests/unit/test_util.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55
from databricks.sql.utils import (
66
convert_to_assigned_datatypes_in_column_table,
7+
parse_timestamp,
78
ColumnTable,
89
concat_table_chunks,
910
serialize_query_tags,
@@ -224,3 +225,41 @@ def test_serialize_query_tags_all_none_values(self):
224225
query_tags = {"key1": None, "key2": None, "key3": None}
225226
result = serialize_query_tags(query_tags)
226227
assert result == "key1,key2,key3"
228+
229+
230+
class TestParseTimestamp:
    """Tests for parse_timestamp and its use in column-table conversion."""

    def test_no_format_uses_dateutil(self):
        # Without a format string, dateutil handles the parsing.
        parsed = parse_timestamp("2023-12-31 12:30:00")
        assert parsed == datetime.datetime(2023, 12, 31, 12, 30, 0)

    def test_matching_format_uses_strptime(self):
        strict_format = "%Y-%m-%d %H:%M:%S.%f"
        parsed = parse_timestamp("2023-12-31 12:30:00.123000", strict_format)
        assert parsed == datetime.datetime(2023, 12, 31, 12, 30, 0, 123000)

    def test_non_matching_format_falls_back_to_dateutil(self):
        strict_format = "%Y-%m-%d %H:%M:%S.%f"
        # This doesn't match the format, so should fall back to dateutil
        parsed = parse_timestamp("08-Mar-2024 14:30:15", strict_format)
        assert parsed == datetime.datetime(2024, 3, 8, 14, 30, 15)

    def test_convert_column_table_with_timestamp_format(self):
        schema = [("ts_col", "timestamp", None, None, None, None, None)]
        table = [("2023-12-31 12:30:00.000000",)]
        converted = convert_to_assigned_datatypes_in_column_table(
            table, schema, timestamp_format="%Y-%m-%d %H:%M:%S.%f"
        )
        assert converted[0][0] == datetime.datetime(2023, 12, 31, 12, 30, 0)

    def test_convert_column_table_without_timestamp_format(self):
        schema = [("ts_col", "timestamp", None, None, None, None, None)]
        table = [("2023-12-31 12:30:00",)]
        converted = convert_to_assigned_datatypes_in_column_table(table, schema)
        assert converted[0][0] == datetime.datetime(2023, 12, 31, 12, 30, 0)

0 commit comments

Comments
 (0)