Skip to content

Commit 4ac598f

Browse files
authored
Merge branch 'main' into fix/local-only-format
2 parents 419d277 + 8e65b27 commit 4ac598f

7 files changed

Lines changed: 117 additions & 1 deletion

File tree

docs/integrations/engines/duckdb.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ SQLMesh will place models with the explicit catalog "ephemeral", such as `epheme
7979
type: ducklake
8080
path: 'catalog.ducklake'
8181
data_path: data/ducklake
82+
override_data_path: true
8283
encrypted: True
8384
data_inlining_row_limit: 10
8485
metadata_schema: main
@@ -105,6 +106,7 @@ SQLMesh will place models with the explicit catalog "ephemeral", such as `epheme
105106
type="ducklake",
106107
path="catalog.ducklake",
107108
data_path="data/ducklake",
109+
override_data_path=False,
108110
encrypted=True,
109111
data_inlining_row_limit=10,
110112
metadata_schema="main",
@@ -120,6 +122,7 @@ SQLMesh will place models with the explicit catalog "ephemeral", such as `epheme
120122

121123
- `path`: Path to the DuckLake catalog file
122124
- `data_path`: Path where DuckLake data files are stored
125+
- `override_data_path`: Whether to pass `OVERRIDE_DATA_PATH true` when attaching the DuckLake catalog (default: `False`)
123126
- `encrypted`: Whether to enable encryption for the catalog (default: `False`)
124127
- `data_inlining_row_limit`: Maximum number of rows to inline in the catalog (default: `0`)
125128
- `metadata_schema`: The schema in the catalog server in which to store the DuckLake metadata tables (default: `main`)
@@ -364,6 +367,7 @@ The `filesystems` accepts a list of file systems to register in the DuckDB conne
364367
type: ducklake
365368
path: myducklakecatalog.duckdb
366369
data_path: abfs://MyFabricWorkspace/MyFabricLakehouse.Lakehouse/Files/DuckLake.Files
370+
override_data_path: False
367371
extensions:
368372
- ducklake
369373
filesystems:

sqlmesh/core/config/connection.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ class DuckDBAttachOptions(BaseConfig):
238238

239239
# DuckLake specific options
240240
data_path: t.Optional[str] = None
241+
override_data_path: t.Optional[bool] = False
241242
encrypted: bool = False
242243
data_inlining_row_limit: t.Optional[int] = None
243244
metadata_schema: t.Optional[str] = None
@@ -258,6 +259,8 @@ def to_sql(self, alias: str) -> str:
258259
path = f"ducklake:{path}"
259260
if self.data_path is not None:
260261
options.append(f"DATA_PATH '{self.data_path}'")
262+
if self.override_data_path:
263+
options.append("OVERRIDE_DATA_PATH true")
261264
if self.encrypted:
262265
options.append("ENCRYPTED")
263266
if self.data_inlining_row_limit is not None:
@@ -2097,6 +2100,7 @@ class ClickhouseConnectionConfig(ConnectionConfig):
20972100
https_proxy: t.Optional[str] = None
20982101
server_host_name: t.Optional[str] = None
20992102
tls_mode: t.Optional[str] = None
2103+
secure: bool = False
21002104

21012105
concurrent_tasks: int = 1
21022106
register_comments: bool = True
@@ -2133,6 +2137,7 @@ def _connection_kwargs_keys(self) -> t.Set[str]:
21332137
"https_proxy",
21342138
"server_host_name",
21352139
"tls_mode",
2140+
"secure",
21362141
}
21372142
return kwargs
21382143

sqlmesh/core/dialect.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -774,7 +774,8 @@ def format_model_expressions(
774774
if rewrite_casts:
775775

776776
def cast_to_colon(node: exp.Expr) -> exp.Expr:
777-
if isinstance(node, exp.Cast) and not any(
777+
# Directly check type instead of isinstance to avoid rewriting subclasses of CAST, e.g. JSONCast
778+
if type(node) is exp.Cast and not any(
778779
# Only convert CAST into :: if it doesn't have additional args set, otherwise this
779780
# conversion could alter the semantics (eg. changing SAFE_CAST in BigQuery to CAST)
780781
arg

sqlmesh/core/engine_adapter/databricks.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,3 +411,27 @@ def _build_column_defs(
411411
return super()._build_column_defs(
412412
target_columns_to_types, column_descriptions, is_view, materialized
413413
)
414+
415+
def columns(
416+
self, table_name: TableName, include_pseudo_columns: bool = False
417+
) -> t.Dict[str, exp.DataType]:
418+
table = exp.to_table(table_name)
419+
420+
column_catalog = table.catalog or self.get_current_catalog()
421+
query = (
422+
exp.select("columns.column_name", "columns.full_data_type")
423+
.from_("system.information_schema.columns")
424+
.where(
425+
exp.and_(
426+
exp.column("table_name").eq(table.name),
427+
exp.column("table_schema").eq(table.db),
428+
exp.column("table_catalog").eq(column_catalog),
429+
)
430+
)
431+
.order_by("ordinal_position ASC")
432+
)
433+
434+
self.cursor.execute(query)
435+
result = self.cursor.fetchall()
436+
437+
return {row[0]: exp.DataType.build(row[1], dialect=self.dialect) for row in result}

tests/core/engine_adapter/test_databricks.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,3 +526,61 @@ def test_drop_data_object_materialized_view_calls_correct_drop(mocker: MockFixtu
526526
drop_view_mock.assert_called_once_with(
527527
mv_data_object.to_table(), ignore_if_not_exists=True, materialized=True
528528
)
529+
530+
531+
def test_columns(mocker: MockFixture, make_mocked_engine_adapter: t.Callable):
532+
adapter = make_mocked_engine_adapter(DatabricksEngineAdapter, default_catalog="test_catalog")
533+
534+
# Override/mock get_current_catalog to return default
535+
current_catalog_mock = mocker.patch.object(
536+
adapter, "get_current_catalog", return_value="test_catalog"
537+
)
538+
# Build the field list for a struct type with many (50) fields
539+
long_struct_cols = [f"a_{i}:int" for i in range(50)]
540+
adapter.cursor.fetchall.return_value = [
541+
("bigint_col", "bigint"),
542+
("binary_col", "binary"),
543+
("boolean_col", "boolean"),
544+
("date_col", "date"),
545+
("decimal_col", "decimal(38,4)"),
546+
("double_col", "double"),
547+
("float_col", "float"),
548+
("int_col", "int"),
549+
("small_int", "smallint"),
550+
("string_col", "string"),
551+
("timestamp_col", "timestamp"),
552+
("timestamp_ntz_col", "timestamp_ntz"),
553+
("tinyint_col", "tinyint"),
554+
("array_col", "array<int>"),
555+
("simple_struct_col", "struct<a:int,b:string>"),
556+
("long_struct_col", f"struct<{','.join(long_struct_cols)}>"),
557+
]
558+
559+
resp = adapter.columns("test_db.test_table")
560+
assert resp == {
561+
"bigint_col": exp.DataType.build("bigint", dialect=adapter.dialect),
562+
"binary_col": exp.DataType.build("binary", dialect=adapter.dialect),
563+
"boolean_col": exp.DataType.build("boolean", dialect=adapter.dialect),
564+
"date_col": exp.DataType.build("date", dialect=adapter.dialect),
565+
"decimal_col": exp.DataType.build("decimal(38,4)", dialect=adapter.dialect),
566+
"double_col": exp.DataType.build("double", dialect=adapter.dialect),
567+
"float_col": exp.DataType.build("float", dialect=adapter.dialect),
568+
"int_col": exp.DataType.build("int", dialect=adapter.dialect),
569+
"small_int": exp.DataType.build("smallint", dialect=adapter.dialect),
570+
"string_col": exp.DataType.build("string", dialect=adapter.dialect),
571+
"timestamp_col": exp.DataType.build("timestamp", dialect=adapter.dialect),
572+
"timestamp_ntz_col": exp.DataType.build("timestamp_ntz", dialect=adapter.dialect),
573+
"tinyint_col": exp.DataType.build("tinyint", dialect=adapter.dialect),
574+
"array_col": exp.DataType.build("array<int>", dialect=adapter.dialect),
575+
"simple_struct_col": exp.DataType.build("struct<a:int,b:string>", dialect=adapter.dialect),
576+
"long_struct_col": exp.DataType.build(
577+
f"struct<{','.join(long_struct_cols)}>", dialect=adapter.dialect
578+
),
579+
}
580+
581+
adapter.cursor.execute.assert_called_once_with(
582+
parse_one(
583+
"""SELECT columns.column_name, columns.full_data_type FROM system.information_schema.columns WHERE table_name = 'test_table' AND table_schema = 'test_db' AND table_catalog = 'test_catalog' ORDER BY ordinal_position ASC""",
584+
dialect="databricks",
585+
)
586+
)

tests/core/test_connection_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,7 @@ def test_duckdb_attach_ducklake_catalog(make_config):
810810
type="ducklake",
811811
path="catalog.ducklake",
812812
data_path="/tmp/ducklake_data",
813+
override_data_path=False,
813814
encrypted=True,
814815
data_inlining_row_limit=10,
815816
),

tests/core/test_dialect.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,29 @@ def test_format_model_expressions():
207207
SAFE_CAST('bla' AS INT64) AS FOO"""
208208
)
209209

210+
x = format_model_expressions(
211+
parse(
212+
"""
213+
MODEL(name a.b, kind FULL, dialect clickhouse);
214+
SELECT data.:String AS foo, CAST(1 AS INT) AS bar
215+
"""
216+
),
217+
dialect="clickhouse",
218+
)
219+
# JSONCast (e.g. `.:` syntax in ClickHouse) must not be rewritten to `::`
220+
assert (
221+
x
222+
== """MODEL (
223+
name a.b,
224+
kind FULL,
225+
dialect clickhouse
226+
);
227+
228+
SELECT
229+
data.:String AS foo,
230+
1::Int32 AS bar"""
231+
)
232+
210233
x = format_model_expressions(
211234
parse(
212235
"""

0 commit comments

Comments
 (0)