Skip to content

Commit 192593f

Browse files
timsaucer and claude committed
Add tests for new scalar functions
Tests for get_field, arrow_metadata, version, row, union_tag, and union_extract. Also fix codespell skip paths in pyproject.toml. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3a364d5 commit 192593f

File tree

2 files changed

+73
-3
lines changed

2 files changed

+73
-3
lines changed

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,10 +172,10 @@ extend-allowed-calls = ["datafusion.lit", "lit"]
172172

173173
[tool.codespell]
174174
skip = [
175-
"./python/tests/test_functions.py",
176-
"./target",
175+
"python/tests/test_functions.py",
176+
"target",
177177
"uv.lock",
178-
"./examples/tpch/answers_sf1/*",
178+
"examples/tpch/answers_sf1/*",
179179
]
180180
count = true
181181
ignore-words-list = ["IST", "ans"]

python/tests/test_functions.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,3 +1435,73 @@ def test_coalesce(df):
14351435
assert result.column(0) == pa.array(
14361436
["Hello", "fallback", "!"], type=pa.string_view()
14371437
)
1438+
1439+
1440+
def test_get_field(df):
    """Check that ``get_field`` extracts individual members of a struct column.

    A struct column ``s`` is built from columns ``a`` and ``b`` under the field
    names ``x`` and ``y``; each field is then read back and compared with the
    values expected from the ``df`` fixture.
    """
    struct_expr = f.named_struct(
        [
            ("x", column("a")),
            ("y", column("b")),
        ]
    )
    with_struct = df.with_column("s", struct_expr)

    x_expr = f.get_field(column("s"), string_literal("x")).alias("x_val")
    y_expr = f.get_field(column("s"), string_literal("y")).alias("y_val")
    batch = with_struct.select(x_expr, y_expr).collect()[0]

    expected_x = pa.array(["Hello", "World", "!"], type=pa.string_view())
    expected_y = pa.array([4, 5, 6])
    assert batch.column(0) == expected_x
    assert batch.column(1) == expected_y
1457+
1458+
1459+
def test_arrow_metadata(df):
    """Check the Arrow type of the column produced by ``arrow_metadata``."""
    meta_expr = f.arrow_metadata(column("a")).alias("meta")
    batch = df.select(meta_expr).collect()[0]

    # Only the type is verified: the metadata should come back as a
    # string-to-string map (possibly empty); its entries are not inspected.
    expected_type = pa.map_(pa.utf8(), pa.utf8())
    assert batch.column(0).type == expected_type
1465+
1466+
1467+
def test_version():
    """Check that ``version()`` yields a string mentioning Apache DataFusion."""
    # A one-row frame is enough: the function takes no column arguments.
    single_row = SessionContext().from_pydict({"a": [1]})
    batch = single_row.select(f.version().alias("v")).collect()[0]

    reported = batch.column(0)[0].as_py()
    assert "Apache DataFusion" in reported
1473+
1474+
1475+
def test_row(df):
    """Check that ``row`` and ``struct`` produce equal columns for the same inputs."""
    row_expr = f.row(column("a"), column("b")).alias("r")
    struct_expr = f.struct(column("a"), column("b")).alias("s")
    batch = df.select(row_expr, struct_expr).collect()[0]

    # row is an alias for struct, so both output columns must compare equal.
    row_col, struct_col = batch.column(0), batch.column(1)
    assert row_col == struct_col
1482+
1483+
1484+
def test_union_tag():
    """Check that ``union_tag`` reports the active field name for each union slot.

    The input is a dense union with fields ``int`` and ``str`` whose type ids
    are ``[0, 1, 0]``, so the expected tags are ``int``, ``str``, ``int``.
    """
    session = SessionContext()

    type_ids = pa.array([0, 1, 0], type=pa.int8())
    value_offsets = pa.array([0, 0, 1], type=pa.int32())
    int_child = pa.array([1, 2])
    str_child = pa.array(["hello"])
    union_arr = pa.UnionArray.from_dense(
        type_ids,
        value_offsets,
        [int_child, str_child],
        ["int", "str"],
        [0, 1],
    )
    input_batch = pa.RecordBatch.from_arrays([union_arr], names=["u"])
    union_df = session.create_dataframe([[input_batch]])

    tag_expr = f.union_tag(column("u")).alias("tag")
    tags = union_df.select(tag_expr).collect()[0].column(0)
    assert tags.to_pylist() == ["int", "str", "int"]
1494+
1495+
1496+
def test_union_extract():
    """Check that ``union_extract`` returns one field's values and null elsewhere.

    The input is a dense union with fields ``int`` and ``str`` whose type ids
    are ``[0, 1, 0]``; extracting ``int`` should give the integer values for
    slots 0 and 2 and ``None`` for the slot holding a string.
    """
    session = SessionContext()

    type_ids = pa.array([0, 1, 0], type=pa.int8())
    value_offsets = pa.array([0, 0, 1], type=pa.int32())
    int_child = pa.array([1, 2])
    str_child = pa.array(["hello"])
    union_arr = pa.UnionArray.from_dense(
        type_ids,
        value_offsets,
        [int_child, str_child],
        ["int", "str"],
        [0, 1],
    )
    input_batch = pa.RecordBatch.from_arrays([union_arr], names=["u"])
    union_df = session.create_dataframe([[input_batch]])

    extract_expr = f.union_extract(column("u"), string_literal("int")).alias("val")
    extracted = union_df.select(extract_expr).collect()[0].column(0)
    assert extracted.to_pylist() == [1, None, 2]

0 commit comments

Comments
 (0)