Skip to content

Commit 5b592dc

Browse files
timsaucer and claude committed
Add unit tests for new array/list functions and aliases
Tests cover all functions and aliases added in the previous commit: array_any_value, array_distance, array_max, array_min, array_reverse, arrays_zip, string_to_array, gen_series, generate_series, array_contains, list_contains, list_empty, list_pop_back, list_pop_front, list_has, list_has_all, list_has_any, and list_* aliases for the new functions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3fd8b16 commit 5b592dc

File tree

1 file changed

+189
-0
lines changed

1 file changed

+189
-0
lines changed

python/tests/test_functions.py

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,3 +1435,192 @@ def test_coalesce(df):
14351435
assert result.column(0) == pa.array(
14361436
["Hello", "fallback", "!"], type=pa.string_view()
14371437
)
1438+
1439+
1440+
def test_array_any_value():
    """Check array_any_value over three rows.

    The rows ``[None, 2, 3]``, ``[None, None, None]`` and ``[1, 2, 3]`` are
    expected to yield ``2``, ``None`` and ``1`` respectively.
    """
    session = SessionContext()
    frame = session.from_pydict(
        {"a": [[None, 2, 3], [None, None, None], [1, 2, 3]]}
    )
    picked_expr = f.array_any_value(column("a")).alias("v")
    batches = frame.select(picked_expr).collect()
    picked = [cell.as_py() for cell in batches[0].column(0)]
    assert picked[0] == 2
    assert picked[1] is None
    assert picked[2] == 1
1448+
1449+
1450+
def test_list_any_value():
    """Check that list_any_value yields 5 for the single row ``[None, 5]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[None, 5]]})
    picked_expr = f.list_any_value(column("a")).alias("v")
    batches = frame.select(picked_expr).collect()
    first_cell = batches[0].column(0)[0]
    assert first_cell.as_py() == 5
1455+
1456+
1457+
def test_array_distance():
    """Check array_distance between ``[1.0, 2.0]`` and ``[1.0, 4.0]``.

    The result is compared against 2.0 with ``pytest.approx``.
    """
    session = SessionContext()
    frame = session.from_pydict({"a": [[1.0, 2.0]], "b": [[1.0, 4.0]]})
    distance_expr = f.array_distance(column("a"), column("b")).alias("v")
    batches = frame.select(distance_expr).collect()
    first_cell = batches[0].column(0)[0]
    assert first_cell.as_py() == pytest.approx(2.0)
1462+
1463+
1464+
def test_list_distance():
    """Check list_distance between ``[3.0, 0.0]`` and ``[0.0, 4.0]``.

    The result is compared against 5.0 with ``pytest.approx``.
    """
    session = SessionContext()
    frame = session.from_pydict({"a": [[3.0, 0.0]], "b": [[0.0, 4.0]]})
    distance_expr = f.list_distance(column("a"), column("b")).alias("v")
    batches = frame.select(distance_expr).collect()
    first_cell = batches[0].column(0)[0]
    assert first_cell.as_py() == pytest.approx(5.0)
1469+
1470+
1471+
def test_array_max():
    """Check that array_max yields 5 and 10 for ``[1, 5, 3]`` and ``[10, 2]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 5, 3], [10, 2]]})
    max_expr = f.array_max(column("a")).alias("v")
    batches = frame.select(max_expr).collect()
    maxima = [cell.as_py() for cell in batches[0].column(0)]
    assert maxima == [5, 10]
1477+
1478+
1479+
def test_list_max():
    """Check that list_max yields 9 for the single row ``[7, 2, 9]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[7, 2, 9]]})
    max_expr = f.list_max(column("a")).alias("v")
    batches = frame.select(max_expr).collect()
    first_cell = batches[0].column(0)[0]
    assert first_cell.as_py() == 9
1484+
1485+
1486+
def test_array_min():
    """Check that array_min yields 1 and 2 for ``[1, 5, 3]`` and ``[10, 2]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 5, 3], [10, 2]]})
    min_expr = f.array_min(column("a")).alias("v")
    batches = frame.select(min_expr).collect()
    minima = [cell.as_py() for cell in batches[0].column(0)]
    assert minima == [1, 2]
1492+
1493+
1494+
def test_list_min():
    """Check that list_min yields 2 for the single row ``[7, 2, 9]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[7, 2, 9]]})
    min_expr = f.list_min(column("a")).alias("v")
    batches = frame.select(min_expr).collect()
    first_cell = batches[0].column(0)[0]
    assert first_cell.as_py() == 2
1499+
1500+
1501+
def test_array_reverse():
    """Check array_reverse on ``[1, 2, 3]`` and ``[4, 5]``.

    The expected output rows are ``[3, 2, 1]`` and ``[5, 4]``.
    """
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3], [4, 5]]})
    reversed_expr = f.array_reverse(column("a")).alias("v")
    batches = frame.select(reversed_expr).collect()
    flipped = [cell.as_py() for cell in batches[0].column(0)]
    assert flipped == [[3, 2, 1], [5, 4]]
1507+
1508+
1509+
def test_list_reverse():
    """Check that list_reverse turns ``[10, 20, 30]`` into ``[30, 20, 10]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[10, 20, 30]]})
    reversed_expr = f.list_reverse(column("a")).alias("v")
    batches = frame.select(reversed_expr).collect()
    first_cell = batches[0].column(0)[0]
    assert first_cell.as_py() == [30, 20, 10]
1514+
1515+
1516+
def test_arrays_zip():
    """Check arrays_zip on columns ``[1, 2]`` and ``[3, 4]``.

    The expected result is a list of dicts keyed ``c0`` / ``c1`` pairing the
    elements position by position.
    """
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2]], "b": [[3, 4]]})
    zipped_expr = f.arrays_zip(column("a"), column("b")).alias("v")
    batches = frame.select(zipped_expr).collect()
    zipped = batches[0].column(0)[0].as_py()
    expected = [{"c0": 1, "c1": 3}, {"c0": 2, "c1": 4}]
    assert zipped == expected
1522+
1523+
1524+
def test_list_zip():
    """Check list_zip on columns ``[1, 2]`` and ``[3, 4]``.

    The expected result is a list of dicts keyed ``c0`` / ``c1`` pairing the
    elements position by position.
    """
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2]], "b": [[3, 4]]})
    zipped_expr = f.list_zip(column("a"), column("b")).alias("v")
    batches = frame.select(zipped_expr).collect()
    zipped = batches[0].column(0)[0].as_py()
    expected = [{"c0": 1, "c1": 3}, {"c0": 2, "c1": 4}]
    assert zipped == expected
1530+
1531+
1532+
def test_string_to_array():
    """Check string_to_array splitting ``"hello,world,foo"`` on ``","``.

    An empty-string literal is passed as the third argument.
    """
    session = SessionContext()
    frame = session.from_pydict({"a": ["hello,world,foo"]})
    split_expr = f.string_to_array(column("a"), literal(","), literal(""))
    batches = frame.select(split_expr.alias("v")).collect()
    pieces = batches[0].column(0)[0].as_py()
    assert pieces == ["hello", "world", "foo"]
1539+
1540+
1541+
def test_string_to_list():
    """Check string_to_list splitting ``"a-b-c"`` on ``"-"``.

    An empty-string literal is passed as the third argument.
    """
    session = SessionContext()
    frame = session.from_pydict({"a": ["a-b-c"]})
    split_expr = f.string_to_list(column("a"), literal("-"), literal(""))
    batches = frame.select(split_expr.alias("v")).collect()
    pieces = batches[0].column(0)[0].as_py()
    assert pieces == ["a", "b", "c"]
1548+
1549+
1550+
def test_gen_series():
    """Check that gen_series(1, 5, 1) produces ``[1, 2, 3, 4, 5]``."""
    session = SessionContext()
    # A one-row frame gives the literal-only expression a row to evaluate on.
    frame = session.from_pydict({"a": [0]})
    series_expr = f.gen_series(literal(1), literal(5), literal(1))
    batches = frame.select(series_expr.alias("v")).collect()
    series = batches[0].column(0)[0].as_py()
    assert series == [1, 2, 3, 4, 5]
1557+
1558+
1559+
def test_generate_series():
    """Check that generate_series(1, 3, 1) produces ``[1, 2, 3]``."""
    session = SessionContext()
    # A one-row frame gives the literal-only expression a row to evaluate on.
    frame = session.from_pydict({"a": [0]})
    series_expr = f.generate_series(literal(1), literal(3), literal(1))
    batches = frame.select(series_expr.alias("v")).collect()
    series = batches[0].column(0)[0].as_py()
    assert series == [1, 2, 3]
1566+
1567+
1568+
def test_array_contains():
    """Check that array_contains reports True for 2 in ``[1, 2, 3]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    contains_expr = f.array_contains(column("a"), literal(2)).alias("v")
    batches = frame.select(contains_expr).collect()
    found = batches[0].column(0)[0].as_py()
    assert found is True
1573+
1574+
1575+
def test_list_contains():
    """Check that list_contains reports False for 99 in ``[1, 2, 3]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    contains_expr = f.list_contains(column("a"), literal(99)).alias("v")
    batches = frame.select(contains_expr).collect()
    found = batches[0].column(0)[0].as_py()
    assert found is False
1580+
1581+
1582+
def test_list_empty():
    """Check that list_empty yields True for ``[]`` and False for ``[1, 2]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[], [1, 2]]})
    empty_expr = f.list_empty(column("a")).alias("v")
    batches = frame.select(empty_expr).collect()
    flags = [cell.as_py() for cell in batches[0].column(0)]
    assert flags == [True, False]
1588+
1589+
1590+
def test_list_pop_back():
    """Check that list_pop_back turns ``[1, 2, 3]`` into ``[1, 2]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    popped_expr = f.list_pop_back(column("a")).alias("v")
    batches = frame.select(popped_expr).collect()
    remaining = batches[0].column(0)[0].as_py()
    assert remaining == [1, 2]
1595+
1596+
1597+
def test_list_pop_front():
    """Check that list_pop_front turns ``[1, 2, 3]`` into ``[2, 3]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    popped_expr = f.list_pop_front(column("a")).alias("v")
    batches = frame.select(popped_expr).collect()
    remaining = batches[0].column(0)[0].as_py()
    assert remaining == [2, 3]
1602+
1603+
1604+
def test_list_has():
    """Check that list_has reports True for 2 in ``[1, 2, 3]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    has_expr = f.list_has(column("a"), literal(2)).alias("v")
    batches = frame.select(has_expr).collect()
    found = batches[0].column(0)[0].as_py()
    assert found is True
1609+
1610+
1611+
def test_list_has_all():
    """Check that list_has_all reports True for ``[1, 2]`` against ``[1, 2, 3]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    needles = f.make_array(literal(1), literal(2))
    has_all_expr = f.list_has_all(column("a"), needles).alias("v")
    batches = frame.select(has_all_expr).collect()
    found = batches[0].column(0)[0].as_py()
    assert found is True
1618+
1619+
1620+
def test_list_has_any():
    """Check that list_has_any reports True for ``[5, 2]`` against ``[1, 2, 3]``."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    needles = f.make_array(literal(5), literal(2))
    has_any_expr = f.list_has_any(column("a"), needles).alias("v")
    batches = frame.select(has_any_expr).collect()
    found = batches[0].column(0)[0].as_py()
    assert found is True

0 commit comments

Comments
 (0)