Skip to content

Commit 5b11478

Browse files
committed
test: add Test I SLT for TopK stats init + cumulative prune (no-WHERE path)
Test I.1: DESC LIMIT — EXPLAIN shows DynamicFilter [ empty ] + sort_order_for_reorder + reverse_row_groups=true (sort pushdown fires).
Test I.2: DESC LIMIT result correctness.
Test I.3: ASC LIMIT in same direction as file — Exact path, SortExec eliminated, limit becomes static fetch on source.
Test I.4: DESC LIMIT with WHERE — stats init and cumulative prune both skip (predicate is not a bare DynamicFilter), result still correct via dynamic filter pushdown.
Test I.5: Larger LIMIT spanning multiple RGs.
Test I.6: LIMIT larger than total rows — returns all rows.
1 parent b692306 commit 5b11478

1 file changed

Lines changed: 120 additions & 0 deletions

File tree

datafusion/sqllogictest/test_files/sort_pushdown.slt

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2392,6 +2392,126 @@ DROP TABLE th_mixed;
23922392
statement ok
23932393
DROP TABLE th_reorder;
23942394

2395+
# ===========================================================
2396+
# Test I: WITH ORDER + DESC LIMIT — stats init + cumulative prune
2397+
# Exercises the full optimisation chain on sorted non-overlapping data:
2398+
# file reorder → RG reorder → reverse → stats init → cumulative prune.
2399+
# Both stats init and cumulative prune are gated on no-WHERE (the
2400+
# predicate handed to the opener is a bare DynamicFilter) and on sort
2401+
# pushdown (sort_order_for_reorder is set).
2402+
# ===========================================================
2403+
2404+
statement ok
2405+
SET datafusion.execution.target_partitions = 1;
2406+
2407+
statement ok
2408+
SET datafusion.optimizer.enable_sort_pushdown = true;
2409+
2410+
# Create sorted data spread across multiple small row groups
2411+
statement ok
2412+
CREATE TABLE ti_data(id INT, value INT) AS VALUES
2413+
(1,10),(2,20),(3,30),(4,40),(5,50),(6,60),
2414+
(7,70),(8,80),(9,90),(10,100),(11,110),(12,120);
2415+
2416+
statement ok
2417+
SET datafusion.execution.parquet.max_row_group_size = 3;
2418+
2419+
query I
2420+
COPY (SELECT * FROM ti_data ORDER BY id ASC)
2421+
TO 'test_files/scratch/sort_pushdown/ti_sorted/data.parquet';
2422+
----
2423+
12
2424+
2425+
statement ok
2426+
SET datafusion.execution.parquet.max_row_group_size = 1048576;
2427+
2428+
statement ok
2429+
CREATE EXTERNAL TABLE ti_sorted(id INT, value INT)
2430+
STORED AS PARQUET
2431+
LOCATION 'test_files/scratch/sort_pushdown/ti_sorted/data.parquet'
2432+
WITH ORDER (id ASC);
2433+
2434+
# Test I.1: DESC LIMIT with sort pushdown — EXPLAIN shows reverse_row_groups=true
2435+
query TT
2436+
EXPLAIN SELECT * FROM ti_sorted ORDER BY id DESC LIMIT 3;
2437+
----
2438+
logical_plan
2439+
01)Sort: ti_sorted.id DESC NULLS FIRST, fetch=3
2440+
02)--TableScan: ti_sorted projection=[id, value]
2441+
physical_plan
2442+
01)SortExec: TopK(fetch=3), expr=[id@0 DESC], preserve_partitioning=[false]
2443+
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/ti_sorted/data.parquet]]}, projection=[id, value], file_type=parquet, predicate=DynamicFilter [ empty ], sort_order_for_reorder=[id@0 DESC], reverse_row_groups=true
2444+
2445+
# Test I.2: DESC LIMIT results — should return the largest values
2446+
query II
2447+
SELECT * FROM ti_sorted ORDER BY id DESC LIMIT 3;
2448+
----
2449+
12 120
2450+
11 110
2451+
10 100
2452+
2453+
# Test I.3: ASC LIMIT (same direction as file order) — sort elimination,
2454+
# no TopK in the plan
2455+
query TT
2456+
EXPLAIN SELECT * FROM ti_sorted ORDER BY id ASC LIMIT 3;
2457+
----
2458+
logical_plan
2459+
01)Sort: ti_sorted.id ASC NULLS LAST, fetch=3
2460+
02)--TableScan: ti_sorted projection=[id, value]
2461+
physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/sort_pushdown/ti_sorted/data.parquet]]}, projection=[id, value], limit=3, output_ordering=[id@0 ASC NULLS LAST], file_type=parquet
2462+
2463+
query II
2464+
SELECT * FROM ti_sorted ORDER BY id ASC LIMIT 3;
2465+
----
2466+
1 10
2467+
2 20
2468+
3 30
2469+
2470+
# Test I.4: DESC LIMIT with WHERE — stats init and cumulative prune both
2471+
# skip (predicate is not a bare DynamicFilter), result still correct
2472+
query II
2473+
SELECT * FROM ti_sorted WHERE value > 50 ORDER BY id DESC LIMIT 2;
2474+
----
2475+
12 120
2476+
11 110
2477+
2478+
# Test I.5: Larger LIMIT spanning multiple RGs (4 RGs of 3 rows each)
2479+
query II
2480+
SELECT * FROM ti_sorted ORDER BY id DESC LIMIT 8;
2481+
----
2482+
12 120
2483+
11 110
2484+
10 100
2485+
9 90
2486+
8 80
2487+
7 70
2488+
6 60
2489+
5 50
2490+
2491+
# Test I.6: LIMIT larger than total rows — returns all rows
2492+
query II
2493+
SELECT * FROM ti_sorted ORDER BY id DESC LIMIT 100;
2494+
----
2495+
12 120
2496+
11 110
2497+
10 100
2498+
9 90
2499+
8 80
2500+
7 70
2501+
6 60
2502+
5 50
2503+
4 40
2504+
3 30
2505+
2 20
2506+
1 10
2507+
2508+
# Cleanup Test I
2509+
statement ok
2510+
DROP TABLE ti_data;
2511+
2512+
statement ok
2513+
DROP TABLE ti_sorted;
2514+
23952515
# ===========================================================
23962516
# Test J: Non-overlapping RGs without WITH ORDER —
23972517
# RG reorder via DynamicFilter sort_options

0 commit comments

Comments
 (0)