Skip to content

Commit 334de79

Browse files
timsaucer and claude committed
Add ExplainFormat enum and format option to DataFrame.explain()
Extend the existing explain() method with an optional format parameter instead of adding a separate explain_with_options() method. This keeps the API simple while exposing all upstream ExplainOption functionality.

Available formats: indent (default), tree, pgjson, graphviz.

The ExplainFormat enum is exported from the top-level datafusion module.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d7d3a7a commit 334de79

File tree

4 files changed

+72
-5
lines changed

4 files changed

+72
-5
lines changed

crates/core/src/dataframe.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -804,9 +804,25 @@ impl PyDataFrame {
804804
}
805805

806806
/// Print the query plan
807-
#[pyo3(signature = (verbose=false, analyze=false))]
808-
fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyDataFusionResult<()> {
809-
let df = self.df.as_ref().clone().explain(verbose, analyze)?;
807+
#[pyo3(signature = (verbose=false, analyze=false, format=None))]
808+
fn explain(
809+
&self,
810+
py: Python,
811+
verbose: bool,
812+
analyze: bool,
813+
format: Option<&str>,
814+
) -> PyDataFusionResult<()> {
815+
let explain_format = match format {
816+
Some(f) => f
817+
.parse::<datafusion::common::format::ExplainFormat>()
818+
.map_err(|e| PyDataFusionError::Common(e.to_string()))?,
819+
None => datafusion::common::format::ExplainFormat::Indent,
820+
};
821+
let opts = datafusion::logical_expr::ExplainOption::default()
822+
.with_verbose(verbose)
823+
.with_analyze(analyze)
824+
.with_format(explain_format);
825+
let df = self.df.as_ref().clone().explain_with_options(opts)?;
810826
print_dataframe(py, df)
811827
}
812828

python/datafusion/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
from .dataframe import (
4848
DataFrame,
4949
DataFrameWriteOptions,
50+
ExplainFormat,
5051
InsertOp,
5152
ParquetColumnOptions,
5253
ParquetWriterOptions,
@@ -82,6 +83,7 @@
8283
"DataFrameWriteOptions",
8384
"Database",
8485
"ExecutionPlan",
86+
"ExplainFormat",
8587
"Expr",
8688
"InsertOp",
8789
"LogicalPlan",

python/datafusion/dataframe.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,25 @@
6565
from enum import Enum
6666

6767

68+
class ExplainFormat(Enum):
    """Rendering styles accepted by :py:meth:`DataFrame.explain`.

    Each member's value is the format name that ``DataFrame.explain`` passes
    on to the underlying ``explain`` call.
    """

    INDENT = "indent"
    """Indented text; the format used when none is requested."""

    TREE = "tree"
    """Tree layout drawn with box-drawing characters."""

    PGJSON = "pgjson"
    """PostgreSQL-compatible JSON, intended for plan visualization tools."""

    GRAPHVIZ = "graphviz"
    """Graphviz DOT source for rendering the plan as a graph."""
85+
86+
6887
# excerpt from deltalake
6988
# https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163
7089
class Compression(Enum):
@@ -918,16 +937,24 @@ def join_on(
918937
exprs = [ensure_expr(expr) for expr in on_exprs]
919938
return DataFrame(self.df.join_on(right.df, exprs, how))
920939

921-
def explain(self, verbose: bool = False, analyze: bool = False) -> None:
940+
def explain(
    self,
    verbose: bool = False,
    analyze: bool = False,
    format: ExplainFormat | str | None = None,
) -> None:
    """Print an explanation of the DataFrame's plan so far.

    If ``analyze`` is specified, runs the plan and reports metrics.

    Args:
        verbose: If ``True``, more details will be included.
        analyze: If ``True``, the plan will run and metrics reported.
        format: Output format for the plan, given either as an
            :py:class:`ExplainFormat` member or as its string name
            (for example ``"tree"``). Defaults to
            :py:attr:`ExplainFormat.INDENT`.
    """
    # The native layer takes the format as a plain string (or ``None`` for
    # the default). Enum members are unwrapped; strings are passed through
    # so the native parser validates them instead of failing here with an
    # opaque ``AttributeError`` on ``.value``.
    fmt = format.value if isinstance(format, ExplainFormat) else format
    self.df.explain(verbose, analyze, fmt)
931958

932959
def logical_plan(self) -> LogicalPlan:
933960
"""Return the unoptimized ``LogicalPlan``.

python/tests/test_dataframe.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3632,3 +3632,25 @@ def test_sort_by():
36323632
df = ctx.from_pydict({"a": [3, 1, 2]})
36333633
result = df.sort_by(column("a")).collect()[0]
36343634
assert result.column(0).to_pylist() == [1, 2, 3]
3635+
3636+
3637+
def test_explain_with_format(capsys):
    """``DataFrame.explain`` accepts the optional ``format`` argument."""
    from datafusion import ExplainFormat

    ctx = SessionContext()
    df = ctx.from_pydict({"a": [1]})

    # Default format works
    df.explain()
    captured = capsys.readouterr()
    assert "plan_type" in captured.out

    # Tree format produces box-drawing characters. Accepting the "plan_type"
    # header as an alternative would pass for every format, so require the
    # box corner itself.
    df.explain(format=ExplainFormat.TREE)
    captured = capsys.readouterr()
    assert "\u250c" in captured.out

    # Verbose + analyze still works with format
    df.explain(verbose=True, analyze=True, format=ExplainFormat.INDENT)
    captured = capsys.readouterr()
    assert "plan_type" in captured.out

0 commit comments

Comments
 (0)