Skip to content

Commit 6cea061

Browse files
authored
* Move example to doctestable examples for context.py * Add more standard datafusion namespaces to reduce clutter * Update project to use ruff compatible with pre-commit version * Resolve ruff errors for newer version but just ignore them * Convert dataframe examples to doctestable. Found bug in dropping A * Move expr.py to doctestable examples * Move user_defined.py to doctestable examples
* Move example to doctestable examples for context.py * Add more standard dafusion namespaces to reduce clutter * Update project to use ruff compatible with pre-commit version * Resolve ruff errors for newer version but just ignore them * Convert dataframe examples to doctestable. Found bug in dropping A * Move expr.py to doctestable examples * Move user_defined.py to doctestable examples
1 parent 876646d commit 6cea061

File tree

10 files changed

+238
-167
lines changed

10 files changed

+238
-167
lines changed

conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,15 @@
2020
import datafusion as dfn
2121
import numpy as np
2222
import pytest
23+
from datafusion import col, lit
24+
from datafusion import functions as F
2325

2426

2527
@pytest.fixture(autouse=True)
2628
def _doctest_namespace(doctest_namespace: dict) -> None:
2729
"""Add common imports to the doctest namespace."""
2830
doctest_namespace["dfn"] = dfn
2931
doctest_namespace["np"] = np
32+
doctest_namespace["col"] = col
33+
doctest_namespace["lit"] = lit
34+
doctest_namespace["F"] = F

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ ignore = [
8989
"FIX002", # Allow TODO lines - consider removing at some point
9090
"ISC001", # Recommended to ignore these rules when using with ruff-format
9191
"N812", # Allow importing functions as `F`
92-
"PD901", # Allow variable name df
9392
"PLR0913", # Allow many arguments in function definition
9493
"SLF001", # Allow accessing private members
9594
"TD002", # Do not require author names in TODO statements
@@ -195,7 +194,7 @@ dev = [
195194
"pytest-asyncio>=0.23.3",
196195
"pytest>=7.4.4",
197196
"pyyaml>=6.0.3",
198-
"ruff>=0.9.1",
197+
"ruff>=0.15.1",
199198
"toml>=0.10.2",
200199
]
201200
docs = [

python/datafusion/context.py

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -371,9 +371,8 @@ def with_fair_spill_pool(self, size: int) -> RuntimeEnvBuilder:
371371
Returns:
372372
A new :py:class:`RuntimeEnvBuilder` object with the updated setting.
373373
374-
Examples usage::
375-
376-
config = RuntimeEnvBuilder().with_fair_spill_pool(1024)
374+
Examples:
375+
>>> config = dfn.RuntimeEnvBuilder().with_fair_spill_pool(1024)
377376
"""
378377
self.config_internal = self.config_internal.with_fair_spill_pool(size)
379378
return self
@@ -391,9 +390,8 @@ def with_greedy_memory_pool(self, size: int) -> RuntimeEnvBuilder:
391390
Returns:
392391
A new :py:class:`RuntimeEnvBuilder` object with the updated setting.
393392
394-
Example usage::
395-
396-
config = RuntimeEnvBuilder().with_greedy_memory_pool(1024)
393+
Examples:
394+
>>> config = dfn.RuntimeEnvBuilder().with_greedy_memory_pool(1024)
397395
"""
398396
self.config_internal = self.config_internal.with_greedy_memory_pool(size)
399397
return self
@@ -407,9 +405,8 @@ def with_temp_file_path(self, path: str | pathlib.Path) -> RuntimeEnvBuilder:
407405
Returns:
408406
A new :py:class:`RuntimeEnvBuilder` object with the updated setting.
409407
410-
Example usage::
411-
412-
config = RuntimeEnvBuilder().with_temp_file_path("/tmp")
408+
Examples:
409+
>>> config = dfn.RuntimeEnvBuilder().with_temp_file_path("/tmp")
413410
"""
414411
self.config_internal = self.config_internal.with_temp_file_path(str(path))
415412
return self
@@ -444,9 +441,8 @@ def with_allow_ddl(self, allow: bool = True) -> SQLOptions:
444441
Returns:
445442
A new :py:class:`SQLOptions` object with the updated setting.
446443
447-
Example usage::
448-
449-
options = SQLOptions().with_allow_ddl(True)
444+
Examples:
445+
>>> options = dfn.SQLOptions().with_allow_ddl(True)
450446
"""
451447
self.options_internal = self.options_internal.with_allow_ddl(allow)
452448
return self
@@ -462,9 +458,8 @@ def with_allow_dml(self, allow: bool = True) -> SQLOptions:
462458
Returns:
463459
A new :py:class:`SQLOptions` object with the updated setting.
464460
465-
Example usage::
466-
467-
options = SQLOptions().with_allow_dml(True)
461+
Examples:
462+
>>> options = dfn.SQLOptions().with_allow_dml(True)
468463
"""
469464
self.options_internal = self.options_internal.with_allow_dml(allow)
470465
return self
@@ -478,9 +473,8 @@ def with_allow_statements(self, allow: bool = True) -> SQLOptions:
478473
Returns:
479474
A new :py:class:`SQLOptions` object with the updated setting.
480475
481-
Example usage::
482-
483-
options = SQLOptions().with_allow_statements(True)
476+
Examples:
477+
>>> options = dfn.SQLOptions().with_allow_statements(True)
484478
"""
485479
self.options_internal = self.options_internal.with_allow_statements(allow)
486480
return self

python/datafusion/dataframe.py

Lines changed: 51 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def into_view(self, temporary: bool = False) -> Table:
331331
>>> result[0].column("value").to_pylist()
332332
[1]
333333
"""
334-
from datafusion.catalog import Table as _Table
334+
from datafusion.catalog import Table as _Table # noqa: PLC0415
335335

336336
return _Table(self.df.into_view(temporary))
337337

@@ -451,9 +451,20 @@ def drop(self, *columns: str) -> DataFrame:
451451
Returns:
452452
DataFrame with those columns removed in the projection.
453453
454-
Example Usage::
455-
df.drop('a') # To drop a lower-cased column 'a'
456-
df.drop('"a"') # To drop an upper-cased column 'A'
454+
Examples:
455+
To drop a lower-cased column 'a'
456+
457+
>>> ctx = dfn.SessionContext()
458+
>>> df = ctx.from_pydict({"a": [1, 2], "b": [3, 4]})
459+
>>> df.drop("a").schema().names
460+
['b']
461+
462+
Or to drop an upper-cased column 'A'
463+
464+
>>> ctx = dfn.SessionContext()
465+
>>> df = ctx.from_pydict({"A": [1, 2], "b": [3, 4]})
466+
>>> df.drop('"A"').schema().names
467+
['b']
457468
"""
458469
return DataFrame(self.df.drop(*columns))
459470

@@ -468,11 +479,13 @@ def filter(self, *predicates: Expr | str) -> DataFrame:
468479
that will be parsed against the DataFrame schema. If more complex logic is
469480
required, see the logical operations in :py:mod:`~datafusion.functions`.
470481
471-
Example::
472-
473-
from datafusion import col, lit
474-
df.filter(col("a") > lit(1))
475-
df.filter("a > 1")
482+
Examples:
483+
>>> ctx = dfn.SessionContext()
484+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
485+
>>> df.filter(col("a") > lit(1)).to_pydict()
486+
{'a': [2, 3]}
487+
>>> df.filter("a > 1").to_pydict()
488+
{'a': [2, 3]}
476489
477490
Args:
478491
predicates: Predicate expression(s) or SQL strings to filter the DataFrame.
@@ -495,14 +508,12 @@ def parse_sql_expr(self, expr: str) -> Expr:
495508
496509
The expression is created and processed against the current schema.
497510
498-
Example::
499-
500-
from datafusion import col, lit
501-
df.parse_sql_expr("a > 1")
502-
503-
should produce:
504-
505-
col("a") > lit(1)
511+
Examples:
512+
>>> ctx = dfn.SessionContext()
513+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
514+
>>> expr = df.parse_sql_expr("a > 1")
515+
>>> df.filter(expr).to_pydict()
516+
{'a': [2, 3]}
506517
507518
Args:
508519
expr: Expression string to be converted to datafusion expression
@@ -519,10 +530,11 @@ def with_column(self, name: str, expr: Expr | str) -> DataFrame:
519530
:func:`datafusion.col` or :func:`datafusion.lit`, or a SQL expression
520531
string that will be parsed against the DataFrame schema.
521532
522-
Example::
523-
524-
from datafusion import col, lit
525-
df.with_column("b", col("a") + lit(1))
533+
Examples:
534+
>>> ctx = dfn.SessionContext()
535+
>>> df = ctx.from_pydict({"a": [1, 2]})
536+
>>> df.with_column("b", col("a") + lit(10)).to_pydict()
537+
{'a': [1, 2], 'b': [11, 12]}
526538
527539
Args:
528540
name: Name of the column to add.
@@ -885,10 +897,14 @@ def join_on(
885897
built with :func:`datafusion.col`. On expressions are used to support
886898
in-equality predicates. Equality predicates are correctly optimized.
887899
888-
Example::
889-
890-
from datafusion import col
891-
df.join_on(other_df, col("id") == col("other_id"))
900+
Examples:
901+
>>> ctx = dfn.SessionContext()
902+
>>> left = ctx.from_pydict({"a": [1, 2], "x": ["a", "b"]})
903+
>>> right = ctx.from_pydict({"b": [1, 2], "y": ["c", "d"]})
904+
>>> left.join_on(
905+
... right, col("a") == col("b")
906+
... ).sort(col("x")).to_pydict()
907+
{'a': [1, 2], 'x': ['a', 'b'], 'b': [1, 2], 'y': ['c', 'd']}
892908
893909
Args:
894910
right: Other DataFrame to join with.
@@ -1350,15 +1366,17 @@ def __aiter__(self) -> AsyncIterator[RecordBatch]:
13501366
def transform(self, func: Callable[..., DataFrame], *args: Any) -> DataFrame:
13511367
"""Apply a function to the current DataFrame which returns another DataFrame.
13521368
1353-
This is useful for chaining together multiple functions. For example::
1354-
1355-
def add_3(df: DataFrame) -> DataFrame:
1356-
return df.with_column("modified", lit(3))
1369+
This is useful for chaining together multiple functions.
13571370
1358-
def within_limit(df: DataFrame, limit: int) -> DataFrame:
1359-
return df.filter(col("a") < lit(limit)).distinct()
1360-
1361-
df = df.transform(modify_df).transform(within_limit, 4)
1371+
Examples:
1372+
>>> ctx = dfn.SessionContext()
1373+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
1374+
>>> def add_3(df):
1375+
... return df.with_column("modified", dfn.lit(3))
1376+
>>> def within_limit(df: DataFrame, limit: int) -> DataFrame:
1377+
... return df.filter(col("a") < lit(limit)).distinct()
1378+
>>> df.transform(add_3).transform(within_limit, 4).sort("a").to_pydict()
1379+
{'a': [1, 2, 3], 'modified': [3, 3, 3]}
13621380
13631381
Args:
13641382
func: A callable function that takes a DataFrame as its first argument

python/datafusion/expr.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ def sort_list_to_raw_sort_list(
342342
return raw_sort_list
343343

344344

345-
class Expr:
345+
class Expr: # noqa: PLW1641
346346
"""Expression object.
347347
348348
Expressions are one of the core concepts in DataFusion. See
@@ -1367,16 +1367,18 @@ def is_unbounded(self) -> bool:
13671367
class CaseBuilder:
13681368
"""Builder class for constructing case statements.
13691369
1370-
An example usage would be as follows::
1371-
1372-
import datafusion.functions as f
1373-
from datafusion import lit, col
1374-
df.select(
1375-
f.case(col("column_a"))
1376-
.when(lit(1), lit("One"))
1377-
.when(lit(2), lit("Two"))
1378-
.otherwise(lit("Unknown"))
1379-
)
1370+
Examples:
1371+
>>> ctx = dfn.SessionContext()
1372+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
1373+
>>> result = df.select(
1374+
... dfn.functions.case(dfn.col("a"))
1375+
... .when(dfn.lit(1), dfn.lit("One"))
1376+
... .when(dfn.lit(2), dfn.lit("Two"))
1377+
... .otherwise(dfn.lit("Other"))
1378+
... .alias("label")
1379+
... )
1380+
>>> result.to_pydict()
1381+
{'label': ['One', 'Two', 'Other']}
13801382
"""
13811383

13821384
def __init__(self, case_builder: expr_internal.CaseBuilder) -> None:

python/datafusion/input/location.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def build_table(
4646
num_rows = 0 # Total number of rows in the file. Used for statistics
4747
columns = []
4848
if file_format == "parquet":
49-
import pyarrow.parquet as pq
49+
import pyarrow.parquet as pq # noqa: PLC0415
5050

5151
# Read the Parquet metadata
5252
metadata = pq.read_metadata(input_item)
@@ -61,7 +61,7 @@ def build_table(
6161
]
6262

6363
elif format == "csv":
64-
import csv
64+
import csv # noqa: PLC0415
6565

6666
# Consume header row and count number of rows for statistics.
6767
# TODO: Possibly makes sense to have the eager number of rows

python/datafusion/plan.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
]
3333

3434

35-
class LogicalPlan:
35+
class LogicalPlan: # noqa: PLW1641
3636
"""Logical Plan.
3737
3838
A `LogicalPlan` is a node in a tree of relational operators (such as

0 commit comments

Comments
 (0)