Skip to content

Commit 024c86b

Browse files
AdMub and timsaucer authored
docs: Clarify first_value usage in select vs aggregate (#1348)
* docs: Add warning to first_value about usage in select vs aggregate

  Clarifies that aggregate functions like first_value must be used within .aggregate() and not .select(). Closes #1300.

* chore: remove temporary reproduction script

* Update all aggregate functions to have an example usage that is correct

---------

Co-authored-by: Tim Saucer <timsaucer@gmail.com>
1 parent 16f98ff commit 024c86b

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

python/datafusion/functions.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1779,7 +1779,7 @@ def array_agg(
17791779
17801780
For example::
17811781
1782-
df.select(array_agg(col("a"), order_by="b"))
1782+
df.aggregate([], array_agg(col("a"), order_by="b"))
17831783
"""
17841784
order_by_raw = sort_list_to_raw_sort_list(order_by)
17851785
filter_raw = filter.expr if filter is not None else None
@@ -1941,7 +1941,7 @@ def median(
19411941

19421942

19431943
def min(expression: Expr, filter: Expr | None = None) -> Expr:
1944-
"""Returns the minimum value of the argument.
1944+
"""Aggregate function that returns the minimum value of the argument.
19451945
19461946
If using the builder functions described in ref:`_aggregation` this function ignores
19471947
the options ``order_by``, ``null_treatment``, and ``distinct``.
@@ -2282,7 +2282,7 @@ def first_value(
22822282
22832283
For example::
22842284
2285-
df.select(first_value(col("a"), order_by="ts"))
2285+
df.aggregate([], first_value(col("a"), order_by="ts"))
22862286
"""
22872287
order_by_raw = sort_list_to_raw_sort_list(order_by)
22882288
filter_raw = filter.expr if filter is not None else None
@@ -2319,7 +2319,7 @@ def last_value(
23192319
23202320
For example::
23212321
2322-
df.select(last_value(col("a"), order_by="ts"))
2322+
df.aggregate([], last_value(col("a"), order_by="ts"))
23232323
"""
23242324
order_by_raw = sort_list_to_raw_sort_list(order_by)
23252325
filter_raw = filter.expr if filter is not None else None
@@ -2358,7 +2358,7 @@ def nth_value(
23582358
23592359
For example::
23602360
2361-
df.select(nth_value(col("a"), 2, order_by="ts"))
2361+
df.aggregate([], nth_value(col("a"), 2, order_by="ts"))
23622362
"""
23632363
order_by_raw = sort_list_to_raw_sort_list(order_by)
23642364
filter_raw = filter.expr if filter is not None else None
@@ -2843,7 +2843,7 @@ def string_agg(
28432843
28442844
For example::
28452845
2846-
df.select(string_agg(col("a"), ",", order_by="b"))
2846+
df.aggregate([], string_agg(col("a"), ",", order_by="b"))
28472847
"""
28482848
order_by_raw = sort_list_to_raw_sort_list(order_by)
28492849
filter_raw = filter.expr if filter is not None else None

0 commit comments

Comments
 (0)