@@ -2370,6 +2370,15 @@ def approx_distinct(
23702370 Args:
23712371 expression: Values to check for distinct entries
23722372 filter: If provided, only compute against rows for which the filter is True
2373+
2374+ Examples:
2375+ ---------
2376+ >>> ctx = dfn.SessionContext()
2377+ >>> df = ctx.from_pydict({"a": [1, 1, 2, 3]})
2378+ >>> result = df.aggregate(
2379+ ... [], [dfn.functions.approx_distinct(dfn.col("a")).alias("v")])
2380+ >>> result.collect_column("v")[0].as_py() == 3
2381+ True
23732382 """
23742383 filter_raw = filter .expr if filter is not None else None
23752384
@@ -2388,6 +2397,15 @@ def approx_median(expression: Expr, filter: Expr | None = None) -> Expr:
23882397 Args:
23892398 expression: Values to find the median for
23902399 filter: If provided, only compute against rows for which the filter is True
2400+
2401+ Examples:
2402+ ---------
2403+ >>> ctx = dfn.SessionContext()
2404+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
2405+ >>> result = df.aggregate(
2406+ ... [], [dfn.functions.approx_median(dfn.col("a")).alias("v")])
2407+ >>> result.collect_column("v")[0].as_py()
2408+ 2.0
23912409 """
23922410 filter_raw = filter .expr if filter is not None else None
23932411 return Expr (f .approx_median (expression .expr , filter = filter_raw ))
@@ -2419,6 +2437,15 @@ def approx_percentile_cont(
24192437 percentile: This must be between 0.0 and 1.0, inclusive
24202438 num_centroids: Max bin size for the t-digest algorithm
24212439 filter: If provided, only compute against rows for which the filter is True
2440+
2441+ Examples:
2442+ ---------
2443+ >>> ctx = dfn.SessionContext()
2444+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0, 4.0, 5.0]})
2445+ >>> result = df.aggregate(
2446+ ... [], [dfn.functions.approx_percentile_cont(dfn.col("a"), 0.5).alias("v")])
2447+ >>> result.collect_column("v")[0].as_py()
2448+ 3.0
24222449 """
24232450 sort_expr_raw = sort_or_default (sort_expression )
24242451 filter_raw = filter .expr if filter is not None else None
@@ -2451,6 +2478,15 @@ def approx_percentile_cont_with_weight(
24512478 num_centroids: Max bin size for the t-digest algorithm
24522479 filter: If provided, only compute against rows for which the filter is True
24532480
2481+ Examples:
2482+ ---------
2483+ >>> ctx = dfn.SessionContext()
2484+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0], "w": [1.0, 1.0, 1.0]})
2485+ >>> result = df.aggregate(
2486+ ... [], [dfn.functions.approx_percentile_cont_with_weight(dfn.col("a"),
2487+ ... dfn.col("w"), 0.5).alias("v")])
2488+ >>> result.collect_column("v")[0].as_py()
2489+ 2.0
24542490 """
24552491 sort_expr_raw = sort_or_default (sort_expression )
24562492 filter_raw = filter .expr if filter is not None else None
@@ -2514,6 +2550,14 @@ def avg(
25142550 Args:
25152551 expression: Values to compute the average of
25162552 filter: If provided, only compute against rows for which the filter is True
2553+
2554+ Examples:
2555+ ---------
2556+ >>> ctx = dfn.SessionContext()
2557+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
2558+ >>> result = df.aggregate([], [dfn.functions.avg(dfn.col("a")).alias("v")])
2559+ >>> result.collect_column("v")[0].as_py()
2560+ 2.0
25172561 """
25182562 filter_raw = filter .expr if filter is not None else None
25192563 return Expr (f .avg (expression .expr , filter = filter_raw ))
@@ -2552,6 +2596,14 @@ def count(
25522596 expressions: Values to count
25532597 distinct: If True, a single entry for each distinct value will be in the result
25542598 filter: If provided, only compute against rows for which the filter is True
2599+
2600+ Examples:
2601+ ---------
2602+ >>> ctx = dfn.SessionContext()
2603+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
2604+ >>> result = df.aggregate([], [dfn.functions.count(dfn.col("a")).alias("v")])
2605+ >>> result.collect_column("v")[0].as_py()
2606+ 3
25552607 """
25562608 filter_raw = filter .expr if filter is not None else None
25572609
@@ -2616,6 +2668,14 @@ def max(expression: Expr, filter: Expr | None = None) -> Expr:
26162668 Args:
26172669 expression: The value to find the maximum of
26182670 filter: If provided, only compute against rows for which the filter is True
2671+
2672+ Examples:
2673+ ---------
2674+ >>> ctx = dfn.SessionContext()
2675+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
2676+ >>> result = df.aggregate([], [dfn.functions.max(dfn.col("a")).alias("v")])
2677+ >>> result.collect_column("v")[0].as_py()
2678+ 3
26192679 """
26202680 filter_raw = filter .expr if filter is not None else None
26212681 return Expr (f .max (expression .expr , filter = filter_raw ))
@@ -2625,6 +2685,14 @@ def mean(expression: Expr, filter: Expr | None = None) -> Expr:
26252685 """Returns the average (mean) value of the argument.
26262686
26272687 This is an alias for :py:func:`avg`.
2688+
2689+ Examples:
2690+ ---------
2691+ >>> ctx = dfn.SessionContext()
2692+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
2693+ >>> result = df.aggregate([], [dfn.functions.mean(dfn.col("a")).alias("v")])
2694+ >>> result.collect_column("v")[0].as_py()
2695+ 2.0
26282696 """
26292697 return avg (expression , filter )
26302698
@@ -2644,6 +2712,14 @@ def median(
26442712 expression: The value to compute the median of
26452713 distinct: If True, a single entry for each distinct value will be in the result
26462714 filter: If provided, only compute against rows for which the filter is True
2715+
2716+ Examples:
2717+ ---------
2718+ >>> ctx = dfn.SessionContext()
2719+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
2720+ >>> result = df.aggregate([], [dfn.functions.median(dfn.col("a")).alias("v")])
2721+ >>> result.collect_column("v")[0].as_py()
2722+ 2.0
26472723 """
26482724 filter_raw = filter .expr if filter is not None else None
26492725 return Expr (f .median (expression .expr , distinct = distinct , filter = filter_raw ))
@@ -2658,6 +2734,14 @@ def min(expression: Expr, filter: Expr | None = None) -> Expr:
26582734 Args:
26592735 expression: The value to find the minimum of
26602736 filter: If provided, only compute against rows for which the filter is True
2737+
2738+ Examples:
2739+ ---------
2740+ >>> ctx = dfn.SessionContext()
2741+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
2742+ >>> result = df.aggregate([], [dfn.functions.min(dfn.col("a")).alias("v")])
2743+ >>> result.collect_column("v")[0].as_py()
2744+ 1
26612745 """
26622746 filter_raw = filter .expr if filter is not None else None
26632747 return Expr (f .min (expression .expr , filter = filter_raw ))
@@ -2677,6 +2761,14 @@ def sum(
26772761 Args:
26782762 expression: Values to sum
26792763 filter: If provided, only compute against rows for which the filter is True
2764+
2765+ Examples:
2766+ ---------
2767+ >>> ctx = dfn.SessionContext()
2768+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
2769+ >>> result = df.aggregate([], [dfn.functions.sum(dfn.col("a")).alias("v")])
2770+ >>> result.collect_column("v")[0].as_py()
2771+ 6
26802772 """
26812773 filter_raw = filter .expr if filter is not None else None
26822774 return Expr (f .sum (expression .expr , filter = filter_raw ))
@@ -3094,6 +3186,14 @@ def bit_and(expression: Expr, filter: Expr | None = None) -> Expr:
30943186 Args:
30953187 expression: Argument to perform bitwise calculation on
30963188 filter: If provided, only compute against rows for which the filter is True
3189+
3190+ Examples:
3191+ ---------
3192+ >>> ctx = dfn.SessionContext()
3193+ >>> df = ctx.from_pydict({"a": [7, 3]})
3194+ >>> result = df.aggregate([], [dfn.functions.bit_and(dfn.col("a")).alias("v")])
3195+ >>> result.collect_column("v")[0].as_py()
3196+ 3
30973197 """
30983198 filter_raw = filter .expr if filter is not None else None
30993199 return Expr (f .bit_and (expression .expr , filter = filter_raw ))
@@ -3110,6 +3210,14 @@ def bit_or(expression: Expr, filter: Expr | None = None) -> Expr:
31103210 Args:
31113211 expression: Argument to perform bitwise calculation on
31123212 filter: If provided, only compute against rows for which the filter is True
3213+
3214+ Examples:
3215+ ---------
3216+ >>> ctx = dfn.SessionContext()
3217+ >>> df = ctx.from_pydict({"a": [1, 2]})
3218+ >>> result = df.aggregate([], [dfn.functions.bit_or(dfn.col("a")).alias("v")])
3219+ >>> result.collect_column("v")[0].as_py()
3220+ 3
31133221 """
31143222 filter_raw = filter .expr if filter is not None else None
31153223 return Expr (f .bit_or (expression .expr , filter = filter_raw ))
@@ -3129,6 +3237,14 @@ def bit_xor(
31293237 expression: Argument to perform bitwise calculation on
31303238 distinct: If True, evaluate each unique value of expression only once
31313239 filter: If provided, only compute against rows for which the filter is True
3240+
3241+ Examples:
3242+ ---------
3243+ >>> ctx = dfn.SessionContext()
3244+ >>> df = ctx.from_pydict({"a": [5, 3]})
3245+ >>> result = df.aggregate([], [dfn.functions.bit_xor(dfn.col("a")).alias("v")])
3246+ >>> result.collect_column("v")[0].as_py()
3247+ 6
31323248 """
31333249 filter_raw = filter .expr if filter is not None else None
31343250 return Expr (f .bit_xor (expression .expr , distinct = distinct , filter = filter_raw ))
@@ -3146,6 +3262,14 @@ def bool_and(expression: Expr, filter: Expr | None = None) -> Expr:
31463262 Args:
31473263 expression: Argument to perform calculation on
31483264 filter: If provided, only compute against rows for which the filter is True
3265+
3266+ Examples:
3267+ ---------
3268+ >>> ctx = dfn.SessionContext()
3269+ >>> df = ctx.from_pydict({"a": [True, True, False]})
3270+ >>> result = df.aggregate([], [dfn.functions.bool_and(dfn.col("a")).alias("v")])
3271+ >>> result.collect_column("v")[0].as_py()
3272+ False
31493273 """
31503274 filter_raw = filter .expr if filter is not None else None
31513275 return Expr (f .bool_and (expression .expr , filter = filter_raw ))
@@ -3163,6 +3287,14 @@ def bool_or(expression: Expr, filter: Expr | None = None) -> Expr:
31633287 Args:
31643288 expression: Argument to perform calculation on
31653289 filter: If provided, only compute against rows for which the filter is True
3290+
3291+ Examples:
3292+ ---------
3293+ >>> ctx = dfn.SessionContext()
3294+ >>> df = ctx.from_pydict({"a": [False, False, True]})
3295+ >>> result = df.aggregate([], [dfn.functions.bool_or(dfn.col("a")).alias("v")])
3296+ >>> result.collect_column("v")[0].as_py()
3297+ True
31663298 """
31673299 filter_raw = filter .expr if filter is not None else None
31683300 return Expr (f .bool_or (expression .expr , filter = filter_raw ))
@@ -3553,6 +3685,15 @@ def string_agg(
35533685 For example::
35543686
35553687 df.aggregate([], string_agg(col("a"), ",", order_by="b"))
3688+
3689+ Examples:
3690+ ---------
3691+ >>> ctx = dfn.SessionContext()
3692+ >>> df = ctx.from_pydict({"a": ["x", "y", "z"]})
3693+ >>> result = df.aggregate(
3694+ ... [], [dfn.functions.string_agg(dfn.col("a"), ",", order_by="a").alias("s")])
3695+ >>> result.collect_column("s")[0].as_py()
3696+ 'x,y,z'
35563697 """
35573698 order_by_raw = sort_list_to_raw_sort_list (order_by )
35583699 filter_raw = filter .expr if filter is not None else None
0 commit comments