Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
410 changes: 403 additions & 7 deletions docs/sql-ref-sketch-aggregates.md

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions python/docs/source/reference/pyspark.sql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,12 @@ Aggregate Functions
theta_intersection_agg
theta_sketch_agg
theta_union_agg
tuple_intersection_agg_double
tuple_intersection_agg_integer
tuple_sketch_agg_double
tuple_sketch_agg_integer
tuple_union_agg_double
tuple_union_agg_integer
try_avg
try_sum
var_pop
Expand Down Expand Up @@ -665,6 +671,18 @@ Misc Functions
theta_intersection
theta_sketch_estimate
theta_union
tuple_difference_double
tuple_difference_integer
tuple_intersection_double
tuple_intersection_integer
tuple_sketch_estimate_double
tuple_sketch_estimate_integer
tuple_sketch_summary_double
tuple_sketch_summary_integer
tuple_sketch_theta_double
tuple_sketch_theta_integer
tuple_union_double
tuple_union_integer
try_aes_decrypt
try_reflect
typeof
Expand Down
224 changes: 224 additions & 0 deletions python/pyspark/sql/connect/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4581,6 +4581,96 @@ def theta_intersection_agg(
theta_intersection_agg.__doc__ = pysparkfuncs.theta_intersection_agg.__doc__


def tuple_sketch_agg_double(
    key: "ColumnOrName",
    summary: "ColumnOrName",
    lgNomEntries: Optional[Union[int, Column]] = None,
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    # Both optional arguments are always forwarded: an omitted ``lgNomEntries``
    # is sent as the literal 12 and an omitted ``mode`` as the literal "sum".
    entries_arg = lit(12 if lgNomEntries is None else lgNomEntries)
    mode_arg = lit("sum" if mode is None else mode)
    return _invoke_function_over_columns(
        "tuple_sketch_agg_double", key, summary, entries_arg, mode_arg
    )


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_sketch_agg_double.__doc__ = pysparkfuncs.tuple_sketch_agg_double.__doc__


def tuple_sketch_agg_integer(
    key: "ColumnOrName",
    summary: "ColumnOrName",
    lgNomEntries: Optional[Union[int, Column]] = None,
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    # Both optional arguments are always forwarded: an omitted ``lgNomEntries``
    # is sent as the literal 12 and an omitted ``mode`` as the literal "sum".
    entries_arg = lit(12 if lgNomEntries is None else lgNomEntries)
    mode_arg = lit("sum" if mode is None else mode)
    return _invoke_function_over_columns(
        "tuple_sketch_agg_integer", key, summary, entries_arg, mode_arg
    )


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_sketch_agg_integer.__doc__ = pysparkfuncs.tuple_sketch_agg_integer.__doc__


def tuple_union_agg_double(
    col: "ColumnOrName",
    lgNomEntries: Optional[Union[int, Column]] = None,
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    # Both optional arguments are always forwarded: an omitted ``lgNomEntries``
    # is sent as the literal 12 and an omitted ``mode`` as the literal "sum".
    entries_arg = lit(12 if lgNomEntries is None else lgNomEntries)
    mode_arg = lit("sum" if mode is None else mode)
    return _invoke_function_over_columns("tuple_union_agg_double", col, entries_arg, mode_arg)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_union_agg_double.__doc__ = pysparkfuncs.tuple_union_agg_double.__doc__


def tuple_union_agg_integer(
    col: "ColumnOrName",
    lgNomEntries: Optional[Union[int, Column]] = None,
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    # Both optional arguments are always forwarded: an omitted ``lgNomEntries``
    # is sent as the literal 12 and an omitted ``mode`` as the literal "sum".
    entries_arg = lit(12 if lgNomEntries is None else lgNomEntries)
    mode_arg = lit("sum" if mode is None else mode)
    return _invoke_function_over_columns("tuple_union_agg_integer", col, entries_arg, mode_arg)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_union_agg_integer.__doc__ = pysparkfuncs.tuple_union_agg_integer.__doc__


def tuple_intersection_agg_double(
    col: "ColumnOrName",
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    name = "tuple_intersection_agg_double"
    # Forward ``mode`` (wrapped as a literal) only when the caller supplied one;
    # otherwise the function is invoked with the sketch column alone.
    if mode is not None:
        return _invoke_function_over_columns(name, col, lit(mode))
    return _invoke_function_over_columns(name, col)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_intersection_agg_double.__doc__ = pysparkfuncs.tuple_intersection_agg_double.__doc__


def tuple_intersection_agg_integer(
    col: "ColumnOrName",
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    name = "tuple_intersection_agg_integer"
    # Forward ``mode`` (wrapped as a literal) only when the caller supplied one;
    # otherwise the function is invoked with the sketch column alone.
    if mode is not None:
        return _invoke_function_over_columns(name, col, lit(mode))
    return _invoke_function_over_columns(name, col)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_intersection_agg_integer.__doc__ = pysparkfuncs.tuple_intersection_agg_integer.__doc__


def kll_sketch_agg_bigint(
col: "ColumnOrName",
k: Optional[Union[int, Column]] = None,
Expand Down Expand Up @@ -4822,6 +4912,140 @@ def theta_difference(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
theta_difference.__doc__ = pysparkfuncs.theta_difference.__doc__


def tuple_sketch_estimate_double(col: "ColumnOrName") -> Column:
    # Thin wrapper: invoke the same-named function on the single input column.
    return _invoke_function_over_columns("tuple_sketch_estimate_double", col)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_sketch_estimate_double.__doc__ = pysparkfuncs.tuple_sketch_estimate_double.__doc__


def tuple_sketch_estimate_integer(col: "ColumnOrName") -> Column:
    # Thin wrapper: invoke the same-named function on the single input column.
    return _invoke_function_over_columns("tuple_sketch_estimate_integer", col)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_sketch_estimate_integer.__doc__ = pysparkfuncs.tuple_sketch_estimate_integer.__doc__


def tuple_sketch_summary_double(
    col: "ColumnOrName",
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    name = "tuple_sketch_summary_double"
    # Forward ``mode`` (wrapped as a literal) only when the caller supplied one;
    # otherwise the function is invoked with the sketch column alone.
    if mode is not None:
        return _invoke_function_over_columns(name, col, lit(mode))
    return _invoke_function_over_columns(name, col)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_sketch_summary_double.__doc__ = pysparkfuncs.tuple_sketch_summary_double.__doc__


def tuple_sketch_summary_integer(
    col: "ColumnOrName",
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    name = "tuple_sketch_summary_integer"
    # Forward ``mode`` (wrapped as a literal) only when the caller supplied one;
    # otherwise the function is invoked with the sketch column alone.
    if mode is not None:
        return _invoke_function_over_columns(name, col, lit(mode))
    return _invoke_function_over_columns(name, col)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_sketch_summary_integer.__doc__ = pysparkfuncs.tuple_sketch_summary_integer.__doc__


def tuple_sketch_theta_double(col: "ColumnOrName") -> Column:
    # Thin wrapper: invoke the same-named function on the single input column.
    fn = "tuple_sketch_theta_double"
    return _invoke_function_over_columns(fn, col)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_sketch_theta_double.__doc__ = pysparkfuncs.tuple_sketch_theta_double.__doc__


def tuple_sketch_theta_integer(col: "ColumnOrName") -> Column:
    # Thin wrapper: invoke the same-named function on the single input column.
    fn = "tuple_sketch_theta_integer"
    return _invoke_function_over_columns(fn, col)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_sketch_theta_integer.__doc__ = pysparkfuncs.tuple_sketch_theta_integer.__doc__


def tuple_union_double(
    col1: "ColumnOrName",
    col2: "ColumnOrName",
    lgNomEntries: Optional[Union[int, Column]] = None,
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    # Both optional arguments are always forwarded: an omitted ``lgNomEntries``
    # is sent as the literal 12 and an omitted ``mode`` as the literal "sum".
    entries_arg = lit(12 if lgNomEntries is None else lgNomEntries)
    mode_arg = lit("sum" if mode is None else mode)
    return _invoke_function_over_columns("tuple_union_double", col1, col2, entries_arg, mode_arg)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_union_double.__doc__ = pysparkfuncs.tuple_union_double.__doc__


def tuple_union_integer(
    col1: "ColumnOrName",
    col2: "ColumnOrName",
    lgNomEntries: Optional[Union[int, Column]] = None,
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    # Both optional arguments are always forwarded: an omitted ``lgNomEntries``
    # is sent as the literal 12 and an omitted ``mode`` as the literal "sum".
    entries_arg = lit(12 if lgNomEntries is None else lgNomEntries)
    mode_arg = lit("sum" if mode is None else mode)
    return _invoke_function_over_columns("tuple_union_integer", col1, col2, entries_arg, mode_arg)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_union_integer.__doc__ = pysparkfuncs.tuple_union_integer.__doc__


def tuple_intersection_double(
    col1: "ColumnOrName",
    col2: "ColumnOrName",
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    name = "tuple_intersection_double"
    # Forward ``mode`` (wrapped as a literal) only when the caller supplied one;
    # otherwise the function is invoked with just the two input columns.
    if mode is not None:
        return _invoke_function_over_columns(name, col1, col2, lit(mode))
    return _invoke_function_over_columns(name, col1, col2)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_intersection_double.__doc__ = pysparkfuncs.tuple_intersection_double.__doc__


def tuple_intersection_integer(
    col1: "ColumnOrName",
    col2: "ColumnOrName",
    mode: Optional[Union[str, Column]] = None,
) -> Column:
    name = "tuple_intersection_integer"
    # Forward ``mode`` (wrapped as a literal) only when the caller supplied one;
    # otherwise the function is invoked with just the two input columns.
    if mode is not None:
        return _invoke_function_over_columns(name, col1, col2, lit(mode))
    return _invoke_function_over_columns(name, col1, col2)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_intersection_integer.__doc__ = pysparkfuncs.tuple_intersection_integer.__doc__


def tuple_difference_double(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    # Thin wrapper: invoke the same-named function on the two input columns,
    # in the order given.
    fn = "tuple_difference_double"
    return _invoke_function_over_columns(fn, col1, col2)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_difference_double.__doc__ = pysparkfuncs.tuple_difference_double.__doc__


def tuple_difference_integer(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    # Thin wrapper: invoke the same-named function on the two input columns,
    # in the order given.
    fn = "tuple_difference_integer"
    return _invoke_function_over_columns(fn, col1, col2)


# Copy the docstring from the same-named function in ``pysparkfuncs``.
tuple_difference_integer.__doc__ = pysparkfuncs.tuple_difference_integer.__doc__


# Predicates Function


Expand Down
18 changes: 18 additions & 0 deletions python/pyspark/sql/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,12 @@
"theta_intersection_agg",
"theta_sketch_agg",
"theta_union_agg",
"tuple_intersection_agg_double",
"tuple_intersection_agg_integer",
"tuple_sketch_agg_double",
"tuple_sketch_agg_integer",
"tuple_union_agg_double",
"tuple_union_agg_integer",
"try_avg",
"try_sum",
"var_pop",
Expand Down Expand Up @@ -531,6 +537,18 @@
"theta_intersection",
"theta_sketch_estimate",
"theta_union",
"tuple_difference_double",
"tuple_difference_integer",
"tuple_intersection_double",
"tuple_intersection_integer",
"tuple_sketch_estimate_double",
"tuple_sketch_estimate_integer",
"tuple_sketch_summary_double",
"tuple_sketch_summary_integer",
"tuple_sketch_theta_double",
"tuple_sketch_theta_integer",
"tuple_union_double",
"tuple_union_integer",
"try_aes_decrypt",
"try_reflect",
"typeof",
Expand Down
Loading