36 changes: 21 additions & 15 deletions python/docs/source/reference/pyspark.sql/functions.rst
@@ -641,6 +641,27 @@ Misc Functions
current_database
current_schema
current_user
input_file_block_length
input_file_block_start
input_file_name
java_method
monotonically_increasing_id
raise_error
reflect
session_user
spark_partition_id
try_aes_decrypt
try_reflect
typeof
user
uuid
version

Datasketch Functions
--------------------
.. autosummary::
:toctree: api/

hll_sketch_estimate
hll_union
kll_sketch_get_n_bigint
@@ -658,15 +679,6 @@ Misc Functions
kll_sketch_to_string_bigint
kll_sketch_to_string_double
kll_sketch_to_string_float
input_file_block_length
input_file_block_start
input_file_name
java_method
monotonically_increasing_id
raise_error
reflect
session_user
spark_partition_id
theta_difference
theta_intersection
theta_sketch_estimate
@@ -683,12 +695,6 @@ Misc Functions
tuple_sketch_theta_integer
tuple_union_double
tuple_union_integer
try_aes_decrypt
try_reflect
typeof
user
uuid
version


Geospatial ST Functions
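For orientation on the new Datasketch group above: `hll_sketch_estimate` and `hll_union` operate on binary HyperLogLog sketches produced by the `hll_sketch_agg` aggregate, so distinct counts can be pre-aggregated and merged later without rescanning the raw data. A minimal, hedged usage sketch (assumes Spark 3.5 or later; the data and column names are invented):

```python
# Minimal usage sketch for the HLL entries grouped above (hll_sketch_estimate,
# hll_union); hll_sketch_agg is the aggregate that produces the binary
# sketches these functions consume. Data and column names are made up.
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
events = spark.createDataFrame(
    [("2024-01-01", "u1"), ("2024-01-01", "u2"), ("2024-02-01", "u1")],
    ["day", "user_id"],
)

# Build one HLL sketch per period and query it for an approximate distinct count.
jan = events.where(F.col("day") == "2024-01-01").agg(F.hll_sketch_agg("user_id").alias("s_jan"))
feb = events.where(F.col("day") == "2024-02-01").agg(F.hll_sketch_agg("user_id").alias("s_feb"))
jan.select(F.hll_sketch_estimate("s_jan").alias("approx_jan_users")).show()

# Merge the two sketches and estimate the combined distinct count.
jan.crossJoin(feb).select(
    F.hll_sketch_estimate(F.hll_union("s_jan", "s_feb")).alias("approx_total_users")
).show()
```

The `kll_*`, `theta_*`, and `tuple_*` entries listed above broadly follow the same build-a-sketch-then-query shape, with quantile and set-operation queries instead of distinct counts.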
2 changes: 2 additions & 0 deletions python/pyspark/sql/connect/functions/builtin.py
@@ -4489,6 +4489,8 @@ def sha2(col: "ColumnOrName", numBits: int) -> Column:

sha2.__doc__ = pysparkfuncs.sha2.__doc__

# Datasketch Functions


def hll_sketch_agg(
col: "ColumnOrName",
51 changes: 26 additions & 25 deletions python/pyspark/sql/functions/__init__.py
@@ -507,12 +507,36 @@
"current_database",
"current_schema",
"current_user",
"hll_sketch_estimate",
"hll_union",
"input_file_block_length",
"input_file_block_start",
"input_file_name",
"java_method",
"monotonically_increasing_id",
"raise_error",
"reflect",
"session_user",
"spark_partition_id",
"try_aes_decrypt",
"try_reflect",
"typeof",
"user",
# "uuid": Excluded because of the name conflict with builtin uuid module
"version",
# UDF, UDTF and UDT
"AnalyzeArgument",
"AnalyzeResult",
"ArrowUDFType",
"OrderingColumn",
"PandasUDFType",
"PartitioningColumn",
"SelectedColumn",
"SkipRestOfInputTableException",
"UserDefinedFunction",
"UserDefinedTableFunction",
"arrow_udf",
# Datasketch Functions
"hll_sketch_estimate",
"hll_union",
"kll_sketch_get_n_bigint",
"kll_sketch_get_n_double",
"kll_sketch_get_n_float",
@@ -528,11 +552,6 @@
"kll_sketch_to_string_bigint",
"kll_sketch_to_string_double",
"kll_sketch_to_string_float",
"monotonically_increasing_id",
"raise_error",
"reflect",
"session_user",
"spark_partition_id",
"theta_difference",
"theta_intersection",
"theta_sketch_estimate",
@@ -549,24 +568,6 @@
"tuple_sketch_theta_integer",
"tuple_union_double",
"tuple_union_integer",
"try_aes_decrypt",
"try_reflect",
"typeof",
"user",
# "uuid": Excluded because of the name conflict with builtin uuid module
"version",
# UDF, UDTF and UDT
"AnalyzeArgument",
"AnalyzeResult",
"ArrowUDFType",
"OrderingColumn",
"PandasUDFType",
"PartitioningColumn",
"SelectedColumn",
"SkipRestOfInputTableException",
"UserDefinedFunction",
"UserDefinedTableFunction",
"arrow_udf",
# Geospatial ST Functions
"st_asbinary",
"st_geogfromwkb",
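One behavioral detail preserved by the reordered `__all__` above: `uuid` is defined in `pyspark.sql.functions` but deliberately left out of `__all__`, so a star-import cannot shadow the standard-library `uuid` module. A small, hedged sketch of what that means in practice (assumes a PySpark build that ships the functions listed above):

```python
# Hedged illustration of the __all__ carve-out noted above: `uuid` must be
# reached through the module namespace, while star-exported names import directly.
import pyspark.sql.functions as F
from pyspark.sql import SparkSession
from pyspark.sql.functions import version, typeof  # both appear in __all__ above

spark = SparkSession.builder.getOrCreate()
df = spark.range(3)  # single bigint column named "id"

df.select(
    F.uuid().alias("row_uuid"),      # not star-exported; qualify via the module
    version().alias("spark_version"),
    typeof("id").alias("id_type"),   # e.g. "bigint"
).show(truncate=False)
```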
3 changes: 3 additions & 0 deletions python/pyspark/sql/functions/builtin.py
@@ -26507,6 +26507,9 @@ def unwrap_udt(col: "ColumnOrName") -> Column:
return _invoke_function("unwrap_udt", _to_java_column(col))


# ---------------------- Datasketch functions ------------------------------


@_try_remote_functions
def hll_sketch_agg(
col: "ColumnOrName",
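The hunk above only adds a section-marker comment before `hll_sketch_agg` in builtin.py; the function itself also takes an optional precision argument. A short, hedged example, assuming the Spark 3.5+ signature `hll_sketch_agg(col, lgConfigK=None)`:

```python
# Hedged sketch: lgConfigK (log2 of the number of HLL registers) trades memory
# for accuracy; leaving it unset falls back to the library default.
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(v,) for v in ["a", "b", "a", "c", "b"]], ["value"])

sketch = df.agg(F.hll_sketch_agg("value", lgConfigK=12).alias("s"))
sketch.select(F.hll_sketch_estimate("s").alias("approx_distinct")).show()
```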