Skip to content

Commit b038c8c

Browse files
Add bigframes.bigquery.rand() function
This change adds `bigframes.bigquery.rand()` which wraps the BigQuery `RAND()` function. It accepts a Series or DataFrame as input to determine the shape and index of the output Series. It includes a warning about non-determinism in the docstring. Tests are added in `tests/unit/bigquery/test_mathematical.py` and `tests/system/small/bigquery/test_mathematical.py`.
1 parent 95763ff commit b038c8c

File tree

4 files changed

+164
-0
lines changed

4 files changed

+164
-0
lines changed

bigframes/bigquery/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
to_json,
5858
to_json_string,
5959
)
60+
from bigframes.bigquery._operations.mathematical import rand
6061
from bigframes.bigquery._operations.search import create_vector_index, vector_search
6162
from bigframes.bigquery._operations.sql import sql_scalar
6263
from bigframes.bigquery._operations.struct import struct
@@ -97,6 +98,8 @@
9798
parse_json,
9899
to_json,
99100
to_json_string,
101+
# mathematical ops
102+
rand,
100103
# search ops
101104
create_vector_index,
102105
vector_search,
@@ -148,6 +151,8 @@
148151
"parse_json",
149152
"to_json",
150153
"to_json_string",
154+
# mathematical ops
155+
"rand",
151156
# search ops
152157
"create_vector_index",
153158
"vector_search",
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from typing import Union
18+
19+
from bigframes import dataframe
20+
from bigframes import dtypes
21+
from bigframes import operations as ops
22+
from bigframes import series
23+
24+
25+
def rand(input_data: Union[series.Series, dataframe.DataFrame]) -> series.Series:
    """
    Produce one pseudo-random FLOAT64 value per row of ``input_data``.

    This wraps the BigQuery ``RAND()`` function. Every generated value lies
    in the half-open interval [0, 1): 0 may be returned, 1 never is.

    .. warning::
        This method introduces non-determinism to the expression. Reading the
        same column twice may result in different results.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> df = bpd.DataFrame({"a": [1, 2, 3]})
        >>> df['random'] = bbq.rand(df)
        >>> # Resulting column 'random' will contain random floats between 0 and 1.

    Args:
        input_data (bigframes.pandas.Series or bigframes.pandas.DataFrame):
            The object whose rows and index the result should follow. Its
            values are not read; for a DataFrame only the first column is
            used, and only as an anchor for the operation.

    Returns:
        bigframes.pandas.Series: A new Series of random float values.

    Raises:
        TypeError: If ``input_data`` is neither a Series nor a DataFrame.
        ValueError: If ``input_data`` is a DataFrame without any columns.
    """
    # Reject unsupported inputs up front so the rest of the function only
    # has to distinguish between the two accepted types.
    if not isinstance(input_data, (dataframe.DataFrame, series.Series)):
        raise TypeError(
            f"Unsupported type {type(input_data)}. "
            "Expected bigframes.pandas.Series or bigframes.pandas.DataFrame."
        )

    if isinstance(input_data, dataframe.DataFrame):
        if len(input_data.columns) == 0:
            raise ValueError("Input DataFrame must have at least one column.")
        # The op is applied through a Series, so borrow the first column.
        row_source = input_data.iloc[:, 0]
    else:
        row_source = input_data

    random_op = ops.SqlScalarOp(
        _output_type=dtypes.FLOAT_DTYPE,
        sql_template="RAND()",
    )
    # RAND() takes no arguments, hence the empty list of extra operands.
    return row_source._apply_nary_op(random_op, [])
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import bigframes.bigquery as bbq
16+
17+
18+
def test_rand(scalars_df_index):
    """rand() yields one value in [0, 1) per input row, and the values vary."""
    source_df = scalars_df_index

    # Build the random Series and materialize it locally.
    random_series = bbq.rand(source_df)
    materialized = random_series.to_pandas()

    # One output row per input row.
    assert len(materialized) == len(source_df)

    # Every value must fall inside the half-open interval [0, 1).
    assert materialized.ge(0).all()
    assert materialized.lt(1).all()

    # With more than one row, identical values everywhere would indicate the
    # expression was not evaluated per row (a genuine collision is unlikely).
    if len(materialized) > 1:
        assert materialized.nunique() > 1
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest.mock as mock
16+
17+
import bigframes.bigquery as bbq
18+
import bigframes.dataframe as dataframe
19+
import bigframes.dtypes as dtypes
20+
import bigframes.operations as ops
21+
import bigframes.series as series
22+
23+
24+
def test_rand_calls_apply_nary_op():
    """rand() on a Series applies a RAND() SqlScalarOp with no extra operands."""
    fake_series = mock.create_autospec(series.Series, instance=True)

    bbq.rand(fake_series)

    apply_mock = fake_series._apply_nary_op
    apply_mock.assert_called_once()

    # call_args[0] holds the positional arguments of the recorded call.
    positional = apply_mock.call_args[0]
    applied_op = positional[0]
    assert isinstance(applied_op, ops.SqlScalarOp)
    assert applied_op.sql_template == "RAND()"
    assert applied_op._output_type == dtypes.FLOAT_DTYPE
    assert positional[1] == []
36+
37+
38+
def test_rand_with_dataframe():
    """rand() on a DataFrame anchors on its first column and applies RAND()."""
    mock_df = mock.create_autospec(dataframe.DataFrame, instance=True)
    # A non-empty column list lets the call get past rand()'s
    # "at least one column" validation.
    mock_df.columns = ["col1"]

    # The Series that rand() should end up using as its anchor.
    mock_series = mock.create_autospec(series.Series, instance=True)

    # ``iloc`` is used with subscript syntax, so stub ``__getitem__`` on a
    # stand-in indexer object.
    mock_indexer = mock.MagicMock()
    mock_indexer.__getitem__.return_value = mock_series
    # A PropertyMock has to be attached to the mock's type rather than to the
    # mock instance itself.
    type(mock_df).iloc = mock.PropertyMock(return_value=mock_indexer)

    bbq.rand(mock_df)

    # The anchor must be the first column, i.e. ``iloc[:, 0]``.
    mock_indexer.__getitem__.assert_called_once_with((slice(None), 0))

    # Same expectations as for a Series input: a single RAND() scalar op with
    # a float output type and no additional operands.
    mock_series._apply_nary_op.assert_called_once()
    args, _ = mock_series._apply_nary_op.call_args
    op = args[0]
    assert isinstance(op, ops.SqlScalarOp)
    assert op.sql_template == "RAND()"
    assert op._output_type == dtypes.FLOAT_DTYPE
    assert args[1] == []

0 commit comments

Comments
 (0)