Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit c9016a3

Browse files
authored
Merge branch 'main' into sycai_ai_generate_output_schema
2 parents 780ff9f + fa4e46f commit c9016a3

14 files changed

Lines changed: 172 additions & 134 deletions

File tree

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,29 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.24.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.23.0...v2.24.0) (2025-10-07)
8+
9+
10+
### Features
11+
12+
* Add ai.classify() to bigframes.bigquery package ([#2137](https://github.com/googleapis/python-bigquery-dataframes/issues/2137)) ([56e5033](https://github.com/googleapis/python-bigquery-dataframes/commit/56e50331d198b7f517f85695c208f893ab9389d2))
13+
* Add ai.generate() to bigframes.bigquery module ([#2128](https://github.com/googleapis/python-bigquery-dataframes/issues/2128)) ([3810452](https://github.com/googleapis/python-bigquery-dataframes/commit/3810452f16d8d6c9d3eb9075f1537177d98b4725))
14+
* Add ai.if_() and ai.score() to bigframes.bigquery package ([#2132](https://github.com/googleapis/python-bigquery-dataframes/issues/2132)) ([32502f4](https://github.com/googleapis/python-bigquery-dataframes/commit/32502f4195306d262788f39d1ab4206fc84ae50e))
15+
16+
17+
### Bug Fixes
18+
19+
* Fix internal type errors with temporal accessors ([#2125](https://github.com/googleapis/python-bigquery-dataframes/issues/2125)) ([c390da1](https://github.com/googleapis/python-bigquery-dataframes/commit/c390da11b7c2aa710bc2fbc692efb9f06059e4c4))
20+
* Fix row count local execution bug ([#2133](https://github.com/googleapis/python-bigquery-dataframes/issues/2133)) ([ece0762](https://github.com/googleapis/python-bigquery-dataframes/commit/ece07623e354a1dde2bd37020349e13f682e863f))
21+
* Join on, how args are now positional ([#2140](https://github.com/googleapis/python-bigquery-dataframes/issues/2140)) ([b711815](https://github.com/googleapis/python-bigquery-dataframes/commit/b7118152bfecc6ecf67aa4df23ec3f0a2b08aa30))
22+
* Only show JSON dtype warning when accessing dtypes directly ([#2136](https://github.com/googleapis/python-bigquery-dataframes/issues/2136)) ([eca22ee](https://github.com/googleapis/python-bigquery-dataframes/commit/eca22ee3104104cea96189391e527cad09bd7509))
23+
* Remove noisy AmbiguousWindowWarning from partial ordering mode ([#2129](https://github.com/googleapis/python-bigquery-dataframes/issues/2129)) ([4607f86](https://github.com/googleapis/python-bigquery-dataframes/commit/4607f86ebd77b916aafc37f69725b676e203b332))
24+
25+
26+
### Performance Improvements
27+
28+
* Scale read stream workers to cpu count ([#2135](https://github.com/googleapis/python-bigquery-dataframes/issues/2135)) ([67e46cd](https://github.com/googleapis/python-bigquery-dataframes/commit/67e46cd47933b84b55808003ed344b559e47c498))
29+
730
## [2.23.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.22.0...v2.23.0) (2025-09-29)
831

932

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,9 @@
1818

1919
from bigframes import dtypes
2020
from bigframes import operations as ops
21+
from bigframes.core.compile.sqlglot import sqlglot_types
2122
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
2223
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
23-
from bigframes.core.compile.sqlglot.sqlglot_types import SQLGlotType
2424

2525
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
2626

@@ -29,7 +29,7 @@
2929
def _(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression:
3030
from_type = expr.dtype
3131
to_type = op.to_type
32-
sg_to_type = SQLGlotType.from_bigframes_dtype(to_type)
32+
sg_to_type = sqlglot_types.from_bigframes_dtype(to_type)
3333
sg_expr = expr.expr
3434

3535
if to_type == dtypes.JSON_DTYPE:

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ def from_pyarrow(
7979
expressions=[
8080
sge.ColumnDef(
8181
this=sge.to_identifier(field.column, quoted=True),
82-
kind=sgt.SQLGlotType.from_bigframes_dtype(field.dtype),
82+
kind=sgt.from_bigframes_dtype(field.dtype),
8383
)
8484
for field in schema.items
8585
],
@@ -620,7 +620,7 @@ def _select_to_cte(expr: sge.Select, cte_name: sge.Identifier) -> sge.Select:
620620

621621

622622
def _literal(value: typing.Any, dtype: dtypes.Dtype) -> sge.Expression:
623-
sqlglot_type = sgt.SQLGlotType.from_bigframes_dtype(dtype)
623+
sqlglot_type = sgt.from_bigframes_dtype(dtype)
624624
if value is None:
625625
return _cast(sge.Null(), sqlglot_type)
626626
elif dtype == dtypes.BYTES_DTYPE:

bigframes/core/compile/sqlglot/sqlglot_types.py

Lines changed: 52 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -25,62 +25,57 @@
2525
import bigframes.dtypes
2626

2727

28-
class SQLGlotType:
29-
@classmethod
30-
def from_bigframes_dtype(
31-
cls,
32-
bigframes_dtype: typing.Union[
33-
bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype, np.dtype[typing.Any]
34-
],
35-
) -> str:
36-
if bigframes_dtype == bigframes.dtypes.INT_DTYPE:
37-
return "INT64"
38-
elif bigframes_dtype == bigframes.dtypes.FLOAT_DTYPE:
39-
return "FLOAT64"
40-
elif bigframes_dtype == bigframes.dtypes.STRING_DTYPE:
41-
return "STRING"
42-
elif bigframes_dtype == bigframes.dtypes.BOOL_DTYPE:
43-
return "BOOLEAN"
44-
elif bigframes_dtype == bigframes.dtypes.DATE_DTYPE:
45-
return "DATE"
46-
elif bigframes_dtype == bigframes.dtypes.TIME_DTYPE:
47-
return "TIME"
48-
elif bigframes_dtype == bigframes.dtypes.DATETIME_DTYPE:
49-
return "DATETIME"
50-
elif bigframes_dtype == bigframes.dtypes.TIMESTAMP_DTYPE:
51-
return "TIMESTAMP"
52-
elif bigframes_dtype == bigframes.dtypes.BYTES_DTYPE:
53-
return "BYTES"
54-
elif bigframes_dtype == bigframes.dtypes.NUMERIC_DTYPE:
55-
return "NUMERIC"
56-
elif bigframes_dtype == bigframes.dtypes.BIGNUMERIC_DTYPE:
57-
return "BIGNUMERIC"
58-
elif bigframes_dtype == bigframes.dtypes.JSON_DTYPE:
59-
return "JSON"
60-
elif bigframes_dtype == bigframes.dtypes.GEO_DTYPE:
61-
return "GEOGRAPHY"
62-
elif bigframes_dtype == bigframes.dtypes.TIMEDELTA_DTYPE:
63-
return "INT64"
64-
elif isinstance(bigframes_dtype, pd.ArrowDtype):
65-
if pa.types.is_list(bigframes_dtype.pyarrow_dtype):
66-
inner_bigframes_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
67-
bigframes_dtype.pyarrow_dtype.value_type
28+
def from_bigframes_dtype(
29+
bigframes_dtype: typing.Union[
30+
bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype, np.dtype[typing.Any]
31+
],
32+
) -> str:
33+
if bigframes_dtype == bigframes.dtypes.INT_DTYPE:
34+
return "INT64"
35+
elif bigframes_dtype == bigframes.dtypes.FLOAT_DTYPE:
36+
return "FLOAT64"
37+
elif bigframes_dtype == bigframes.dtypes.STRING_DTYPE:
38+
return "STRING"
39+
elif bigframes_dtype == bigframes.dtypes.BOOL_DTYPE:
40+
return "BOOLEAN"
41+
elif bigframes_dtype == bigframes.dtypes.DATE_DTYPE:
42+
return "DATE"
43+
elif bigframes_dtype == bigframes.dtypes.TIME_DTYPE:
44+
return "TIME"
45+
elif bigframes_dtype == bigframes.dtypes.DATETIME_DTYPE:
46+
return "DATETIME"
47+
elif bigframes_dtype == bigframes.dtypes.TIMESTAMP_DTYPE:
48+
return "TIMESTAMP"
49+
elif bigframes_dtype == bigframes.dtypes.BYTES_DTYPE:
50+
return "BYTES"
51+
elif bigframes_dtype == bigframes.dtypes.NUMERIC_DTYPE:
52+
return "NUMERIC"
53+
elif bigframes_dtype == bigframes.dtypes.BIGNUMERIC_DTYPE:
54+
return "BIGNUMERIC"
55+
elif bigframes_dtype == bigframes.dtypes.JSON_DTYPE:
56+
return "JSON"
57+
elif bigframes_dtype == bigframes.dtypes.GEO_DTYPE:
58+
return "GEOGRAPHY"
59+
elif bigframes_dtype == bigframes.dtypes.TIMEDELTA_DTYPE:
60+
return "INT64"
61+
elif isinstance(bigframes_dtype, pd.ArrowDtype):
62+
if pa.types.is_list(bigframes_dtype.pyarrow_dtype):
63+
inner_bigframes_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
64+
bigframes_dtype.pyarrow_dtype.value_type
65+
)
66+
return f"ARRAY<{from_bigframes_dtype(inner_bigframes_dtype)}>"
67+
elif pa.types.is_struct(bigframes_dtype.pyarrow_dtype):
68+
struct_type = typing.cast(pa.StructType, bigframes_dtype.pyarrow_dtype)
69+
inner_fields: list[str] = []
70+
for i in range(struct_type.num_fields):
71+
field = struct_type.field(i)
72+
key = sg.to_identifier(field.name).sql("bigquery")
73+
dtype = from_bigframes_dtype(
74+
bigframes.dtypes.arrow_dtype_to_bigframes_dtype(field.type)
6875
)
69-
return (
70-
f"ARRAY<{SQLGlotType.from_bigframes_dtype(inner_bigframes_dtype)}>"
71-
)
72-
elif pa.types.is_struct(bigframes_dtype.pyarrow_dtype):
73-
struct_type = typing.cast(pa.StructType, bigframes_dtype.pyarrow_dtype)
74-
inner_fields: list[str] = []
75-
for i in range(struct_type.num_fields):
76-
field = struct_type.field(i)
77-
key = sg.to_identifier(field.name).sql("bigquery")
78-
dtype = SQLGlotType.from_bigframes_dtype(
79-
bigframes.dtypes.arrow_dtype_to_bigframes_dtype(field.type)
80-
)
81-
inner_fields.append(f"{key} {dtype}")
82-
return "STRUCT<{}>".format(", ".join(inner_fields))
76+
inner_fields.append(f"{key} {dtype}")
77+
return "STRUCT<{}>".format(", ".join(inner_fields))
8378

84-
raise ValueError(
85-
f"Unsupported type for {bigframes_dtype}. {constants.FEEDBACK_LINK}"
86-
)
79+
raise ValueError(
80+
f"Unsupported type for {bigframes_dtype}. {constants.FEEDBACK_LINK}"
81+
)

bigframes/functions/_function_session.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -983,7 +983,17 @@ def _convert_row_processor_sig(
983983
if len(signature.parameters) >= 1:
984984
first_param = next(iter(signature.parameters.values()))
985985
param_type = first_param.annotation
986-
if (param_type == bf_series.Series) or (param_type == pandas.Series):
986+
# Type hints for Series inputs should use pandas.Series because the
987+
# underlying serialization process converts the input to a string
988+
# representation of a pandas Series (not bigframes Series). Using
989+
# bigframes Series will lead to TypeError when creating the function
990+
# remotely. See more from b/445182819.
991+
if param_type == bf_series.Series:
992+
raise bf_formatting.create_exception_with_feedback_link(
993+
TypeError,
994+
"Argument type hint must be Pandas Series, not BigFrames Series.",
995+
)
996+
if param_type == pandas.Series:
987997
msg = bfe.format_message("input_types=Series is in preview.")
988998
warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning)
989999
return signature.replace(

bigframes/functions/function_template.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -363,8 +363,16 @@ def generate_managed_function_code(
363363
return {udf_name}(*args)"""
364364
)
365365

366-
udf_code_block = textwrap.dedent(
367-
f"{udf_code}\n{func_code}\n{bigframes_handler_code}"
368-
)
369-
370-
return udf_code_block
366+
udf_code_block = []
367+
if not capture_references and is_row_processor:
368+
# Enable postponed evaluation of type annotations. This converts all
369+
# type hints to strings at runtime, which is necessary for correctly
370+
# handling the type annotation of pandas.Series after the UDF code is
371+
# serialized for remote execution. See more from b/445182819.
372+
udf_code_block.append("from __future__ import annotations")
373+
374+
udf_code_block.append(udf_code)
375+
udf_code_block.append(func_code)
376+
udf_code_block.append(bigframes_handler_code)
377+
378+
return textwrap.dedent("\n".join(udf_code_block))

bigframes/session/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2064,8 +2064,9 @@ def read_gbq_function(
20642064
note, row processor implies that the function has only one input
20652065
parameter.
20662066
2067+
>>> import pandas as pd
20672068
>>> @bpd.remote_function(cloud_function_service_account="default")
2068-
... def row_sum(s: bpd.Series) -> float:
2069+
... def row_sum(s: pd.Series) -> float:
20692070
... return s['a'] + s['b'] + s['c']
20702071
20712072
>>> row_sum_ref = bpd.read_gbq_function(

bigframes/version.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
__version__ = "2.23.0"
15+
__version__ = "2.24.0"
1616

1717
# {x-release-please-start-date}
18-
__release_date__ = "2025-09-29"
18+
__release_date__ = "2025-10-07"
1919
# {x-release-please-end}

tests/system/large/functions/test_managed_function.py

Lines changed: 19 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -701,8 +701,19 @@ def serialize_row(row):
701701
}
702702
)
703703

704+
with pytest.raises(
705+
TypeError,
706+
match="Argument type hint must be Pandas Series, not BigFrames Series.",
707+
):
708+
serialize_row_mf = session.udf(
709+
input_types=bigframes.series.Series,
710+
output_type=str,
711+
dataset=dataset_id,
712+
name=prefixer.create_prefix(),
713+
)(serialize_row)
714+
704715
serialize_row_mf = session.udf(
705-
input_types=bigframes.series.Series,
716+
input_types=pandas.Series,
706717
output_type=str,
707718
dataset=dataset_id,
708719
name=prefixer.create_prefix(),
@@ -762,7 +773,7 @@ def analyze(row):
762773
):
763774

764775
analyze_mf = session.udf(
765-
input_types=bigframes.series.Series,
776+
input_types=pandas.Series,
766777
output_type=str,
767778
dataset=dataset_id,
768779
name=prefixer.create_prefix(),
@@ -876,7 +887,7 @@ def serialize_row(row):
876887
)
877888

878889
serialize_row_mf = session.udf(
879-
input_types=bigframes.series.Series,
890+
input_types=pandas.Series,
880891
output_type=str,
881892
dataset=dataset_id,
882893
name=prefixer.create_prefix(),
@@ -926,7 +937,7 @@ def test_managed_function_df_apply_axis_1_na_nan_inf(dataset_id, session):
926937

927938
try:
928939

929-
def float_parser(row):
940+
def float_parser(row: pandas.Series):
930941
import numpy as mynp
931942
import pandas as mypd
932943

@@ -937,7 +948,7 @@ def float_parser(row):
937948
return float(row["text"])
938949

939950
float_parser_mf = session.udf(
940-
input_types=bigframes.series.Series,
951+
input_types=pandas.Series,
941952
output_type=float,
942953
dataset=dataset_id,
943954
name=prefixer.create_prefix(),
@@ -1027,7 +1038,7 @@ def test_managed_function_df_apply_axis_1_series_args(session, dataset_id, scala
10271038

10281039
try:
10291040

1030-
def analyze(s, x, y):
1041+
def analyze(s: pandas.Series, x: bool, y: float) -> str:
10311042
value = f"value is {s['int64_col']} and {s['float64_col']}"
10321043
if x:
10331044
return f"{value}, x is True!"
@@ -1036,8 +1047,6 @@ def analyze(s, x, y):
10361047
return f"{value}, x is False, y is non-positive!"
10371048

10381049
analyze_mf = session.udf(
1039-
input_types=[bigframes.series.Series, bool, float],
1040-
output_type=str,
10411050
dataset=dataset_id,
10421051
name=prefixer.create_prefix(),
10431052
)(analyze)
@@ -1151,7 +1160,7 @@ def is_sum_positive_series(s):
11511160
return s["int64_col"] + s["int64_too"] > 0
11521161

11531162
is_sum_positive_series_mf = session.udf(
1154-
input_types=bigframes.series.Series,
1163+
input_types=pandas.Series,
11551164
output_type=bool,
11561165
dataset=dataset_id,
11571166
name=prefixer.create_prefix(),
@@ -1217,12 +1226,10 @@ def func_for_other(x):
12171226
def test_managed_function_df_where_other_issue(session, dataset_id, scalars_df_index):
12181227
try:
12191228

1220-
def the_sum(s):
1229+
def the_sum(s: pandas.Series) -> int:
12211230
return s["int64_col"] + s["int64_too"]
12221231

12231232
the_sum_mf = session.udf(
1224-
input_types=bigframes.series.Series,
1225-
output_type=int,
12261233
dataset=dataset_id,
12271234
name=prefixer.create_prefix(),
12281235
)(the_sum)

0 commit comments

Comments
 (0)