Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit f88e263

Browse files
committed
Merge branch 'main' into shuowei-anywidget-single-index
2 parents 2e41e54 + 7f1d3df commit f88e263

29 files changed

Lines changed: 1136 additions & 814 deletions

File tree

.librarian/state.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620
22
libraries:
33
- id: bigframes
4-
version: 2.30.0
4+
version: 2.31.0
55
last_generated_commit: ""
66
apis: []
77
source_roots:

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,19 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.31.0](https://github.com/googleapis/google-cloud-python/compare/bigframes-v2.30.0...bigframes-v2.31.0) (2025-12-10)
8+
9+
10+
### Features
11+
12+
* add `bigframes.bigquery.ml` methods (#2300) ([719b278c844ca80c1bec741873b30a9ee4fd6c56](https://github.com/googleapis/google-cloud-python/commit/719b278c844ca80c1bec741873b30a9ee4fd6c56))
13+
* add 'weekday' property to DatetimeMethods (#2304) ([fafd7c732d434eca3f8b5d849a87149f106e3d5d](https://github.com/googleapis/google-cloud-python/commit/fafd7c732d434eca3f8b5d849a87149f106e3d5d))
14+
15+
16+
### Bug Fixes
17+
18+
* cache DataFrames to temp tables in bigframes.bigquery.ml methods to avoid time travel (#2318) ([d99383195ac3f1683842cfe472cca5a914b04d8e](https://github.com/googleapis/google-cloud-python/commit/d99383195ac3f1683842cfe472cca5a914b04d8e))
19+
720
## [2.30.0](https://github.com/googleapis/google-cloud-python/compare/bigframes-v2.29.0...bigframes-v2.30.0) (2025-12-03)
821

922

bigframes/bigquery/_operations/ml.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ def _to_sql(df_or_sql: Union[pd.DataFrame, dataframe.DataFrame, str]) -> str:
3939
else:
4040
bf_df = cast(dataframe.DataFrame, df_or_sql)
4141

42+
# Cache dataframes to make sure base table is not a snapshot.
43+
# Cached dataframe creates a full copy, never uses snapshot.
44+
# This is a workaround for internal issue b/310266666.
45+
bf_df.cache()
4246
sql, _, _ = bf_df._to_sql_query(include_index=False)
4347
return sql
4448

bigframes/core/compile/polars/compiler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,11 @@ def _(
152152
value = None
153153
if expression.dtype is None:
154154
return pl.lit(None)
155+
156+
# Polars lit does not handle pandas timedelta well at v1.36
157+
if isinstance(value, pd.Timedelta):
158+
value = value.to_pytimedelta()
159+
155160
return pl.lit(value, _bigframes_dtype_to_polars_dtype(expression.dtype))
156161

157162
@compile_expression.register

bigframes/core/compile/sqlglot/expressions/datetime_ops.py

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,272 @@
2323
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
2424

2525
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
26+
register_binary_op = scalar_compiler.scalar_op_compiler.register_binary_op
27+
28+
29+
def _calculate_resample_first(y: TypedExpr, origin: str) -> sge.Expression:
    """Build the expression for the resampling origin, in microseconds.

    Args:
        y: Typed expression whose ``expr`` supplies the reference timestamp
            for the "start" and "start_day" origins. It is not read for
            "epoch".
        origin: One of "epoch", "start_day" or "start".

    Returns:
        The literal ``0`` for "epoch"; otherwise a ``UNIX_MICROS`` call over
        ``y.expr`` cast to TIMESTAMPTZ (cast to DATE first for "start_day").

    Raises:
        ValueError: If ``origin`` is not one of the supported values.
    """
    if origin == "epoch":
        return sge.convert(0)

    if origin not in ("start_day", "start"):
        raise ValueError(f"Origin {origin} not supported")

    anchor = y.expr
    if origin == "start_day":
        # Going through DATE before TIMESTAMPTZ anchors the origin on the
        # day of ``y`` rather than on its exact timestamp.
        anchor = sge.Cast(this=anchor, to=sge.DataType(this=sge.DataType.Type.DATE))

    return sge.func(
        "UNIX_MICROS",
        sge.Cast(this=anchor, to=sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ)),
    )
49+
50+
51+
@register_binary_op(ops.DatetimeToIntegerLabelOp, pass_op=True)
def datetime_to_integer_label_op(
    x: TypedExpr, y: TypedExpr, op: ops.DatetimeToIntegerLabelOp
) -> sge.Expression:
    """Compile ``DatetimeToIntegerLabelOp`` into an integer bin-label expression.

    Dispatches to the fixed-frequency builder when ``op.freq.nanos`` is
    available, and to the non-fixed-frequency builder otherwise.

    Args:
        x: Typed expression for the timestamp to label.
        y: Typed expression for the reference ("first") timestamp.
        op: The operation, carrying ``freq`` and ``origin``.

    Returns:
        The expression computing the integer label of ``x``.

    Raises:
        ValueError: If the origin is unsupported (fixed frequencies) or the
            frequency rule code is unsupported (non-fixed frequencies).
    """
    try:
        # Probe only: reading 'op.freq.nanos' raises ValueError when the
        # frequency is not fixed. Keeping the try this narrow ensures that a
        # ValueError raised while building the fixed-frequency expression
        # (e.g. an unsupported origin) is not mistaken for "non-fixed
        # frequency" and replaced by an unrelated error from the fallback.
        _ = op.freq.nanos
    except ValueError:
        return _datetime_to_integer_label_non_fixed_frequency(x, y, op)
    return _datetime_to_integer_label_fixed_frequency(x, y, op)
60+
61+
62+
def _datetime_to_integer_label_fixed_frequency(
    x: TypedExpr, y: TypedExpr, op: ops.DatetimeToIntegerLabelOp
) -> sge.Expression:
    """Build the integer label for a fixed frequency (microseconds up to days).

    The result is ``CAST(FLOOR(IEEE_DIVIDE(UNIX_MICROS(x) - first, freq_us))
    AS INT64)``, where ``first`` is the origin produced by
    ``_calculate_resample_first`` and ``freq_us`` is the frequency width in
    whole microseconds.

    Args:
        x: Typed expression for the timestamp to label.
        y: Typed expression forwarded to ``_calculate_resample_first``.
        op: The operation; ``op.freq.nanos`` and ``op.origin`` are read.

    Returns:
        The expression computing the integer label of ``x``.
    """
    freq_us = op.freq.nanos / 1000

    x_micros = sge.func(
        "UNIX_MICROS",
        sge.Cast(this=x.expr, to=sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ)),
    )
    origin_micros = _calculate_resample_first(y, op.origin)  # type: ignore

    elapsed = sge.Sub(this=x_micros, expression=origin_micros)
    # The frequency width is truncated to a whole number of microseconds.
    bucket = sge.Floor(
        this=sge.func("IEEE_DIVIDE", elapsed, sge.convert(int(freq_us)))
    )
    return sge.Cast(this=bucket, to=sge.DataType.build("INT64"))
86+
87+
88+
def _resample_week_end_micros(expr: sge.Expression) -> sge.Expression:
    """Return ``UNIX_MICROS`` of ``expr`` truncated to WEEK(MONDAY) plus 6 days."""
    week_start = sge.TimestampTrunc(this=expr, unit=sge.Var(this="WEEK(MONDAY)"))
    week_end = sge.Add(
        this=week_start,
        expression=sge.Interval(
            this=sge.convert(6), unit=sge.Identifier(this="DAY")
        ),
    )
    return sge.func(
        "UNIX_MICROS",
        sge.Cast(this=week_end, to=sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ)),
    )


def _resample_period_index(
    expr: sge.Expression, unit: str, periods_per_year: int
) -> sge.Expression:
    """Return ``(EXTRACT(YEAR) * periods_per_year + (EXTRACT(unit) - 1))``."""
    return sge.Paren(
        this=sge.Add(
            this=sge.Mul(
                this=sge.Extract(this=sge.Identifier(this="YEAR"), expression=expr),
                expression=sge.convert(periods_per_year),
            ),
            expression=sge.Sub(
                this=sge.Extract(this=sge.Identifier(this=unit), expression=expr),
                expression=sge.convert(1),
            ),
        )
    )


def _resample_offset_label(
    x_int: sge.Expression, first: sge.Expression, divisor: int
) -> sge.Expression:
    """Return the label CASE shared by every non-fixed frequency.

    The expression is ``0`` when ``x_int = first`` and otherwise
    ``CAST(FLOOR(IEEE_DIVIDE(x_int - first - 1, divisor)) AS INT64) + 1``.
    """
    return sge.Case(
        ifs=[
            sge.If(
                this=sge.EQ(this=x_int, expression=first),
                true=sge.convert(0),
            )
        ],
        default=sge.Add(
            this=sge.Cast(
                this=sge.Floor(
                    this=sge.func(
                        "IEEE_DIVIDE",
                        sge.Sub(
                            this=sge.Sub(this=x_int, expression=first),
                            expression=sge.convert(1),
                        ),
                        sge.convert(divisor),
                    )
                ),
                to=sge.DataType.build("INT64"),
            ),
            expression=sge.convert(1),
        ),
    )


def _datetime_to_integer_label_non_fixed_frequency(
    x: TypedExpr, y: TypedExpr, op: ops.DatetimeToIntegerLabelOp
) -> sge.Expression:
    """Build the integer label for a non-fixed frequency (weeks to years).

    Both ``x`` and ``y`` are first mapped onto an integer scale that depends
    on the frequency rule code, then combined by ``_resample_offset_label``:

    * ``W-SUN``: microseconds of the week end (WEEK(MONDAY) start + 6 days);
      the divisor is ``n`` weeks expressed in microseconds.
    * ``ME``: month index ``YEAR * 12 + (MONTH - 1)``; the divisor is ``n``.
    * ``QE-DEC``: quarter index ``YEAR * 4 + (QUARTER - 1)``; the divisor is ``n``.
    * ``YE-DEC``: the year; the divisor is ``n``.

    Args:
        x: Typed expression for the timestamp to label.
        y: Typed expression for the reference ("first") timestamp.
        op: The operation; ``op.freq.rule_code`` and ``op.freq.n`` are read.

    Returns:
        The expression computing the integer label of ``x``.

    Raises:
        ValueError: If the rule code is not one of the four listed above.
    """
    rule_code = op.freq.rule_code
    n = op.freq.n

    if rule_code == "W-SUN":  # Weekly
        us = n * 7 * 24 * 60 * 60 * 1000000
        return _resample_offset_label(
            _resample_week_end_micros(x.expr),
            _resample_week_end_micros(y.expr),
            us,
        )

    if rule_code == "ME":  # Monthly
        return _resample_offset_label(
            _resample_period_index(x.expr, "MONTH", 12),
            _resample_period_index(y.expr, "MONTH", 12),
            n,
        )

    if rule_code == "QE-DEC":  # Quarterly
        return _resample_offset_label(
            _resample_period_index(x.expr, "QUARTER", 4),
            _resample_period_index(y.expr, "QUARTER", 4),
            n,
        )

    if rule_code == "YE-DEC":  # Yearly
        return _resample_offset_label(
            sge.Extract(this=sge.Identifier(this="YEAR"), expression=x.expr),
            sge.Extract(this=sge.Identifier(this="YEAR"), expression=y.expr),
            n,
        )

    raise ValueError(rule_code)
26292

27293

28294
@register_unary_op(ops.FloorDtOp, pass_op=True)

bigframes/dataframe.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -844,36 +844,45 @@ def _get_display_df_and_blob_cols(self) -> tuple[DataFrame, list[str]]:
844844
df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True)
845845
return df, blob_cols
846846

847-
def _get_anywidget_bundle(self, include=None, exclude=None):
847+
def _get_anywidget_bundle(
848+
self, include=None, exclude=None
849+
) -> tuple[dict[str, Any], dict[str, Any]]:
848850
"""
849851
Helper method to create and return the anywidget mimebundle.
850852
This function encapsulates the logic for anywidget display.
851853
"""
852854
from bigframes import display
853855

854-
# TODO(shuowei): Keep blob_cols and pass them to TableWidget so that they can render properly.
855-
df, _ = self._get_display_df_and_blob_cols()
856+
df, blob_cols = self._get_display_df_and_blob_cols()
856857

857858
# Create and display the widget
858859
widget = display.TableWidget(df)
859860
widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude)
860861

861862
# Handle both tuple (data, metadata) and dict returns
862863
if isinstance(widget_repr_result, tuple):
863-
widget_repr = dict(widget_repr_result[0]) # Extract data dict from tuple
864+
widget_repr, widget_metadata = widget_repr_result
864865
else:
865-
widget_repr = dict(widget_repr_result)
866+
widget_repr = widget_repr_result
867+
widget_metadata = {}
868+
869+
widget_repr = dict(widget_repr)
866870

867871
# At this point, we have already executed the query as part of the
868872
# widget construction. Let's use the information available to render
869873
# the HTML and plain text versions.
870-
widget_repr["text/html"] = widget.table_html
874+
widget_repr["text/html"] = self._create_html_representation(
875+
widget._cached_data,
876+
widget.row_count,
877+
len(self.columns),
878+
blob_cols,
879+
)
871880

872881
widget_repr["text/plain"] = self._create_text_representation(
873882
widget._cached_data, widget.row_count
874883
)
875884

876-
return widget_repr
885+
return widget_repr, widget_metadata
877886

878887
def _create_text_representation(
879888
self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int]
@@ -909,6 +918,8 @@ def _repr_mimebundle_(self, include=None, exclude=None):
909918
Custom display method for IPython/Jupyter environments.
910919
This is called by IPython's display system when the object is displayed.
911920
"""
921+
# TODO(b/467647693): Anywidget integration has been tested in Jupyter, VS Code, and
922+
# BQ Studio, but there is a known compatibility issue with Marimo that needs to be addressed.
912923
opts = bigframes.options.display
913924
# Only handle widget display in anywidget mode
914925
if opts.repr_mode == "anywidget":

0 commit comments

Comments
 (0)