Commit a3c3844

Merge branch 'main' into ai-gsutil-migration-0fbb1a7f1f0d45afa766d587ef7e1404
2 parents: eac89bd + fbd3a55

File tree: 37 files changed, +879 -300 lines changed


.librarian/state.yaml

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:e7cc6823efb073a8a26e7cefdd869f12ec228abfbd2a44aa9a7eacc284023677
 libraries:
 - id: bigframes
-  version: 2.33.0
+  version: 2.34.0
   last_generated_commit: ""
   apis: []
   source_roots:

CHANGELOG.md

Lines changed: 16 additions & 0 deletions

@@ -4,6 +4,22 @@

 [1]: https://pypi.org/project/bigframes/#history

+## [2.34.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.33.0...v2.34.0) (2026-02-02)
+
+
+### Features
+
+* add `bigframes.pandas.options.experiments.sql_compiler` for switching the backend compiler (#2417) ([7eba6ee03f07938315d99e2aeaf72368c02074cf](https://github.com/googleapis/python-bigquery-dataframes/commit/7eba6ee03f07938315d99e2aeaf72368c02074cf))
+* add bigquery.ml.generate_embedding function (#2422) ([35f3f5e6f8c64b47e6e7214034f96f047785e647](https://github.com/googleapis/python-bigquery-dataframes/commit/35f3f5e6f8c64b47e6e7214034f96f047785e647))
+* add bigquery.create_external_table method (#2415) ([76db2956e505aec4f1055118ac7ca523facc10ff](https://github.com/googleapis/python-bigquery-dataframes/commit/76db2956e505aec4f1055118ac7ca523facc10ff))
+* add deprecation warnings for .blob accessor and read_gbq_object_table (#2408) ([7261a4ea5cdab6b30f5bc333501648c60e70be59](https://github.com/googleapis/python-bigquery-dataframes/commit/7261a4ea5cdab6b30f5bc333501648c60e70be59))
+* add bigquery.ml.generate_text function (#2403) ([5ac681028624de15e31f0c2ae360b47b2dcf1e8d](https://github.com/googleapis/python-bigquery-dataframes/commit/5ac681028624de15e31f0c2ae360b47b2dcf1e8d))
+
+
+### Bug Fixes
+
+* broken job url (#2411) ([fcb5bc1761c656e1aec61dbcf96a36d436833b7a](https://github.com/googleapis/python-bigquery-dataframes/commit/fcb5bc1761c656e1aec61dbcf96a36d436833b7a))
+
 ## [2.33.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.32.0...v2.33.0) (2026-01-22)

bigframes/_config/experiment_options.py

Lines changed: 20 additions & 1 deletion

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Optional
+from typing import Literal, Optional
 import warnings

 import bigframes

@@ -27,6 +27,7 @@ class ExperimentOptions:
     def __init__(self):
         self._semantic_operators: bool = False
         self._ai_operators: bool = False
+        self._sql_compiler: Literal["legacy", "stable", "experimental"] = "stable"

     @property
     def semantic_operators(self) -> bool:

@@ -55,6 +56,24 @@ def ai_operators(self, value: bool):
             warnings.warn(msg, category=bfe.PreviewWarning)
         self._ai_operators = value

+    @property
+    def sql_compiler(self) -> Literal["legacy", "stable", "experimental"]:
+        return self._sql_compiler
+
+    @sql_compiler.setter
+    def sql_compiler(self, value: Literal["legacy", "stable", "experimental"]):
+        if value not in ["legacy", "stable", "experimental"]:
+            raise ValueError(
+                "sql_compiler must be one of 'legacy', 'stable', or 'experimental'"
+            )
+        if value == "experimental":
+            msg = bfe.format_message(
+                "The experimental SQL compiler is still under experiments, and is subject "
+                "to change in the future."
+            )
+            warnings.warn(msg, category=FutureWarning)
+        self._sql_compiler = value
+
     @property
     def blob(self) -> bool:
         msg = bfe.format_message(
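
For context (not part of the diff): a minimal usage sketch of the new option, assuming the standard bigframes.pandas entry point. Only the "experimental" value triggers the FutureWarning; invalid values raise ValueError.

# Sketch only: exercising bigframes.pandas.options.experiments.sql_compiler.
import warnings

import bigframes.pandas as bpd

print(bpd.options.experiments.sql_compiler)  # "stable" is the default

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    bpd.options.experiments.sql_compiler = "experimental"  # emits a FutureWarning
    assert any(issubclass(w.category, FutureWarning) for w in caught)

try:
    bpd.options.experiments.sql_compiler = "fast"  # not an allowed value
except ValueError as err:
    print(err)  # sql_compiler must be one of 'legacy', 'stable', or 'experimental'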

bigframes/bigquery/__init__.py

Lines changed: 5 additions & 0 deletions

@@ -60,6 +60,7 @@
 from bigframes.bigquery._operations.search import create_vector_index, vector_search
 from bigframes.bigquery._operations.sql import sql_scalar
 from bigframes.bigquery._operations.struct import struct
+from bigframes.bigquery._operations.table import create_external_table
 from bigframes.core.logging import log_adapter

 _functions = [

@@ -104,6 +105,8 @@
     sql_scalar,
     # struct ops
     struct,
+    # table ops
+    create_external_table,
 ]

 _module = sys.modules[__name__]

@@ -155,6 +158,8 @@
     "sql_scalar",
     # struct ops
     "struct",
+    # table ops
+    "create_external_table",
     # Modules / SQL namespaces
     "ai",
     "ml",

bigframes/bigquery/_operations/ml.py

Lines changed: 60 additions & 0 deletions

@@ -520,3 +520,63 @@ def generate_text(
         return bpd.read_gbq_query(sql)
     else:
         return session.read_gbq_query(sql)
+
+
+@log_adapter.method_logger(custom_base_name="bigquery_ml")
+def generate_embedding(
+    model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
+    input_: Union[pd.DataFrame, dataframe.DataFrame, str],
+    *,
+    flatten_json_output: Optional[bool] = None,
+    task_type: Optional[str] = None,
+    output_dimensionality: Optional[int] = None,
+) -> dataframe.DataFrame:
+    """
+    Generates text embedding using a BigQuery ML model.
+
+    See the `BigQuery ML GENERATE_EMBEDDING function syntax
+    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-generate-embedding>`_
+    for additional reference.
+
+    Args:
+        model (bigframes.ml.base.BaseEstimator or str):
+            The model to use for text embedding.
+        input_ (Union[bigframes.pandas.DataFrame, str]):
+            The DataFrame or query to use for text embedding.
+        flatten_json_output (bool, optional):
+            A BOOL value that determines the content of the generated JSON column.
+        task_type (str, optional):
+            A STRING value that specifies the intended downstream application task.
+            Supported values are:
+            - `RETRIEVAL_QUERY`
+            - `RETRIEVAL_DOCUMENT`
+            - `SEMANTIC_SIMILARITY`
+            - `CLASSIFICATION`
+            - `CLUSTERING`
+            - `QUESTION_ANSWERING`
+            - `FACT_VERIFICATION`
+            - `CODE_RETRIEVAL_QUERY`
+        output_dimensionality (int, optional):
+            An INT64 value that specifies the size of the output embedding.
+
+    Returns:
+        bigframes.pandas.DataFrame:
+            The generated text embedding.
+    """
+    import bigframes.pandas as bpd
+
+    model_name, session = _get_model_name_and_session(model, input_)
+    table_sql = _to_sql(input_)
+
+    sql = bigframes.core.sql.ml.generate_embedding(
+        model_name=model_name,
+        table=table_sql,
+        flatten_json_output=flatten_json_output,
+        task_type=task_type,
+        output_dimensionality=output_dimensionality,
+    )
+
+    if session is None:
+        return bpd.read_gbq_query(sql)
+    else:
+        return session.read_gbq_query(sql)
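
Not part of the diff: a hedged usage sketch of the new function via the bigframes.bigquery namespace. The project, dataset, model, and table names below are hypothetical placeholders; BigQuery ML text embedding models conventionally expect the input column to be named "content".

# Sketch only: generating embeddings with the newly added helper.
import bigframes.bigquery as bbq
import bigframes.pandas as bpd

reviews = bpd.read_gbq("my-project.my_dataset.reviews")  # hypothetical table with a "content" column

embeddings = bbq.ml.generate_embedding(
    "my-project.my_dataset.embedding_model",  # hypothetical remote embedding model, passed by name
    reviews,
    task_type="SEMANTIC_SIMILARITY",
    output_dimensionality=256,
)
print(embeddings.dtypes)
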
bigframes/bigquery/_operations/table.py

Lines changed: 99 additions & 0 deletions

@@ -0,0 +1,99 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Mapping, Optional, Union
+
+import google.cloud.bigquery
+import pandas as pd
+
+import bigframes.core.logging.log_adapter as log_adapter
+import bigframes.core.sql.table
+import bigframes.session
+
+
+def _get_table_metadata(
+    *,
+    bqclient: google.cloud.bigquery.Client,
+    table_name: str,
+) -> pd.Series:
+    table_metadata = bqclient.get_table(table_name)
+    table_dict = table_metadata.to_api_repr()
+    return pd.Series(table_dict)
+
+
+@log_adapter.method_logger(custom_base_name="bigquery_table")
+def create_external_table(
+    table_name: str,
+    *,
+    replace: bool = False,
+    if_not_exists: bool = False,
+    columns: Optional[Mapping[str, str]] = None,
+    partition_columns: Optional[Mapping[str, str]] = None,
+    connection_name: Optional[str] = None,
+    options: Mapping[str, Union[str, int, float, bool, list]],
+    session: Optional[bigframes.session.Session] = None,
+) -> pd.Series:
+    """
+    Creates a BigQuery external table.
+
+    See the `BigQuery CREATE EXTERNAL TABLE DDL syntax
+    <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_external_table_statement>`_
+    for additional reference.
+
+    Args:
+        table_name (str):
+            The name of the table in BigQuery.
+        replace (bool, default False):
+            Whether to replace the table if it already exists.
+        if_not_exists (bool, default False):
+            Whether to ignore the error if the table already exists.
+        columns (Mapping[str, str], optional):
+            The table's schema.
+        partition_columns (Mapping[str, str], optional):
+            The table's partition columns.
+        connection_name (str, optional):
+            The connection to use for the table.
+        options (Mapping[str, Union[str, int, float, bool, list]]):
+            The OPTIONS clause, which specifies the table options.
+        session (bigframes.session.Session, optional):
+            The session to use. If not provided, the default session is used.
+
+    Returns:
+        pandas.Series:
+            A Series with object dtype containing the table metadata. Reference
+            the `BigQuery Table REST API reference
+            <https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table>`_
+            for available fields.
+    """
+    import bigframes.pandas as bpd
+
+    sql = bigframes.core.sql.table.create_external_table_ddl(
+        table_name=table_name,
+        replace=replace,
+        if_not_exists=if_not_exists,
+        columns=columns,
+        partition_columns=partition_columns,
+        connection_name=connection_name,
+        options=options,
+    )
+
+    if session is None:
+        bpd.read_gbq_query(sql)
+        session = bpd.get_global_session()
+    else:
+        session.read_gbq_query(sql)
+
+    return _get_table_metadata(bqclient=session.bqclient, table_name=table_name)
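
Not part of the diff: a hedged usage sketch of the new top-level helper. The project, dataset, table, and Cloud Storage URI are hypothetical placeholders; the options keys follow the CREATE EXTERNAL TABLE DDL OPTIONS clause.

# Sketch only: creating an external table over CSV files and inspecting the returned metadata.
import bigframes.bigquery as bbq

metadata = bbq.create_external_table(
    "my-project.my_dataset.ext_events",  # hypothetical destination table
    if_not_exists=True,
    columns={"event_id": "STRING", "event_ts": "TIMESTAMP"},
    options={
        "format": "CSV",
        "uris": ["gs://my-bucket/events/*.csv"],  # hypothetical bucket
        "skip_leading_rows": 1,
    },
)
print(metadata["type"])  # the Table REST resource reports external tables as "EXTERNAL"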

bigframes/bigquery/ml.py

Lines changed: 2 additions & 0 deletions

@@ -23,6 +23,7 @@
     create_model,
     evaluate,
     explain_predict,
+    generate_embedding,
     generate_text,
     global_explain,
     predict,

@@ -37,4 +38,5 @@
     "global_explain",
     "transform",
     "generate_text",
+    "generate_embedding",
 ]

bigframes/core/compile/__init__.py

Lines changed: 17 additions & 2 deletions

@@ -13,13 +13,28 @@
 # limitations under the License.
 from __future__ import annotations

+from typing import Any
+
+from bigframes import options
 from bigframes.core.compile.api import test_only_ibis_inferred_schema
 from bigframes.core.compile.configs import CompileRequest, CompileResult
-from bigframes.core.compile.ibis_compiler.ibis_compiler import compile_sql
+
+
+def compiler() -> Any:
+    """Returns the appropriate compiler module based on session options."""
+    if options.experiments.sql_compiler == "experimental":
+        import bigframes.core.compile.sqlglot.compiler as sqlglot_compiler
+
+        return sqlglot_compiler
+    else:
+        import bigframes.core.compile.ibis_compiler.ibis_compiler as ibis_compiler
+
+        return ibis_compiler
+

 __all__ = [
     "test_only_ibis_inferred_schema",
-    "compile_sql",
     "CompileRequest",
     "CompileResult",
+    "compiler",
 ]
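
Not part of the diff: a sketch of the assumed caller-side change. With the module-level compile_sql export removed, call sites are expected to resolve the compiler module through compiler() at call time so the experiments.sql_compiler option takes effect per compilation.

# Sketch only: resolving the compiler module lazily instead of importing compile_sql directly.
from bigframes.core import compile as compiling

def compile_request(request: compiling.CompileRequest) -> compiling.CompileResult:
    # Before this commit: from bigframes.core.compile import compile_sql
    # After: pick the ibis or sqlglot backend based on options.experiments.sql_compiler.
    return compiling.compiler().compile_sql(request)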

bigframes/core/compile/configs.py

Lines changed: 1 addition & 0 deletions

@@ -34,3 +34,4 @@ class CompileResult:
     sql: str
     sql_schema: typing.Sequence[google.cloud.bigquery.SchemaField]
     row_order: typing.Optional[ordering.RowOrdering]
+    encoded_type_refs: str
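
Not part of the diff: a small sketch showing that encoded_type_refs is now a required field when constructing the dataclass directly. The empty string here is a placeholder, not a real encoding.

# Sketch only: constructing CompileResult with its new field.
import google.cloud.bigquery as bq

from bigframes.core.compile.configs import CompileResult

result = CompileResult(
    sql="SELECT 1 AS x",
    sql_schema=[bq.SchemaField("x", "INTEGER")],
    row_order=None,
    encoded_type_refs="",  # placeholder; real values come from the data-type logging module
)
print(result.encoded_type_refs)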

bigframes/core/compile/ibis_compiler/ibis_compiler.py

Lines changed: 10 additions & 2 deletions

@@ -29,6 +29,7 @@
 import bigframes.core.compile.concat as concat_impl
 import bigframes.core.compile.configs as configs
 import bigframes.core.compile.explode
+from bigframes.core.logging import data_types as data_type_logger
 import bigframes.core.nodes as nodes
 import bigframes.core.ordering as bf_ordering
 import bigframes.core.rewrite as rewrites

@@ -56,23 +57,30 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
     )
     if request.sort_rows:
         result_node = cast(nodes.ResultNode, rewrites.column_pruning(result_node))
+        encoded_type_refs = data_type_logger.encode_type_refs(result_node)
         sql = compile_result_node(result_node)
         return configs.CompileResult(
-            sql, result_node.schema.to_bigquery(), result_node.order_by
+            sql,
+            result_node.schema.to_bigquery(),
+            result_node.order_by,
+            encoded_type_refs,
         )

     ordering: Optional[bf_ordering.RowOrdering] = result_node.order_by
     result_node = dataclasses.replace(result_node, order_by=None)
     result_node = cast(nodes.ResultNode, rewrites.column_pruning(result_node))
     result_node = cast(nodes.ResultNode, rewrites.defer_selection(result_node))
+    encoded_type_refs = data_type_logger.encode_type_refs(result_node)
     sql = compile_result_node(result_node)
     # Return the ordering iff no extra columns are needed to define the row order
     if ordering is not None:
         output_order = (
             ordering if ordering.referenced_columns.issubset(result_node.ids) else None
         )
     assert (not request.materialize_all_order_keys) or (output_order is not None)
-    return configs.CompileResult(sql, result_node.schema.to_bigquery(), output_order)
+    return configs.CompileResult(
+        sql, result_node.schema.to_bigquery(), output_order, encoded_type_refs
+    )


 def _replace_unsupported_ops(node: nodes.BigFrameNode):
