Skip to content

Commit e904d32

Browse files
authored
Merge branch 'main' into generate-embedding-impl-11924477578091076513
2 parents 9a774ac + d8c007e commit e904d32

File tree

65 files changed

+1702
-520
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+1702
-520
lines changed

.librarian/state.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620
1+
image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:e7cc6823efb073a8a26e7cefdd869f12ec228abfbd2a44aa9a7eacc284023677
22
libraries:
33
- id: bigframes
4-
version: 2.32.0
4+
version: 2.33.0
55
last_generated_commit: ""
66
apis: []
77
source_roots:

CHANGELOG.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,24 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.33.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.32.0...v2.33.0) (2026-01-22)
8+
9+
10+
### Features
11+
12+
* Add bigquery.ml.transform function (#2394) ([1f9ee373c1f1d0cd08b80169c3063b862ea46465](https://github.com/googleapis/python-bigquery-dataframes/commit/1f9ee373c1f1d0cd08b80169c3063b862ea46465))
13+
* Add BigQuery ObjectRef functions to `bigframes.bigquery.obj` (#2380) ([9c3bbc36983dffb265454f27b37450df8c5fbc71](https://github.com/googleapis/python-bigquery-dataframes/commit/9c3bbc36983dffb265454f27b37450df8c5fbc71))
14+
* Stabilize interactive table height to prevent notebook layout shifts (#2378) ([a634e976c0f44087ca2a65f68cf2775ae6f04024](https://github.com/googleapis/python-bigquery-dataframes/commit/a634e976c0f44087ca2a65f68cf2775ae6f04024))
15+
* Add max_columns control for anywidget mode (#2374) ([34b5975f6911c5aa5ffc64a2fe6967a9f3d86f78](https://github.com/googleapis/python-bigquery-dataframes/commit/34b5975f6911c5aa5ffc64a2fe6967a9f3d86f78))
16+
* Add dark mode to anywidget mode (#2365) ([2763b41d4b86939e389f76789f5b2acd44f18169](https://github.com/googleapis/python-bigquery-dataframes/commit/2763b41d4b86939e389f76789f5b2acd44f18169))
17+
* Configure Biome for Consistent Code Style (#2364) ([81e27b3d81da9b1684eae0b7f0b9abfd7badcc4f](https://github.com/googleapis/python-bigquery-dataframes/commit/81e27b3d81da9b1684eae0b7f0b9abfd7badcc4f))
18+
19+
20+
### Bug Fixes
21+
22+
* Throw if write api commit op has stream_errors (#2385) ([7abfef0598d476ef233364a01f72d73291983c30](https://github.com/googleapis/python-bigquery-dataframes/commit/7abfef0598d476ef233364a01f72d73291983c30))
23+
* Implement retry logic for cloud function endpoint fetching (#2369) ([0f593c27bfee89fe1bdfc880504f9ab0ac28a24e](https://github.com/googleapis/python-bigquery-dataframes/commit/0f593c27bfee89fe1bdfc880504f9ab0ac28a24e))
24+
725
## [2.32.0](https://github.com/googleapis/google-cloud-python/compare/bigframes-v2.31.0...bigframes-v2.32.0) (2026-01-05)
826

927

bigframes/_config/experiment_options.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from typing import Optional
15+
from typing import Literal, Optional
1616
import warnings
1717

1818
import bigframes
@@ -27,6 +27,7 @@ class ExperimentOptions:
2727
def __init__(self):
2828
self._semantic_operators: bool = False
2929
self._ai_operators: bool = False
30+
self._sql_compiler: Literal["legacy", "stable", "experimental"] = "stable"
3031

3132
@property
3233
def semantic_operators(self) -> bool:
@@ -55,6 +56,24 @@ def ai_operators(self, value: bool):
5556
warnings.warn(msg, category=bfe.PreviewWarning)
5657
self._ai_operators = value
5758

59+
@property
60+
def sql_compiler(self) -> Literal["legacy", "stable", "experimental"]:
61+
return self._sql_compiler
62+
63+
@sql_compiler.setter
64+
def sql_compiler(self, value: Literal["legacy", "stable", "experimental"]):
65+
if value not in ["legacy", "stable", "experimental"]:
66+
raise ValueError(
67+
"sql_compiler must be one of 'legacy', 'stable', or 'experimental'"
68+
)
69+
if value == "experimental":
70+
msg = bfe.format_message(
71+
"The experimental SQL compiler is still under experiments, and is subject "
72+
"to change in the future."
73+
)
74+
warnings.warn(msg, category=FutureWarning)
75+
self._sql_compiler = value
76+
5877
@property
5978
def blob(self) -> bool:
6079
msg = bfe.format_message(

bigframes/bigquery/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
from bigframes.bigquery._operations.search import create_vector_index, vector_search
6161
from bigframes.bigquery._operations.sql import sql_scalar
6262
from bigframes.bigquery._operations.struct import struct
63+
from bigframes.bigquery._operations.table import create_external_table
6364
from bigframes.core.logging import log_adapter
6465

6566
_functions = [
@@ -104,6 +105,8 @@
104105
sql_scalar,
105106
# struct ops
106107
struct,
108+
# table ops
109+
create_external_table,
107110
]
108111

109112
_module = sys.modules[__name__]
@@ -155,6 +158,8 @@
155158
"sql_scalar",
156159
# struct ops
157160
"struct",
161+
# table ops
162+
"create_external_table",
158163
# Modules / SQL namespaces
159164
"ai",
160165
"ml",

bigframes/bigquery/_operations/ai.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,14 @@ def generate(
5959
>>> import bigframes.pandas as bpd
6060
>>> import bigframes.bigquery as bbq
6161
>>> country = bpd.Series(["Japan", "Canada"])
62-
>>> bbq.ai.generate(("What's the capital city of ", country, " one word only"))
63-
0 {'result': 'Tokyo\\n', 'full_response': '{"cand...
64-
1 {'result': 'Ottawa\\n', 'full_response': '{"can...
62+
>>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) # doctest: +SKIP
63+
0 {'result': 'Tokyo', 'full_response': '{"cand...
64+
1 {'result': 'Ottawa', 'full_response': '{"can...
6565
dtype: struct<result: string, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]
6666
67-
>>> bbq.ai.generate(("What's the capital city of ", country, " one word only")).struct.field("result")
68-
0 Tokyo\\n
69-
1 Ottawa\\n
67+
>>> bbq.ai.generate(("What's the capital city of ", country, " one word only")).struct.field("result") # doctest: +SKIP
68+
0 Tokyo
69+
1 Ottawa
7070
Name: result, dtype: string
7171
7272
You get structured output when the `output_schema` parameter is set:

bigframes/bigquery/_operations/ml.py

Lines changed: 188 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from __future__ import annotations
1616

17-
from typing import cast, Mapping, Optional, Union
17+
from typing import cast, List, Mapping, Optional, Union
1818

1919
import bigframes_vendored.constants
2020
import google.cloud.bigquery
@@ -393,3 +393,190 @@ def global_explain(
393393
return bpd.read_gbq_query(sql)
394394
else:
395395
return session.read_gbq_query(sql)
396+
397+
398+
@log_adapter.method_logger(custom_base_name="bigquery_ml")
399+
def transform(
400+
model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
401+
input_: Union[pd.DataFrame, dataframe.DataFrame, str],
402+
) -> dataframe.DataFrame:
403+
"""
404+
Transforms input data using a BigQuery ML model.
405+
406+
See the `BigQuery ML TRANSFORM function syntax
407+
<https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-transform>`_
408+
for additional reference.
409+
410+
Args:
411+
model (bigframes.ml.base.BaseEstimator, str, or pandas.Series):
412+
The model to use for transformation.
413+
input_ (Union[bigframes.pandas.DataFrame, pandas.DataFrame, str]):
414+
The DataFrame or query to use for transformation.
415+
416+
Returns:
417+
bigframes.pandas.DataFrame:
418+
The transformed data.
419+
"""
420+
import bigframes.pandas as bpd
421+
422+
model_name, session = _get_model_name_and_session(model, input_)
423+
table_sql = _to_sql(input_)
424+
425+
sql = bigframes.core.sql.ml.transform(
426+
model_name=model_name,
427+
table=table_sql,
428+
)
429+
430+
if session is None:
431+
return bpd.read_gbq_query(sql)
432+
else:
433+
return session.read_gbq_query(sql)
434+
435+
436+
@log_adapter.method_logger(custom_base_name="bigquery_ml")
437+
def generate_text(
438+
model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
439+
input_: Union[pd.DataFrame, dataframe.DataFrame, str],
440+
*,
441+
temperature: Optional[float] = None,
442+
max_output_tokens: Optional[int] = None,
443+
top_k: Optional[int] = None,
444+
top_p: Optional[float] = None,
445+
flatten_json_output: Optional[bool] = None,
446+
stop_sequences: Optional[List[str]] = None,
447+
ground_with_google_search: Optional[bool] = None,
448+
request_type: Optional[str] = None,
449+
) -> dataframe.DataFrame:
450+
"""
451+
Generates text using a BigQuery ML model.
452+
453+
See the `BigQuery ML GENERATE_TEXT function syntax
454+
<https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-generate-text>`_
455+
for additional reference.
456+
457+
Args:
458+
model (bigframes.ml.base.BaseEstimator, str, or pandas.Series):
459+
The model to use for text generation.
460+
input_ (Union[bigframes.pandas.DataFrame, pandas.DataFrame, str]):
461+
The DataFrame or query to use for text generation.
462+
temperature (float, optional):
463+
A FLOAT64 value that controls the degree of randomness in token selection. The value
464+
must be in the range ``[0.0, 1.0]``. A lower temperature works well
465+
for prompts that expect a more deterministic and less open-ended
466+
or creative response, while a higher temperature can lead to more
467+
diverse or creative results. A temperature of ``0`` is
468+
deterministic, meaning that the highest probability response is
469+
always selected.
470+
max_output_tokens (int, optional):
471+
An INT64 value that sets the maximum number of tokens in the
472+
generated text.
473+
top_k (int, optional):
474+
An INT64 value that changes how the model selects tokens for
475+
output. A ``top_k`` of ``1`` means the next selected token is the
476+
most probable among all tokens in the model's vocabulary. A
477+
``top_k`` of ``3`` means that the next token is selected from
478+
among the three most probable tokens by using temperature. The
479+
default value is ``40``.
480+
top_p (float, optional):
481+
A FLOAT64 value that changes how the model selects tokens for
482+
output. Tokens are selected from most probable to least probable
483+
until the sum of their probabilities equals the ``top_p`` value.
484+
For example, if tokens A, B, and C have a probability of 0.3, 0.2,
485+
and 0.1 and the ``top_p`` value is ``0.5``, then the model will
486+
select either A or B as the next token by using temperature. The
487+
default value is ``0.95``.
488+
flatten_json_output (bool, optional):
489+
A BOOL value that determines the content of the generated JSON column.
490+
stop_sequences (List[str], optional):
491+
An ARRAY<STRING> value that contains the stop sequences for the model.
492+
ground_with_google_search (bool, optional):
493+
A BOOL value that determines whether to ground the model with Google Search.
494+
request_type (str, optional):
495+
A STRING value that contains the request type for the model.
496+
497+
Returns:
498+
bigframes.pandas.DataFrame:
499+
The generated text.
500+
"""
501+
import bigframes.pandas as bpd
502+
503+
model_name, session = _get_model_name_and_session(model, input_)
504+
table_sql = _to_sql(input_)
505+
506+
sql = bigframes.core.sql.ml.generate_text(
507+
model_name=model_name,
508+
table=table_sql,
509+
temperature=temperature,
510+
max_output_tokens=max_output_tokens,
511+
top_k=top_k,
512+
top_p=top_p,
513+
flatten_json_output=flatten_json_output,
514+
stop_sequences=stop_sequences,
515+
ground_with_google_search=ground_with_google_search,
516+
request_type=request_type,
517+
)
518+
519+
if session is None:
520+
return bpd.read_gbq_query(sql)
521+
else:
522+
return session.read_gbq_query(sql)
523+
524+
525+
@log_adapter.method_logger(custom_base_name="bigquery_ml")
526+
def generate_embedding(
527+
model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series],
528+
input_: Union[pd.DataFrame, dataframe.DataFrame, str],
529+
*,
530+
flatten_json_output: Optional[bool] = None,
531+
task_type: Optional[str] = None,
532+
output_dimensionality: Optional[int] = None,
533+
) -> dataframe.DataFrame:
534+
"""
535+
Generates text embeddings using a BigQuery ML model.
536+
537+
See the `BigQuery ML GENERATE_EMBEDDING function syntax
538+
<https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-generate-embedding>`_
539+
for additional reference.
540+
541+
Args:
542+
model (bigframes.ml.base.BaseEstimator, str, or pandas.Series):
543+
The model to use for text embedding.
544+
input_ (Union[bigframes.pandas.DataFrame, pandas.DataFrame, str]):
545+
The DataFrame or query to use for text embedding.
546+
flatten_json_output (bool, optional):
547+
A BOOL value that determines the content of the generated JSON column.
548+
task_type (str, optional):
549+
A STRING value that specifies the intended downstream application task.
550+
Supported values are:
551+
- `RETRIEVAL_QUERY`
552+
- `RETRIEVAL_DOCUMENT`
553+
- `SEMANTIC_SIMILARITY`
554+
- `CLASSIFICATION`
555+
- `CLUSTERING`
556+
- `QUESTION_ANSWERING`
557+
- `FACT_VERIFICATION`
558+
- `CODE_RETRIEVAL_QUERY`
559+
output_dimensionality (int, optional):
560+
An INT64 value that specifies the size of the output embedding.
561+
562+
Returns:
563+
bigframes.pandas.DataFrame:
564+
The generated text embeddings.
565+
"""
566+
import bigframes.pandas as bpd
567+
568+
model_name, session = _get_model_name_and_session(model, input_)
569+
table_sql = _to_sql(input_)
570+
571+
sql = bigframes.core.sql.ml.generate_embedding(
572+
model_name=model_name,
573+
table=table_sql,
574+
flatten_json_output=flatten_json_output,
575+
task_type=task_type,
576+
output_dimensionality=output_dimensionality,
577+
)
578+
579+
if session is None:
580+
return bpd.read_gbq_query(sql)
581+
else:
582+
return session.read_gbq_query(sql)

0 commit comments

Comments
 (0)