Skip to content

Commit cdec057

Browse files
committed
feat: add allow_large_results option to read_gbq_query
1 parent 40e7638 commit cdec057

File tree

2 files changed: 89 additions and 6 deletions

bigframes/session/__init__.py

Lines changed: 83 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -394,6 +394,7 @@ def read_gbq( # type: ignore[overload-overlap]
394394
use_cache: Optional[bool] = ...,
395395
col_order: Iterable[str] = ...,
396396
dry_run: Literal[False] = ...,
397+
allow_large_results: bool = ...,
397398
) -> dataframe.DataFrame:
398399
...
399400

@@ -410,6 +411,7 @@ def read_gbq(
410411
use_cache: Optional[bool] = ...,
411412
col_order: Iterable[str] = ...,
412413
dry_run: Literal[True] = ...,
414+
allow_large_results: bool = ...,
413415
) -> pandas.Series:
414416
...
415417

@@ -424,8 +426,8 @@ def read_gbq(
424426
filters: third_party_pandas_gbq.FiltersType = (),
425427
use_cache: Optional[bool] = None,
426428
col_order: Iterable[str] = (),
427-
dry_run: bool = False
428-
# Add a verify index argument that fails if the index is not unique.
429+
dry_run: bool = False,
430+
allow_large_results: bool = True,
429431
) -> dataframe.DataFrame | pandas.Series:
430432
# TODO(b/281571214): Generate prompt to show the progress of read_gbq.
431433
if columns and col_order:
@@ -445,6 +447,7 @@ def read_gbq(
445447
use_cache=use_cache,
446448
filters=filters,
447449
dry_run=dry_run,
450+
allow_large_results=allow_large_results,
448451
)
449452
else:
450453
if configuration is not None:
@@ -551,6 +554,7 @@ def read_gbq_query( # type: ignore[overload-overlap]
551554
col_order: Iterable[str] = ...,
552555
filters: third_party_pandas_gbq.FiltersType = ...,
553556
dry_run: Literal[False] = ...,
557+
allow_large_results: bool = ...,
554558
) -> dataframe.DataFrame:
555559
...
556560

@@ -567,6 +571,7 @@ def read_gbq_query(
567571
col_order: Iterable[str] = ...,
568572
filters: third_party_pandas_gbq.FiltersType = ...,
569573
dry_run: Literal[True] = ...,
574+
allow_large_results: bool = ...,
570575
) -> pandas.Series:
571576
...
572577

@@ -582,6 +587,7 @@ def read_gbq_query(
582587
col_order: Iterable[str] = (),
583588
filters: third_party_pandas_gbq.FiltersType = (),
584589
dry_run: bool = False,
590+
allow_large_results: bool = True,
585591
) -> dataframe.DataFrame | pandas.Series:
586592
"""Turn a SQL query into a DataFrame.
587593
@@ -631,9 +637,48 @@ def read_gbq_query(
631637
632638
See also: :meth:`Session.read_gbq`.
633639
640+
Args:
641+
query (str):
642+
A SQL query to execute.
643+
index_col (Iterable[str] or str, optional):
644+
The column(s) to use as the index for the DataFrame. This can be
645+
a single column name or a list of column names. If not provided,
646+
a default index will be used.
647+
columns (Iterable[str], optional):
648+
The columns to read from the query result. If not
649+
specified, all columns will be read.
650+
configuration (dict, optional):
651+
A dictionary of query job configuration options. See the
652+
BigQuery REST API documentation for a list of available options:
653+
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query
654+
max_results (int, optional):
655+
The maximum number of rows to retrieve from the query
656+
result. If not specified, all rows will be loaded.
657+
use_cache (bool, optional):
658+
Whether to use cached results for the query. Defaults to ``True``.
659+
Setting this to ``False`` will force a re-execution of the query.
660+
col_order (Iterable[str], optional):
661+
The desired order of columns in the resulting DataFrame. This
662+
parameter is deprecated and will be removed in a future version.
663+
Use ``columns`` instead.
664+
filters (list[tuple], optional):
665+
A list of filters to apply to the data. Filters are specified
666+
as a list of tuples, where each tuple contains a column name,
667+
an operator (e.g., '==', '!='), and a value.
668+
dry_run (bool, optional):
669+
If ``True``, the function will not actually execute the query but
670+
will instead return statistics about the query. Defaults to
671+
``False``.
672+
allow_large_results (bool, optional):
673+
Whether to allow large query results. If ``True``, the query
674+
results can be larger than the maximum response size.
675+
Defaults to ``True``.
676+
634677
Returns:
635-
bigframes.pandas.DataFrame:
636-
A DataFrame representing results of the query or table.
678+
bigframes.pandas.DataFrame or pandas.Series:
679+
A DataFrame representing the result of the query. If ``dry_run``
680+
is ``True``, a ``pandas.Series`` containing query statistics is
681+
returned.
637682
638683
Raises:
639684
ValueError:
@@ -657,6 +702,7 @@ def read_gbq_query(
657702
use_cache=use_cache,
658703
filters=filters,
659704
dry_run=dry_run,
705+
allow_large_results=allow_large_results,
660706
)
661707

662708
@overload
@@ -714,9 +760,40 @@ def read_gbq_table(
714760
715761
See also: :meth:`Session.read_gbq`.
716762
763+
Args:
764+
table_id (str):
765+
The identifier of the BigQuery table to read.
766+
index_col (Iterable[str] or str, optional):
767+
The column(s) to use as the index for the DataFrame. This can be
768+
a single column name or a list of column names. If not provided,
769+
a default index will be used.
770+
columns (Iterable[str], optional):
771+
The columns to read from the table. If not specified, all
772+
columns will be read.
773+
max_results (int, optional):
774+
The maximum number of rows to retrieve from the table. If not
775+
specified, all rows will be loaded.
776+
filters (list[tuple], optional):
777+
A list of filters to apply to the data. Filters are specified
778+
as a list of tuples, where each tuple contains a column name,
779+
an operator (e.g., '==', '!='), and a value.
780+
use_cache (bool, optional):
781+
Whether to use cached results for the query. Defaults to ``True``.
782+
Setting this to ``False`` will force a re-execution of the query.
783+
col_order (Iterable[str], optional):
784+
The desired order of columns in the resulting DataFrame. This
785+
parameter is deprecated and will be removed in a future version.
786+
Use ``columns`` instead.
787+
dry_run (bool, optional):
788+
If ``True``, the function will not actually execute the query but
789+
will instead return statistics about the table. Defaults to
790+
``False``.
791+
717792
Returns:
718-
bigframes.pandas.DataFrame:
719-
A DataFrame representing results of the query or table.
793+
bigframes.pandas.DataFrame or pandas.Series:
794+
A DataFrame representing the contents of the table. If
795+
``dry_run`` is ``True``, a ``pandas.Series`` containing table
796+
statistics is returned.
720797
721798
Raises:
722799
ValueError:

third_party/bigframes_vendored/pandas/io/gbq.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ def read_gbq(
2525
filters: FiltersType = (),
2626
use_cache: Optional[bool] = None,
2727
col_order: Iterable[str] = (),
28+
allow_large_results: bool = True,
2829
):
2930
"""Loads a DataFrame from BigQuery.
3031
@@ -156,6 +157,11 @@ def read_gbq(
156157
`configuration` to avoid conflicts.
157158
col_order (Iterable[str]):
158159
Alias for columns, retained for backwards compatibility.
160+
allow_large_results (bool, optional):
161+
Whether to allow large query results. If ``True``, the query
162+
results can be larger than the maximum response size. This
163+
option is only applicable when ``query_or_table`` is a query.
164+
Defaults to ``True``.
159165
160166
Raises:
161167
bigframes.exceptions.DefaultIndexWarning:

0 commit comments

Comments
 (0)