Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pandas_gbq/load/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# license that can be found in the LICENSE file.

from pandas_gbq.load.core import (
cast_dataframe_for_csv,
cast_dataframe_for_parquet,
encode_chunk,
load_chunks,
Expand All @@ -13,6 +14,7 @@
)

__all__ = [
"cast_dataframe_for_csv",
"cast_dataframe_for_parquet",
"encode_chunk",
"load_chunks",
Expand Down
42 changes: 42 additions & 0 deletions pandas_gbq/load/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,45 @@ def convert(x):
return dataframe


def cast_dataframe_for_csv(
    dataframe: pandas.DataFrame,
    schema: Optional[Dict[str, Any]],
) -> pandas.DataFrame:
    """Cast columns to needed dtype when writing CSV files.

    Values in columns whose schema type is ``DATETIME`` or ``TIMESTAMP``
    (compared case-insensitively) are replaced by the result of
    ``value.isoformat(sep=" ")``. Null values become ``None`` and values
    without an ``isoformat`` attribute (for example, strings) are kept as-is.

    Args:
        dataframe: The data about to be serialized. It is not modified in
            place; a new DataFrame is returned when any column is converted.
        schema: A dictionary with a ``"fields"`` list of field dictionaries,
            each read for its ``"name"`` and ``"type"`` keys. May be ``None``
            or empty, in which case ``dataframe`` is returned unchanged.

    Returns:
        ``dataframe`` itself when nothing needed converting, otherwise a copy
        with the converted columns assigned.
    """
    # The annotation allows None, so don't assume a dictionary is present.
    if not schema:
        return dataframe

    # Protect against a missing key or an explicit None in the dictionary.
    fields = schema.get("fields") or []

    def convert(x):
        # Use isoformat to ensure that the years are 4 digits.
        # https://github.com/googleapis/python-bigquery-pandas/issues/365
        if pandas.isna(x):
            return None
        try:
            return x.isoformat(sep=" ")
        except AttributeError:
            # It might be a string already or some other type.
            return x

    new_columns = {}
    for column in fields:
        # Schema can be a superset of the columns in the dataframe, so ignore
        # columns that aren't present.
        column_name = column.get("name")
        if column_name not in dataframe.columns:
            continue

        # An explicit None for "type" is treated like a missing type.
        column_type = (column.get("type") or "").upper()
        if column_type in {"DATETIME", "TIMESTAMP"}:
            new_columns[column_name] = dataframe[column_name].map(convert)

    if new_columns:
        dataframe = dataframe.assign(**new_columns)
    return dataframe


def load_parquet(
client: bigquery.Client,
dataframe: pandas.DataFrame,
Expand Down Expand Up @@ -195,6 +234,9 @@ def load_csv_from_dataframe(
bq_schema = pandas_gbq.schema.to_google_cloud_bigquery(schema)

def load_chunk(chunk, job_config):
if schema is not None:
chunk = cast_dataframe_for_csv(chunk, schema)

client.load_table_from_dataframe(
chunk,
destination_table_ref,
Expand Down
21 changes: 21 additions & 0 deletions tests/system/test_to_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,27 @@ def test_series_round_trip(
),
}
),
expected_df=pandas.DataFrame(
{
"row_num": [0, 1, 2],
"bool_col": pandas.Series(
[True, False, True],
dtype="bool",
),
"boolean_col": pandas.Series(
[None, True, False],
dtype="boolean",
),
"object_col": pandas.Series(
[
False,
(pandas.NA if hasattr(pandas, "NA") else None),
True,
],
dtype="object",
),
}
),
table_schema=[
{"name": "bool_col", "type": "BOOLEAN"},
{"name": "boolean_col", "type": "BOOLEAN"},
Expand Down