Skip to content

Commit 29bdab5

Browse files
committed
patch ibis
1 parent 64c8872 commit 29bdab5

File tree

4 files changed

+61
-8
lines changed

4 files changed

+61
-8
lines changed

tests/system/small/test_dataframe.py

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -906,15 +906,53 @@ def test_df_to_pandas_batches(scalars_dfs):
906906
assert_pandas_df_equal(pd.concat(filtered_batches), pd_result)
907907

908908

909-
def test_assign_new_column(scalars_dfs):
909+
@pytest.mark.parametrize(
910+
("literal", "expected_dtype"),
911+
(
912+
pytest.param(
913+
2,
914+
dtypes.INT_DTYPE,
915+
id="INT64",
916+
),
917+
# ====================================================================
918+
# NULL values
919+
#
920+
# These are regression tests for b/428999884. It needs to be possible to
921+
# set a column to NULL with a desired type (not just the pandas default
922+
# of float64).
923+
# ====================================================================
924+
pytest.param(None, dtypes.FLOAT_DTYPE, id="NULL-None"),
925+
pytest.param(
926+
pa.scalar(None, type=pa.int64()),
927+
dtypes.INT_DTYPE,
928+
id="NULL-pyarrow-INT64",
929+
),
930+
pytest.param(
931+
pa.scalar(None, type=pa.timestamp("us", tz="UTC")),
932+
dtypes.TIMESTAMP_DTYPE,
933+
id="NULL-pyarrow-TIMESTAMP",
934+
),
935+
pytest.param(
936+
pa.scalar(None, type=pa.timestamp("us")),
937+
dtypes.DATETIME_DTYPE,
938+
id="NULL-pyarrow-DATETIME",
939+
),
940+
),
941+
)
942+
def test_assign_new_column_w_literal(scalars_dfs, literal, expected_dtype):
910943
scalars_df, scalars_pandas_df = scalars_dfs
911-
kwargs = {"new_col": 2}
912-
df = scalars_df.assign(**kwargs)
944+
df = scalars_df.assign(new_col=literal)
913945
bf_result = df.to_pandas()
914-
pd_result = scalars_pandas_df.assign(**kwargs)
915946

916-
# Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes.
917-
pd_result["new_col"] = pd_result["new_col"].astype("Int64")
947+
new_col_pd = literal
948+
if isinstance(literal, pa.Scalar):
949+
# PyArrow integer scalars aren't yet supported in pandas Int64Dtype.
950+
new_col_pd = literal.as_py()
951+
952+
# Pandas might not pick the same dtype as BigFrames, but it should at least
953+
# be castable to it.
954+
pd_result = scalars_pandas_df.assign(new_col=new_col_pd)
955+
pd_result["new_col"] = pd_result["new_col"].astype(expected_dtype)
918956

919957
assert_pandas_df_equal(bf_result, pd_result)
920958

third_party/bigframes_vendored/ibis/common/temporal.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,3 +260,8 @@ def _from_numpy_datetime64(value):
260260
raise TypeError("Unable to convert np.datetime64 without pandas")
261261
else:
262262
return pd.Timestamp(value).to_pydatetime()
263+
264+
265+
@normalize_datetime.register("pyarrow.Scalar")
266+
def _from_pyarrow_scalar(value):
267+
return value.as_py()

third_party/bigframes_vendored/ibis/expr/datatypes/value.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import bigframes_vendored.ibis.expr.datatypes as dt
2828
from bigframes_vendored.ibis.expr.datatypes.cast import highest_precedence
2929
from public import public
30+
import pyarrow as pa
3031
import toolz
3132

3233

@@ -71,6 +72,14 @@ def infer_list(values: Sequence[Any]) -> dt.Array:
7172
return dt.Array(highest_precedence(map(infer, values)))
7273

7374

75+
@infer.register("pyarrow.Scalar")
76+
def infer_pyarrow_scalar(value: "pa.Scalar"):
77+
"""Infer the type of a PyArrow Scalar value."""
78+
import bigframes_vendored.ibis.formats.pyarrow
79+
80+
return bigframes_vendored.ibis.formats.pyarrow.PyArrowType.to_ibis(value.type)
81+
82+
7483
@infer.register(datetime.time)
7584
def infer_time(value: datetime.time) -> dt.Time:
7685
return dt.time
@@ -253,6 +262,9 @@ def infer_shapely_multipolygon(value) -> dt.MultiPolygon:
253262
def normalize(typ, value):
254263
"""Ensure that the Python type underlying a literal resolves to a single type."""
255264

265+
if pa is not None and isinstance(value, pa.Scalar):
266+
value = value.as_py()
267+
256268
dtype = dt.dtype(typ)
257269
if value is None:
258270
if not dtype.nullable:

third_party/bigframes_vendored/ibis/formats/pyarrow.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
@functools.cache
2525
def _from_pyarrow_types():
2626
import pyarrow as pa
27-
import pyarrow_hotfix # noqa: F401
2827

2928
return {
3029
pa.int8(): dt.Int8,
@@ -87,7 +86,6 @@ class PyArrowType(TypeMapper):
8786
def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType:
8887
"""Convert a pyarrow type to an ibis type."""
8988
import pyarrow as pa
90-
import pyarrow_hotfix # noqa: F401
9189

9290
if pa.types.is_null(typ):
9391
return dt.null

0 commit comments

Comments
 (0)