Skip to content

Commit 1ce9c09

Browse files
committed
rebase and some minor work
1 parent 5489b68 commit 1ce9c09

File tree

14 files changed: 47 additions and 53 deletions (+47 -53 lines changed)

python/pyarrow-stubs/pyarrow/_stubs_typing.pyi

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ Compression: TypeAlias = Literal[
4545
]
4646
NullEncoding: TypeAlias = Literal["mask", "encode"]
4747
NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"]
48+
TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"]
4849
Mask: TypeAlias = (
4950
Sequence[bool | None]
5051
| NDArray[np.bool_]
@@ -123,7 +124,7 @@ class SupportArrowDeviceArray(Protocol):
123124

124125

125126
class SupportArrowSchema(Protocol):
126-
def __arrow_c_schema(self) -> Any: ...
127+
def __arrow_c_schema__(self) -> Any: ...
127128

128129

129130
class NullableCollection(Protocol[_V]): # type: ignore[reportInvalidTypeVarUse]

python/pyarrow-stubs/pyarrow/_types.pyi

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ from typing_extensions import TypeVar, deprecated
4444

4545
from .io import Buffer
4646
from .scalar import ExtensionScalar
47-
47+
from ._stubs_typing import TimeUnit
4848

4949
class _Weakrefable:
5050
...
@@ -192,7 +192,7 @@ class BinaryViewType(_BasicDataType[bytes]):
192192
...
193193

194194

195-
_Unit = TypeVar("_Unit", bound=Literal["s", "ms", "us", "ns"], default=Literal["us"])
195+
_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"])
196196
_Tz = TypeVar("_Tz", str, None, default=None)
197197

198198

python/pyarrow-stubs/pyarrow/compute.pyi

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ from pyarrow._compute import HashAggregateKernel as HashAggregateKernel # noqa:
116116
# Udf
117117

118118
from pyarrow._compute import _Order, _Placement
119-
from pyarrow._stubs_typing import ArrayLike, ScalarLike, PyScalar
119+
from pyarrow._stubs_typing import ArrayLike, ScalarLike, PyScalar, TimeUnit
120120
from pyarrow._types import _RunEndType
121121
from . import lib
122122

@@ -1351,7 +1351,7 @@ def strptime(
13511351
strings: StringScalar | StringArray | Expression,
13521352
/,
13531353
format: str,
1354-
unit: Literal["s", "ms", "us", "ns"],
1354+
unit: TimeUnit,
13551355
error_is_null: bool = False,
13561356
*,
13571357
options: StrptimeOptions | None = None,

python/pyarrow/tests/parquet/common.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -162,12 +162,9 @@ def alltypes_sample(size=10000, seed=0, categorical=False):
162162
'float32': np.arange(size, dtype=np.float32),
163163
'float64': np.arange(size, dtype=np.float64),
164164
'bool': np.random.randn(size) > 0,
165-
'datetime_ms': np.arange("2016-01-01T00:00:00.001", size,
166-
dtype='datetime64[ms]'),
167-
'datetime_us': np.arange("2016-01-01T00:00:00.000001", size,
168-
dtype='datetime64[us]'),
169-
'datetime_ns': np.arange("2016-01-01T00:00:00.000000001", size,
170-
dtype='datetime64[ns]'),
165+
'datetime_ms': pd.date_range("2016-01-01T00:00:00.001", periods=size, freq='ms').values,
166+
'datetime_us': pd.date_range("2016-01-01T00:00:00.000001", periods=size, freq='us').values,
167+
'datetime_ns': pd.date_range("2016-01-01T00:00:00.000000001", periods=size, freq='ns').values,
171168
'timedelta': np.arange(0, size, dtype="timedelta64[s]"),
172169
'str': pd.Series([str(x) for x in range(size)]),
173170
'empty_str': [''] * size,

python/pyarrow/tests/parquet/test_data_types.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818
import decimal
1919
import io
2020
import random
21-
from typing import cast
2221

2322
try:
2423
import numpy as np
@@ -391,7 +390,7 @@ def test_parquet_nested_convenience(tempdir):
391390

392391
read = pq.read_table(
393392
path, columns=['a'])
394-
tm.assert_frame_equal(read.to_pandas(), cast(pd.DataFrame, df[['a']]))
393+
tm.assert_frame_equal(read.to_pandas(), df[['a']])
395394

396395
read = pq.read_table(
397396
path, columns=['a', 'b'])

python/pyarrow/tests/parquet/test_dataset.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -937,8 +937,7 @@ def _test_write_to_dataset_with_partitions(base_path,
937937
'group2': list('eefeffgeee'),
938938
'num': list(range(10)),
939939
'nan': [np.nan] * 10,
940-
'date': np.arange('2017-01-01', '2017-01-11', dtype='datetime64[D]').astype(
941-
'datetime64[ns]')
940+
'date': pd.date_range('2017-01-01', periods=10, freq='D').values.astype('datetime64[ns]')
942941
})
943942
cols = output_df.columns.tolist()
944943
partition_by = ['group1', 'group2']
@@ -995,8 +994,7 @@ def _test_write_to_dataset_no_partitions(base_path,
995994
'group1': list('aaabbbbccc'),
996995
'group2': list('eefeffgeee'),
997996
'num': list(range(10)),
998-
'date': np.arange('2017-01-01', '2017-01-11', dtype='datetime64[D]').astype(
999-
'datetime64[ns]')
997+
'date': pd.date_range('2017-01-01', periods=10, freq='D').values.astype('datetime64[ns]')
1000998
})
1001999
cols = output_df.columns.tolist()
10021000
output_table = pa.Table.from_pandas(output_df)

python/pyarrow/tests/parquet/test_pandas.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -572,17 +572,17 @@ def test_write_to_dataset_pandas_preserve_extensiondtypes(tempdir):
572572
)
573573
result = pq.read_table(str(tempdir / "case1")).to_pandas()
574574
tm.assert_frame_equal(
575-
cast(pd.DataFrame, result[["col"]]), cast(pd.DataFrame, df[["col"]]))
575+
result[["col"]], df[["col"]])
576576

577577
pq.write_to_dataset(table, str(tempdir / "case2"))
578578
result = pq.read_table(str(tempdir / "case2")).to_pandas()
579579
tm.assert_frame_equal(
580-
cast(pd.DataFrame, result[["col"]]), cast(pd.DataFrame, df[["col"]]))
580+
result[["col"]], df[["col"]])
581581

582582
pq.write_table(table, str(tempdir / "data.parquet"))
583583
result = pq.read_table(str(tempdir / "data.parquet")).to_pandas()
584584
tm.assert_frame_equal(
585-
cast(pd.DataFrame, result[["col"]]), cast(pd.DataFrame, df[["col"]]))
585+
result[["col"]], df[["col"]])
586586

587587

588588
@pytest.mark.pandas
@@ -599,7 +599,7 @@ def test_write_to_dataset_pandas_preserve_index(tempdir):
599599
table, str(tempdir / "case1"), partition_cols=['part'],
600600
)
601601
result = pq.read_table(str(tempdir / "case1")).to_pandas()
602-
tm.assert_frame_equal(result, cast(pd.DataFrame, df_cat))
602+
tm.assert_frame_equal(result, df_cat)
603603

604604
pq.write_to_dataset(table, str(tempdir / "case2"))
605605
result = pq.read_table(str(tempdir / "case2")).to_pandas()

python/pyarrow/tests/test_compute.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2297,7 +2297,7 @@ def test_strptime():
22972297
@pytest.mark.pandas
22982298
@pytest.mark.timezone_data
22992299
def test_strftime():
2300-
times = ["2018-03-10 09:00", "2038-01-31 12:23", None]
2300+
times: list[str | None] = ["2018-03-10 09:00", "2038-01-31 12:23", None]
23012301
timezones = ["CET", "UTC", "Europe/Ljubljana"]
23022302

23032303
formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", "%I",
@@ -2307,7 +2307,7 @@ def test_strftime():
23072307
formats.extend(["%c", "%x", "%X"])
23082308

23092309
for timezone in timezones:
2310-
ts = pd.to_datetime(times).tz_localize(timezone)
2310+
ts = pd.to_datetime(times).tz_localize(timezone) # type: ignore[no-matching-overload]
23112311
for unit in ["s", "ms", "us", "ns"]:
23122312
tsa = pa.array(ts, type=pa.timestamp(unit, timezone))
23132313
for fmt in formats:
@@ -2360,7 +2360,7 @@ def test_strftime():
23602360

23612361
# Test timestamps without timezone
23622362
fmt = "%Y-%m-%dT%H:%M:%S"
2363-
ts = pd.to_datetime(times)
2363+
ts = pd.to_datetime(times) # type: ignore[no-matching-overload]
23642364
tsa = pa.array(ts, type=pa.timestamp("s"))
23652365
result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt))
23662366
st = ts.strftime(fmt) # type: ignore[call-non-callable]
@@ -3440,7 +3440,7 @@ def test_struct_fields_options():
34403440
pc.struct_field(arr, '.a.foo')
34413441

34423442
with pytest.raises(pa.ArrowInvalid, match="cannot be called without options"):
3443-
pc.struct_field(arr)
3443+
pc.struct_field(arr) # type: ignore[call-arg]
34443444

34453445

34463446
def test_case_when():

python/pyarrow/tests/test_csv.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1872,7 +1872,7 @@ def use_threads(self):
18721872

18731873

18741874
class BaseTestCompressedCSVRead:
1875-
def write_file(self, path, content):
1875+
def write_file(self, path, contents):
18761876
pass
18771877
csv_filename = ""
18781878

python/pyarrow/tests/test_dataset.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4946,8 +4946,7 @@ def test_write_table_partitioned_dict(tempdir):
49464946
def test_write_dataset_parquet(tempdir):
49474947
table = pa.table([
49484948
pa.array(range(20), type="uint32"),
4949-
pa.array(np.arange("2012-01-01", 20, dtype="datetime64[D]").astype(
4950-
"datetime64[ns]")),
4949+
pa.array(pd.date_range("2012-01-01", periods=20, freq='D').values.astype("datetime64[ns]")),
49514950
pa.array(np.repeat(['a', 'b'], 10))
49524951
], names=["f1", "f2", "part"])
49534952

0 commit comments

Comments
 (0)