Skip to content

Commit 0f482ec

Browse files
committed
fix: address review findings in Arrow path
- read_result_arrow(): drain the response into a BytesIO before releasing the connection, eliminating the fragile try/finally ordering
- Remove unused _FULL_TYPES_MAPPER dead code from arrow_convertor.py
- Remove stale docstring claiming primary_labels are always empty dicts (compute_primary_labels() now populates them)
- Add for_arrow_table() to the DataFrameFactory class docstring
- The convert_arrow_table_to_dataframe stub in __init__.py now raises ImportError with a helpful pyarrow install hint instead of being silently absent
1 parent a77ddb9 commit 0f482ec

File tree

4 files changed

+12
-23
lines changed

4 files changed

+12
-23
lines changed

gooddata-pandas/gooddata_pandas/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66
try:
77
from gooddata_pandas.arrow_convertor import convert_arrow_table_to_dataframe
88
except ImportError:
9-
pass
9+
10+
def convert_arrow_table_to_dataframe(*args, **kwargs): # type: ignore[misc]
11+
raise ImportError("pyarrow is required for Arrow support. Install it with: pip install gooddata-pandas[arrow]")
12+
13+
1014
from gooddata_pandas.dataframe import DataFrameFactory
1115
from gooddata_pandas.good_pandas import GoodPandas
1216
from gooddata_pandas.result_convertor import LabelOverrides

gooddata-pandas/gooddata_pandas/arrow_convertor.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,6 @@
2323
pa.large_string(): pandas.StringDtype("pyarrow"),
2424
}
2525

26-
# Full nullable-type mapper: also maps integer and boolean Arrow types to their
27-
# pandas nullable equivalents. NOT the default — nullable integer dtypes are
28-
# not backward compatible with the JSON path which produces float64 for all
29-
# numeric columns (with NaN for nulls).
30-
_FULL_TYPES_MAPPER: dict = {
31-
**_ARROW_STRINGS_MAPPER,
32-
pa.int8(): pandas.Int8Dtype(),
33-
pa.int16(): pandas.Int16Dtype(),
34-
pa.int32(): pandas.Int32Dtype(),
35-
pa.int64(): pandas.Int64Dtype(),
36-
pa.uint8(): pandas.UInt8Dtype(),
37-
pa.uint16(): pandas.UInt16Dtype(),
38-
pa.uint32(): pandas.UInt32Dtype(),
39-
pa.uint64(): pandas.UInt64Dtype(),
40-
pa.bool_(): pandas.BooleanDtype(),
41-
}
42-
4326

4427
def _label_ref_to_id_map(xtab_meta: dict) -> dict[str, str]:
4528
"""Map 'l0', 'l1', ... to actual label local IDs."""

gooddata-pandas/gooddata_pandas/dataframe.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ class DataFrameFactory:
7070
- for_exec_def_arrow(self, exec_def: ExecutionDefinition,
7171
on_execution_submitted: Optional[Callable[[Execution], None]] = None)
7272
-> Tuple[pandas.DataFrame, DataFrameMetadata]:
73+
- for_arrow_table(self, table: pa.Table,
74+
execution_response: Optional[BareExecutionResponse] = None)
75+
-> Tuple[pandas.DataFrame, DataFrameMetadata]:
7376
- for_exec_result_id(self, result_id: str, label_overrides: Optional[LabelOverrides] = None,
7477
result_cache_metadata: Optional[ResultCacheMetadata] = None,
7578
result_size_dimensions_limits: ResultSizeDimensions = (),
@@ -381,10 +384,6 @@ def for_exec_def_arrow(
381384
Returns the same ``(DataFrame, DataFrameMetadata)`` tuple as :meth:`for_exec_def` so that
382385
callers can switch between the two paths without changing their code.
383386
384-
``DataFrameMetadata.primary_labels_from_index`` and
385-
``DataFrameMetadata.primary_labels_from_columns`` are always empty dicts because the
386-
Arrow path does not support the ``use_primary_labels_in_attributes`` feature.
387-
388387
Requires pyarrow to be installed (pip install gooddata-pandas[arrow]).
389388
390389
Args:

gooddata-sdk/gooddata_sdk/compute/model/execution.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,16 +387,19 @@ def read_result_arrow(self) -> pyarrow.Table:
387387
raise ImportError(
388388
"pyarrow is required to use read_result_arrow(). Install it with: pip install gooddata-sdk[arrow]"
389389
)
390+
import io
391+
390392
response = self._actions_api.retrieve_result_binary(
391393
workspace_id=self._workspace_id,
392394
result_id=self.result_id,
393395
_preload_content=False,
394396
**({"x_gdc_cancel_token": self.cancel_token} if self.cancel_token else {}),
395397
)
396398
try:
397-
return _ipc.open_stream(response).read_all()
399+
buf = io.BytesIO(response.read())
398400
finally:
399401
response.release_conn()
402+
return _ipc.open_stream(buf).read_all()
400403

401404
def cancel(self) -> None:
402405
"""

0 commit comments

Comments
 (0)