Skip to content

Commit 0423ca9

Browse files
committed
fix: address review findings in Arrow path
- read_result_arrow(): drain the response into a BytesIO buffer before releasing the connection, eliminating the fragile try/finally ordering
- Remove the unused _FULL_TYPES_MAPPER from arrow_convertor.py
- Remove the stale docstring claiming primary_labels are always empty dicts (compute_primary_labels() now populates them)
- Add for_arrow_table() to the DataFrameFactory class docstring
- The convert_arrow_table_to_dataframe stub in __init__.py now raises ImportError with a helpful pyarrow install hint instead of being silently absent
1 parent 13baee7 commit 0423ca9

File tree

4 files changed

+12
-23
lines changed

4 files changed

+12
-23
lines changed

packages/gooddata-pandas/src/gooddata_pandas/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66
try:
77
from gooddata_pandas.arrow_convertor import convert_arrow_table_to_dataframe
88
except ImportError:
9-
pass
9+
10+
def convert_arrow_table_to_dataframe(*args, **kwargs): # type: ignore[misc]
11+
raise ImportError("pyarrow is required for Arrow support. Install it with: pip install gooddata-pandas[arrow]")
12+
13+
1014
from gooddata_pandas.dataframe import DataFrameFactory
1115
from gooddata_pandas.good_pandas import GoodPandas
1216
from gooddata_pandas.result_convertor import LabelOverrides

packages/gooddata-pandas/src/gooddata_pandas/arrow_convertor.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,6 @@
2323
pa.large_string(): pandas.StringDtype("pyarrow"),
2424
}
2525

26-
# Full nullable-type mapper: also maps integer and boolean Arrow types to their
27-
# pandas nullable equivalents. NOT the default — nullable integer dtypes are
28-
# not backward compatible with the JSON path which produces float64 for all
29-
# numeric columns (with NaN for nulls).
30-
_FULL_TYPES_MAPPER: dict = {
31-
**_ARROW_STRINGS_MAPPER,
32-
pa.int8(): pandas.Int8Dtype(),
33-
pa.int16(): pandas.Int16Dtype(),
34-
pa.int32(): pandas.Int32Dtype(),
35-
pa.int64(): pandas.Int64Dtype(),
36-
pa.uint8(): pandas.UInt8Dtype(),
37-
pa.uint16(): pandas.UInt16Dtype(),
38-
pa.uint32(): pandas.UInt32Dtype(),
39-
pa.uint64(): pandas.UInt64Dtype(),
40-
pa.bool_(): pandas.BooleanDtype(),
41-
}
42-
4326

4427
def _label_ref_to_id_map(xtab_meta: dict) -> dict[str, str]:
4528
"""Map 'l0', 'l1', ... to actual label local IDs."""

packages/gooddata-pandas/src/gooddata_pandas/dataframe.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ class DataFrameFactory:
7070
- for_exec_def_arrow(self, exec_def: ExecutionDefinition,
7171
on_execution_submitted: Optional[Callable[[Execution], None]] = None)
7272
-> Tuple[pandas.DataFrame, DataFrameMetadata]:
73+
- for_arrow_table(self, table: pa.Table,
74+
execution_response: Optional[BareExecutionResponse] = None)
75+
-> Tuple[pandas.DataFrame, DataFrameMetadata]:
7376
- for_exec_result_id(self, result_id: str, label_overrides: Optional[LabelOverrides] = None,
7477
result_cache_metadata: Optional[ResultCacheMetadata] = None,
7578
result_size_dimensions_limits: ResultSizeDimensions = (),
@@ -412,10 +415,6 @@ def for_exec_def_arrow(
412415
Returns the same ``(DataFrame, DataFrameMetadata)`` tuple as :meth:`for_exec_def` so that
413416
callers can switch between the two paths without changing their code.
414417
415-
``DataFrameMetadata.primary_labels_from_index`` and
416-
``DataFrameMetadata.primary_labels_from_columns`` are always empty dicts because the
417-
Arrow path does not support the ``use_primary_labels_in_attributes`` feature.
418-
419418
Requires pyarrow to be installed (pip install gooddata-pandas[arrow]).
420419
421420
Args:

packages/gooddata-sdk/src/gooddata_sdk/compute/model/execution.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,16 +394,19 @@ def read_result_arrow(self) -> pyarrow.Table:
394394
raise ImportError(
395395
"pyarrow is required to use read_result_arrow(). Install it with: pip install gooddata-sdk[arrow]"
396396
)
397+
import io
398+
397399
response = self._actions_api.retrieve_result_binary(
398400
workspace_id=self._workspace_id,
399401
result_id=self.result_id,
400402
_preload_content=False,
401403
**({"x_gdc_cancel_token": self.cancel_token} if self.cancel_token else {}),
402404
)
403405
try:
404-
return _ipc.open_stream(response).read_all()
406+
buf = io.BytesIO(response.read())
405407
finally:
406408
response.release_conn()
409+
return _ipc.open_stream(buf).read_all()
407410

408411
def cancel(self) -> None:
409412
"""

0 commit comments

Comments
 (0)