Skip to content

Commit 835f57e

Browse files
committed
feat(tests): add parquet size checks and GeoJSON parity validation for HETA 1.2.0
1 parent 1a3e050 commit 835f57e

2 files changed

Lines changed: 27 additions & 2 deletions

File tree

tests/aignostics/application/cli_test.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,8 +1111,8 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) ->
11111111
results_dir = tmp_path / SPOT_1_FILENAME.replace(".tiff", "")
11121112
assert results_dir.is_dir(), f"Expected directory {results_dir} not found"
11131113
files_in_dir = list(results_dir.glob("*"))
1114-
assert len(files_in_dir) == 9, (
1115-
f"Expected 9 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}"
1114+
assert len(files_in_dir) == 12, (
1115+
f"Expected 12 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}"
11161116
)
11171117
print(f"Found files in {results_dir}:")
11181118
for filename, expected_size, tolerance_percent in SPOT_1_EXPECTED_RESULT_FILES:
@@ -1133,6 +1133,25 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) ->
11331133
f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})"
11341134
)
11351135

1136+
# Validate parquet <-> GeoJSON row count parity for the 3 paired outputs
1137+
import pandas as pd
1138+
1139+
parquet_geojson_pairs = [
1140+
("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"),
1141+
("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"),
1142+
("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"),
1143+
]
1144+
for parquet_filename, geojson_filename in parquet_geojson_pairs:
1145+
parquet_path = results_dir / parquet_filename
1146+
geojson_path = results_dir / geojson_filename
1147+
parquet_row_count = len(pd.read_parquet(parquet_path))
1148+
with geojson_path.open() as f:
1149+
geojson_feature_count = len(json.load(f)["features"])
1150+
assert parquet_row_count == geojson_feature_count, (
1151+
f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) "
1152+
f"and {geojson_filename} ({geojson_feature_count} features)"
1153+
)
1154+
11361155
# Validate the execute command exited successfully
11371156
assert result.exit_code == 0
11381157

tests/constants_test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@
105105
("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10),
106106
("tissue_segmentation_csv_class_information.csv", 446, 10),
107107
("tissue_qc_csv_class_information.csv", 290, 10),
108+
("tissue_qc_parquet_polygons.parquet", 29049, 10),
109+
("tissue_segmentation_parquet_polygons.parquet", 56682, 10),
110+
("cell_classification_parquet_polygons.parquet", 838533, 10),
108111
]
109112

110113
case "staging":
@@ -146,6 +149,9 @@
146149
("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10),
147150
("tissue_segmentation_csv_class_information.csv", 446, 10),
148151
("tissue_qc_csv_class_information.csv", 290, 10),
152+
("tissue_qc_parquet_polygons.parquet", 29049, 10),
153+
("tissue_segmentation_parquet_polygons.parquet", 56682, 10),
154+
("cell_classification_parquet_polygons.parquet", 838533, 10),
149155
]
150156

151157
case _:

0 commit comments

Comments
 (0)