Skip to content

Commit afc60d1

Browse files
committed
feat(tests): add HETA 1.2.0 parquet size checks and GeoJSON parity validation
1 parent 1a3e050 commit afc60d1

3 files changed

Lines changed: 87 additions & 62 deletions

File tree

tests/aignostics/application/cli_test.py

Lines changed: 35 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,29 @@
33
import contextlib
44
import json
55
import platform
6+
import random
67
import re
78
from collections.abc import Generator
89
from datetime import UTC, datetime, timedelta
910
from pathlib import Path
1011
from time import sleep
1112
from unittest.mock import MagicMock, patch
1213

14+
import pandas as pd
1315
import pytest
16+
from aignx.codegen.exceptions import ForbiddenException
17+
from aignx.codegen.exceptions import NotFoundException as ApiNotFound
18+
from aignx.codegen.models import (
19+
ItemOutput,
20+
ItemResultReadResponse,
21+
ItemState,
22+
ItemTerminationReason,
23+
RunItemStatistics,
24+
RunOutput,
25+
RunReadResponse,
26+
RunState,
27+
RunTerminationReason,
28+
)
1429
from loguru import logger
1530
from tenacity import Retrying, retry, stop_after_attempt, wait_exponential
1631
from typer.testing import CliRunner
@@ -847,8 +862,6 @@ def test_cli_run_list_for_organization(runner: CliRunner) -> None:
847862
@pytest.mark.unit
848863
def test_cli_run_list_forbidden_with_organization(runner: CliRunner) -> None:
849864
"""Check ForbiddenException with --for-organization shows org-specific access denied message."""
850-
from aignx.codegen.exceptions import ForbiddenException
851-
852865
with patch.object(
853866
ApplicationService, "application_runs", side_effect=ForbiddenException(status=403, reason="Forbidden")
854867
):
@@ -862,8 +875,6 @@ def test_cli_run_list_forbidden_with_organization(runner: CliRunner) -> None:
862875
@pytest.mark.unit
863876
def test_cli_run_list_forbidden_without_organization(runner: CliRunner) -> None:
864877
"""Check ForbiddenException without --for-organization shows generic access denied message."""
865-
from aignx.codegen.exceptions import ForbiddenException
866-
867878
with patch.object(
868879
ApplicationService, "application_runs", side_effect=ForbiddenException(status=403, reason="Forbidden")
869880
):
@@ -897,18 +908,6 @@ def test_cli_run_describe_not_found(runner: CliRunner, record_property) -> None:
897908
@pytest.mark.integration
898909
def test_cli_run_describe_json_includes_items(runner: CliRunner) -> None:
899910
"""Check run describe --format=json includes items in output."""
900-
from aignx.codegen.models import (
901-
ItemOutput,
902-
ItemResultReadResponse,
903-
ItemState,
904-
ItemTerminationReason,
905-
RunItemStatistics,
906-
RunOutput,
907-
RunReadResponse,
908-
RunState,
909-
RunTerminationReason,
910-
)
911-
912911
mock_run_data = RunReadResponse(
913912
run_id="test-run-id-123",
914913
application_id="test-app",
@@ -1111,8 +1110,8 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) ->
11111110
results_dir = tmp_path / SPOT_1_FILENAME.replace(".tiff", "")
11121111
assert results_dir.is_dir(), f"Expected directory {results_dir} not found"
11131112
files_in_dir = list(results_dir.glob("*"))
1114-
assert len(files_in_dir) == 9, (
1115-
f"Expected 9 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}"
1113+
assert len(files_in_dir) == 12, (
1114+
f"Expected 12 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}"
11161115
)
11171116
print(f"Found files in {results_dir}:")
11181117
for filename, expected_size, tolerance_percent in SPOT_1_EXPECTED_RESULT_FILES:
@@ -1133,6 +1132,23 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) ->
11331132
f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})"
11341133
)
11351134

1135+
# Validate parquet <-> GeoJSON row count parity for the 3 paired outputs
1136+
parquet_geojson_pairs = [
1137+
("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"),
1138+
("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"),
1139+
("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"),
1140+
]
1141+
for parquet_filename, geojson_filename in parquet_geojson_pairs:
1142+
parquet_path = results_dir / parquet_filename
1143+
geojson_path = results_dir / geojson_filename
1144+
parquet_row_count = len(pd.read_parquet(parquet_path))
1145+
with geojson_path.open() as f:
1146+
geojson_feature_count = len(json.load(f)["features"])
1147+
assert parquet_row_count == geojson_feature_count, (
1148+
f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) "
1149+
f"and {geojson_filename} ({geojson_feature_count} features)"
1150+
)
1151+
11361152
# Validate the execute command exited successfully
11371153
assert result.exit_code == 0
11381154

@@ -1222,9 +1238,6 @@ def test_cli_run_update_item_metadata_not_dict(runner: CliRunner) -> None:
12221238
@pytest.mark.sequential
12231239
def test_cli_run_dump_and_update_custom_metadata(runner: CliRunner, tmp_path: Path) -> None:
12241240
"""Test dumping and updating custom metadata via CLI commands."""
1225-
import json
1226-
import random
1227-
12281241
unique_tag = f"test_metadata_{datetime.now(tz=UTC).timestamp()}"
12291242
with submitted_run(runner, tmp_path, CSV_CONTENT_SPOT0, extra_args=["--tags", unique_tag, "--force"]) as run_id:
12301243
# Step 1: Dump initial custom metadata of run
@@ -1313,11 +1326,8 @@ def test_cli_run_dump_and_update_custom_metadata(runner: CliRunner, tmp_path: Pa
13131326
@pytest.mark.e2e
13141327
@pytest.mark.timeout(timeout=240)
13151328
@pytest.mark.sequential
1316-
def test_cli_run_dump_and_update_item_custom_metadata(runner: CliRunner, tmp_path: Path) -> None: # noqa: PLR0915
1329+
def test_cli_run_dump_and_update_item_custom_metadata(runner: CliRunner, tmp_path: Path) -> None:
13171330
"""Test dumping and updating item custom metadata via CLI commands."""
1318-
import json
1319-
import random
1320-
13211331
unique_tag = f"test_item_metadata_{datetime.now(tz=UTC).timestamp()}"
13221332
# CSV_CONTENT_SPOT0 uses SPOT_0_FILENAME as external_id, which the describe output surfaces
13231333
# as "Item External ID: `...`" — the get_external_id() helper below captures it dynamically.
@@ -1773,8 +1783,6 @@ def test_cli_application_version_document_describe_success(runner: CliRunner, re
17731783
def test_cli_application_version_document_describe_not_found(runner: CliRunner, record_property) -> None:
17741784
"""`application version document describe` exits 2 with a clear message on 404."""
17751785
record_property("tested-item-id", "TC-APPLICATION-CLI-05-03")
1776-
from aignx.codegen.exceptions import NotFoundException as ApiNotFound
1777-
17781786
fake_documents = MagicMock()
17791787
fake_documents.details.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
17801788
fake_client = MagicMock()
@@ -1870,8 +1878,6 @@ def test_cli_application_version_document_list_json_empty(runner: CliRunner, rec
18701878
def test_cli_application_version_document_list_resolve_not_found_text(runner: CliRunner, record_property) -> None:
18711879
"""`application version document list` exits 2 when the application version cannot be resolved."""
18721880
record_property("tested-item-id", "TC-APPLICATION-CLI-05-01")
1873-
from aignx.codegen.exceptions import NotFoundException as ApiNotFound
1874-
18751881
fake_client = MagicMock()
18761882
fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
18771883

@@ -1888,8 +1894,6 @@ def test_cli_application_version_document_list_resolve_not_found_text(runner: Cl
18881894
def test_cli_application_version_document_list_resolve_not_found_json(runner: CliRunner, record_property) -> None:
18891895
"""`application version document list --format json` emits structured error on 404."""
18901896
record_property("tested-item-id", "TC-APPLICATION-CLI-05-01")
1891-
from aignx.codegen.exceptions import NotFoundException as ApiNotFound
1892-
18931897
fake_client = MagicMock()
18941898
fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
18951899

@@ -1976,8 +1980,6 @@ def test_cli_application_version_document_describe_json_success(runner: CliRunne
19761980
def test_cli_application_version_document_describe_resolve_not_found_text(runner: CliRunner, record_property) -> None:
19771981
"""`describe` exits 2 when the application version cannot be resolved (text format)."""
19781982
record_property("tested-item-id", "TC-APPLICATION-CLI-05-03")
1979-
from aignx.codegen.exceptions import NotFoundException as ApiNotFound
1980-
19811983
fake_client = MagicMock()
19821984
fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
19831985

@@ -1996,8 +1998,6 @@ def test_cli_application_version_document_describe_resolve_not_found_text(runner
19961998
def test_cli_application_version_document_describe_resolve_not_found_json(runner: CliRunner, record_property) -> None:
19971999
"""`describe --format json` emits structured error when version cannot be resolved."""
19982000
record_property("tested-item-id", "TC-APPLICATION-CLI-05-03")
1999-
from aignx.codegen.exceptions import NotFoundException as ApiNotFound
2000-
20012001
fake_client = MagicMock()
20022002
fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
20032003

@@ -2026,8 +2026,6 @@ def test_cli_application_version_document_describe_resolve_not_found_json(runner
20262026
def test_cli_application_version_document_describe_not_found_json(runner: CliRunner, record_property) -> None:
20272027
"""`describe --format json` emits structured error when the document is missing."""
20282028
record_property("tested-item-id", "TC-APPLICATION-CLI-05-03")
2029-
from aignx.codegen.exceptions import NotFoundException as ApiNotFound
2030-
20312029
fake_documents = MagicMock()
20322030
fake_documents.details.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
20332031
fake_client = MagicMock()
@@ -2111,8 +2109,6 @@ def test_cli_application_version_document_download_resolve_not_found(
21112109
) -> None:
21122110
"""`download` exits 2 when the application version cannot be resolved."""
21132111
record_property("tested-item-id", "TC-APPLICATION-CLI-05-04")
2114-
from aignx.codegen.exceptions import NotFoundException as ApiNotFound
2115-
21162112
fake_client = MagicMock()
21172113
fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
21182114

@@ -2142,8 +2138,6 @@ def test_cli_application_version_document_download_not_found(
21422138
) -> None:
21432139
"""`download` exits 2 with a clear message when the document does not exist."""
21442140
record_property("tested-item-id", "TC-APPLICATION-CLI-05-04")
2145-
from aignx.codegen.exceptions import NotFoundException as ApiNotFound
2146-
21472141
fake_documents = MagicMock()
21482142
fake_documents.download_to_path.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
21492143
fake_client = MagicMock()

tests/aignostics/application/gui_test.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Tests to verify the GUI functionality of the application module."""
22

33
import contextlib
4+
import json
45
import re
56
import tempfile
67
from asyncio import sleep, to_thread
@@ -9,6 +10,7 @@
910
from typing import TYPE_CHECKING
1011
from unittest.mock import AsyncMock, MagicMock, Mock, patch
1112

13+
import pandas as pd
1214
import pytest
1315
from nicegui.testing import User
1416
from typer.testing import CliRunner
@@ -354,7 +356,7 @@ async def test_gui_download_dataset_via_application_to_run_cancel_to_find_back(
354356
@pytest.mark.flaky(retries=1, delay=5)
355357
@pytest.mark.timeout(timeout=60 * 10)
356358
@pytest.mark.sequential # Helps on Linux with image analysis step otherwise timing out
357-
async def test_gui_run_download( # noqa: PLR0915
359+
async def test_gui_run_download( # noqa: PLR0914, PLR0915
358360
user: User, runner: CliRunner, tmp_path: Path, silent_logging: None, record_property
359361
) -> None:
360362
"""Test that the user can download a run result via the GUI."""
@@ -440,8 +442,8 @@ async def test_gui_run_download( # noqa: PLR0915
440442

441443
# Check for files in the results directory
442444
files_in_results_dir = list(results_dir.glob("*"))
443-
assert len(files_in_results_dir) == 9, (
444-
f"Expected 9 files in {results_dir}, but found {len(files_in_results_dir)}: "
445+
assert len(files_in_results_dir) == 12, (
446+
f"Expected 12 files in {results_dir}, but found {len(files_in_results_dir)}: "
445447
f"{[f.name for f in files_in_results_dir]}"
446448
)
447449

@@ -464,6 +466,23 @@ async def test_gui_run_download( # noqa: PLR0915
464466
f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})"
465467
)
466468

469+
# Validate parquet <-> GeoJSON row count parity for the 3 paired outputs
470+
parquet_geojson_pairs = [
471+
("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"),
472+
("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"),
473+
("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"),
474+
]
475+
for parquet_filename, geojson_filename in parquet_geojson_pairs:
476+
parquet_path = results_dir / parquet_filename
477+
geojson_path = results_dir / geojson_filename
478+
parquet_row_count = len(pd.read_parquet(parquet_path))
479+
with geojson_path.open() as f:
480+
geojson_feature_count = len(json.load(f)["features"])
481+
assert parquet_row_count == geojson_feature_count, (
482+
f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) "
483+
f"and {geojson_filename} ({geojson_feature_count} features)"
484+
)
485+
467486

468487
@pytest.mark.integration
469488
@pytest.mark.sequential

tests/constants_test.py

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,18 @@
8383
# SPOT_0: uv run pytest tests/aignostics/application/gui_test.py::test_gui_run_download -s --no-cov
8484
# SPOT_1: uv run pytest tests/aignostics/application/cli_test.py::test_cli_run_execute -s --no-cov
8585
SPOT_0_EXPECTED_RESULT_FILES = [
86-
("tissue_qc_segmentation_map_image.tiff", 1642856, 10),
87-
("tissue_qc_geojson_polygons.json", 259955, 10),
88-
("tissue_segmentation_geojson_polygons.json", 887003, 10),
89-
("readout_generation_slide_readouts.csv", 303217, 10),
90-
("readout_generation_cell_readouts.csv", 1658344, 10),
91-
("cell_classification_geojson_polygons.json", 11218951, 10),
92-
("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10),
93-
("tissue_segmentation_csv_class_information.csv", 452, 10),
94-
("tissue_qc_csv_class_information.csv", 285, 10),
86+
("tissue_qc_segmentation_map_image.tiff", 470150, 10),
87+
("tissue_qc_geojson_polygons.json", 171251, 10),
88+
("tissue_segmentation_geojson_polygons.json", 185516, 10),
89+
("readout_generation_slide_readouts.csv", 300205, 10),
90+
("readout_generation_cell_readouts.csv", 2417117, 10),
91+
("cell_classification_geojson_polygons.json", 16673412, 10),
92+
("tissue_segmentation_segmentation_map_image.tiff", 527264, 10),
93+
("tissue_segmentation_csv_class_information.csv", 443, 10),
94+
("tissue_qc_csv_class_information.csv", 286, 10),
95+
("tissue_qc_parquet_polygons.parquet", 34346, 10),
96+
("tissue_segmentation_parquet_polygons.parquet", 39185, 10),
97+
("cell_classification_parquet_polygons.parquet", 5476364, 10),
9598
]
9699
SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10)
97100

@@ -105,6 +108,9 @@
105108
("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10),
106109
("tissue_segmentation_csv_class_information.csv", 446, 10),
107110
("tissue_qc_csv_class_information.csv", 290, 10),
111+
("tissue_qc_parquet_polygons.parquet", 29049, 10),
112+
("tissue_segmentation_parquet_polygons.parquet", 56682, 10),
113+
("cell_classification_parquet_polygons.parquet", 838533, 10),
108114
]
109115

110116
case "staging":
@@ -124,15 +130,18 @@
124130

125131
# See production block above for instructions on how to update these sizes.
126132
SPOT_0_EXPECTED_RESULT_FILES = [
127-
("tissue_qc_segmentation_map_image.tiff", 1642856, 10),
128-
("tissue_qc_geojson_polygons.json", 259955, 10),
129-
("tissue_segmentation_geojson_polygons.json", 887003, 10),
130-
("readout_generation_slide_readouts.csv", 303217, 10),
131-
("readout_generation_cell_readouts.csv", 1658344, 10),
132-
("cell_classification_geojson_polygons.json", 11218951, 10),
133-
("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10),
134-
("tissue_segmentation_csv_class_information.csv", 452, 10),
135-
("tissue_qc_csv_class_information.csv", 285, 10),
133+
("tissue_qc_segmentation_map_image.tiff", 470150, 10),
134+
("tissue_qc_geojson_polygons.json", 171251, 10),
135+
("tissue_segmentation_geojson_polygons.json", 185516, 10),
136+
("readout_generation_slide_readouts.csv", 300205, 10),
137+
("readout_generation_cell_readouts.csv", 2417117, 10),
138+
("cell_classification_geojson_polygons.json", 16673412, 10),
139+
("tissue_segmentation_segmentation_map_image.tiff", 527264, 10),
140+
("tissue_segmentation_csv_class_information.csv", 443, 10),
141+
("tissue_qc_csv_class_information.csv", 286, 10),
142+
("tissue_qc_parquet_polygons.parquet", 34346, 10),
143+
("tissue_segmentation_parquet_polygons.parquet", 39185, 10),
144+
("cell_classification_parquet_polygons.parquet", 5476364, 10),
136145
]
137146
SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10)
138147

@@ -146,6 +155,9 @@
146155
("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10),
147156
("tissue_segmentation_csv_class_information.csv", 446, 10),
148157
("tissue_qc_csv_class_information.csv", 290, 10),
158+
("tissue_qc_parquet_polygons.parquet", 29049, 10),
159+
("tissue_segmentation_parquet_polygons.parquet", 56682, 10),
160+
("cell_classification_parquet_polygons.parquet", 838533, 10),
149161
]
150162

151163
case _:

0 commit comments

Comments
 (0)