From fba6ec8f12e911aa559946d5fe0a7662803c8939 Mon Sep 17 00:00:00 2001 From: Ari Angelo Date: Tue, 28 Apr 2026 09:33:27 +0200 Subject: [PATCH 1/9] chore(tests): bump staging app versions and drop special-app constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - test-app: 0.0.6 → 1.0.0 (new version uses same he-tme input schema) - he-tme: 1.1.0 → 1.1.1 on staging - Remove SPECIAL_APPLICATION_ID/VERSION from staging (no longer needed) Co-Authored-By: Claude Sonnet 4.6 (1M context) --- tests/constants_test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/constants_test.py b/tests/constants_test.py index f9385b290..0d98bec0b 100644 --- a/tests/constants_test.py +++ b/tests/constants_test.py @@ -94,7 +94,7 @@ case "staging": TEST_APPLICATION_ID = "test-app" - TEST_APPLICATION_VERSION = "0.0.6" + TEST_APPLICATION_VERSION = "1.0.0" HETA_APPLICATION_ID = "he-tme" HETA_APPLICATION_VERSION = "1.1.1" @@ -108,9 +108,6 @@ PIPELINE_CPU_PROVISIONING_MODE = "SPOT" PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30 - SPECIAL_APPLICATION_ID = "test-app" - SPECIAL_APPLICATION_VERSION = "0.99.0" - SPOT_0_EXPECTED_RESULT_FILES = [ ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), ("tissue_qc_geojson_polygons.json", 259955, 10), From 0e4630b2c9a6d68a87a210c366ccd4aeb2158e8d Mon Sep 17 00:00:00 2001 From: Ari Angelo Date: Tue, 28 Apr 2026 15:54:06 +0200 Subject: [PATCH 2/9] chore(tests): fix staging SPECIAL_APPLICATION constants and drop normalization artifact - Re-add SPECIAL_APPLICATION_ID/VERSION to staging pointing to test-app 1.0.0 so e2e_test.py imports resolve on staging - Remove normalization:wsi input artifact from _get_spots_payload_for_special; test-app 1.0.0 only requires whole_slide_image, matching the he-tme schema Co-Authored-By: Claude Sonnet 4.6 (1M context) --- tests/aignostics/platform/e2e_test.py | 12 ------------ tests/constants_test.py | 3 +++ 2 files changed, 3 insertions(+), 12 
deletions(-) diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py index 634e85da0..3696fc675 100644 --- a/tests/aignostics/platform/e2e_test.py +++ b/tests/aignostics/platform/e2e_test.py @@ -224,13 +224,6 @@ def _get_spots_payload_for_special(expires_seconds: int, count: int) -> list[pla "disease": "LUNG_CANCER", }, } - normalization_metadata = { - "checksum_base64_crc32c": SPOT_1_CRC32C, - "width_px": SPOT_1_WIDTH, - "height_px": SPOT_1_HEIGHT, - "resolution_mpp": SPOT_1_RESOLUTION_MPP, - "media_type": "image/tiff", - } return [ platform.InputItem( external_id=f"{SPOT_1_GS_URL}&spot_index={index}", @@ -240,11 +233,6 @@ def _get_spots_payload_for_special(expires_seconds: int, count: int) -> list[pla download_url=signed_url, metadata=wsi_metadata, ), - platform.InputArtifact( - name="normalization:wsi", - download_url=signed_url, - metadata=normalization_metadata, - ), ], ) for index in range(count) diff --git a/tests/constants_test.py b/tests/constants_test.py index 0d98bec0b..78dcf9dfd 100644 --- a/tests/constants_test.py +++ b/tests/constants_test.py @@ -108,6 +108,9 @@ PIPELINE_CPU_PROVISIONING_MODE = "SPOT" PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30 + SPECIAL_APPLICATION_ID = "test-app" + SPECIAL_APPLICATION_VERSION = "1.0.0" + SPOT_0_EXPECTED_RESULT_FILES = [ ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), ("tissue_qc_geojson_polygons.json", 259955, 10), From 27ca00e39e9cba4b319c944c74c42256f02ef5c1 Mon Sep 17 00:00:00 2001 From: Ari Angelo Date: Tue, 28 Apr 2026 16:03:49 +0200 Subject: [PATCH 3/9] chore(tests): skip special-app tests on staging; guard import - Remove SPECIAL_APPLICATION_ID/VERSION from staging constants entirely - Guard the import in e2e_test.py with try/except so staging doesn't fail with ImportError - Add skipif(SPECIAL_APPLICATION_ID is None) to both special-app tests so they are silently skipped on staging but still run on production (0.99.0) Co-Authored-By: Claude Sonnet 4.6 (1M context) ---
tests/aignostics/platform/e2e_test.py | 10 ++++++++-- tests/constants_test.py | 3 --- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py index 3696fc675..c620d7a2d 100644 --- a/tests/aignostics/platform/e2e_test.py +++ b/tests/aignostics/platform/e2e_test.py @@ -36,8 +36,6 @@ PIPELINE_GPU_TYPE, PIPELINE_MAX_GPUS_PER_SLIDE, PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES, - SPECIAL_APPLICATION_ID, - SPECIAL_APPLICATION_VERSION, SPOT_0_CRC32C, SPOT_0_GS_URL, SPOT_0_HEIGHT, @@ -62,6 +60,12 @@ TEST_APPLICATION_VERSION, ) +try: + from tests.constants_test import SPECIAL_APPLICATION_ID, SPECIAL_APPLICATION_VERSION +except ImportError: + SPECIAL_APPLICATION_ID = None # type: ignore[assignment] + SPECIAL_APPLICATION_VERSION = None # type: ignore[assignment] + TEST_APPLICATION_SUBMIT_AND_WAIT_DEADLINE_SECONDS = 60 * 45 # 45 minutes TEST_APPLICATION_SUBMIT_AND_WAIT_DUE_DATE_SECONDS = 60 * 10 # 10 minutes TEST_APPLICATION_SUBMIT_AND_WAIT_TIMEOUT_SECONDS = ( @@ -610,6 +614,7 @@ def test_platform_heta_app_submit() -> None: @pytest.mark.e2e @pytest.mark.stress_only @pytest.mark.long_running +@pytest.mark.skipif(SPECIAL_APPLICATION_ID is None, reason="Special application not configured for this environment") @pytest.mark.timeout(timeout=SPECIAL_APPLICATION_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS) def test_platform_special_app_submit() -> None: """Test application runs with the special application. @@ -678,6 +683,7 @@ def test_platform_special_app_submit() -> None: @pytest.mark.stress_only @pytest.mark.long_running @pytest.mark.scheduled_only +@pytest.mark.skipif(SPECIAL_APPLICATION_ID is None, reason="Special application not configured for this environment") @pytest.mark.timeout(timeout=SPECIAL_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS) def test_platform_special_app_find_and_validate() -> None: """Test application runs with the special application. 
diff --git a/tests/constants_test.py b/tests/constants_test.py index 78dcf9dfd..0d98bec0b 100644 --- a/tests/constants_test.py +++ b/tests/constants_test.py @@ -108,9 +108,6 @@ PIPELINE_CPU_PROVISIONING_MODE = "SPOT" PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30 - SPECIAL_APPLICATION_ID = "test-app" - SPECIAL_APPLICATION_VERSION = "1.0.0" - SPOT_0_EXPECTED_RESULT_FILES = [ ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), ("tissue_qc_geojson_polygons.json", 259955, 10), From c06f7732ff7ecc5e3cb7b8ff80621100b61773aa Mon Sep 17 00:00:00 2001 From: Ari Angelo Date: Tue, 28 Apr 2026 16:24:20 +0200 Subject: [PATCH 4/9] chore(tests): use None sentinel for SPECIAL_APPLICATION on staging Simpler than a try/except guard: staging defines SPECIAL_APPLICATION_ID and SPECIAL_APPLICATION_VERSION as None, the regular import works, and the existing skipif(SPECIAL_APPLICATION_ID is None) handles the rest. Co-Authored-By: Claude Sonnet 4.6 (1M context) --- tests/aignostics/platform/e2e_test.py | 8 ++------ tests/constants_test.py | 3 +++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py index c620d7a2d..26fea08ae 100644 --- a/tests/aignostics/platform/e2e_test.py +++ b/tests/aignostics/platform/e2e_test.py @@ -36,6 +36,8 @@ PIPELINE_GPU_TYPE, PIPELINE_MAX_GPUS_PER_SLIDE, PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES, + SPECIAL_APPLICATION_ID, + SPECIAL_APPLICATION_VERSION, SPOT_0_CRC32C, SPOT_0_GS_URL, SPOT_0_HEIGHT, @@ -60,12 +62,6 @@ TEST_APPLICATION_VERSION, ) -try: - from tests.constants_test import SPECIAL_APPLICATION_ID, SPECIAL_APPLICATION_VERSION -except ImportError: - SPECIAL_APPLICATION_ID = None # type: ignore[assignment] - SPECIAL_APPLICATION_VERSION = None # type: ignore[assignment] - TEST_APPLICATION_SUBMIT_AND_WAIT_DEADLINE_SECONDS = 60 * 45 # 45 minutes TEST_APPLICATION_SUBMIT_AND_WAIT_DUE_DATE_SECONDS = 60 * 10 # 10 minutes TEST_APPLICATION_SUBMIT_AND_WAIT_TIMEOUT_SECONDS = 
( diff --git a/tests/constants_test.py b/tests/constants_test.py index 0d98bec0b..946cceba1 100644 --- a/tests/constants_test.py +++ b/tests/constants_test.py @@ -108,6 +108,9 @@ PIPELINE_CPU_PROVISIONING_MODE = "SPOT" PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30 + SPECIAL_APPLICATION_ID = None + SPECIAL_APPLICATION_VERSION = None + SPOT_0_EXPECTED_RESULT_FILES = [ ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), ("tissue_qc_geojson_polygons.json", 259955, 10), From d1397d856789a35d79ad2316aba05a4bcb119b24 Mon Sep 17 00:00:00 2001 From: Ari Angelo Date: Tue, 12 May 2026 01:15:41 +0200 Subject: [PATCH 5/9] chore(tests): update SPOT_1 slide, add SPOT_4, bump app versions to he-tme 1.2.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace SPOT_1 with breast cancer slide 1603ba4c (BREAST/BREAST_CANCER, 6649×6578 at 0.25 MPP); preserve old 9375e3ed data as SPOT_4 - Add VIPS 10x resolution ambiguity note for SPOT_2, SPOT_3, SPOT_4 - Bump HETA_APPLICATION_VERSION to 1.2.0, TEST_APPLICATION_VERSION to 1.0.0 - Remove SPECIAL_APPLICATION concept; restore stress tests against test-app 1.0.0 - Unify payload builders via _build_wsi_input_item / _build_minimal_wsi_input_item - Update SPOT_1_EXPECTED_RESULT_FILES sizes from staging run 43a3bcd2 - Reduce PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES to 25 --- tests/aignostics/platform/e2e_test.py | 316 +++++++++++--------------- tests/constants_test.py | 92 ++++---- 2 files changed, 187 insertions(+), 221 deletions(-) diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py index 26fea08ae..9324898ce 100644 --- a/tests/aignostics/platform/e2e_test.py +++ b/tests/aignostics/platform/e2e_test.py @@ -36,17 +36,17 @@ PIPELINE_GPU_TYPE, PIPELINE_MAX_GPUS_PER_SLIDE, PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES, - SPECIAL_APPLICATION_ID, - SPECIAL_APPLICATION_VERSION, SPOT_0_CRC32C, SPOT_0_GS_URL, SPOT_0_HEIGHT, SPOT_0_RESOLUTION_MPP, SPOT_0_WIDTH, 
SPOT_1_CRC32C, + SPOT_1_DISEASE, SPOT_1_GS_URL, SPOT_1_HEIGHT, SPOT_1_RESOLUTION_MPP, + SPOT_1_TISSUE, SPOT_1_WIDTH, SPOT_2_CRC32C, SPOT_2_GS_URL, @@ -87,152 +87,119 @@ # Plan to have 100.000 slides processed in total, with 100 slides per application run, # one application run starting every 5 minutes, with a throughput of 1 slide per minute, # given no GPU. -SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT = 100 -SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_00 = 2000 # Minute 0..9 -SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_20 = 2000 # Minute 20..29 -SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS = 60 * 60 * 20 # 20 hours -SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS = 60 * 60 * 24 # 24 hours -SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40 = 60 * 60 * 2 # 2 hours -SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40 = 60 * 60 * 3 # 3 hours -SPECIAL_APPLICATION_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS = 60 * 30 # 30 minutes -SPECIAL_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS = 60 * 60 # 60 minutes +TEST_APP_STRESS_SLIDE_PER_RUN_COUNT = 100 +TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_00 = 2000 # Minute 0..9 +TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_20 = 2000 # Minute 20..29 +TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS = 60 * 60 * 20 # 20 hours +TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS = 60 * 60 * 24 # 24 hours +TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40 = 60 * 60 * 2 # 2 hours +TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40 = 60 * 60 * 3 # 3 hours +TEST_APP_STRESS_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS = 60 * 30 # 30 minutes +TEST_APP_STRESS_FIND_AND_VALIDATE_TIMEOUT_SECONDS = 60 * 60 # 60 minutes + + +def _build_wsi_input_item( # noqa: PLR0913, PLR0917 + gs_url: str, + crc32c: str, + width: int, + height: int, + resolution_mpp: float, + expires_seconds: int, + *, + tissue: str = "LUNG", + disease: str = "LUNG_CANCER", +) -> platform.InputItem: + """Build a single WSI InputItem from spot metadata.""" + return 
platform.InputItem( + external_id=gs_url, + input_artifacts=[ + platform.InputArtifact( + name="whole_slide_image", + download_url=platform.generate_signed_url( + url=gs_url, + expires_seconds=expires_seconds, + ), + metadata={ + "checksum_base64_crc32c": crc32c, + "width_px": width, + "height_px": height, + "resolution_mpp": resolution_mpp, + "media_type": "image/tiff", + "staining_method": "H&E", + "specimen": { + "tissue": tissue, + "disease": disease, + }, + }, + ) + ], + ) + + +def _build_minimal_wsi_input_item(gs_url: str, crc32c: str, expires_seconds: int) -> platform.InputItem: + """Build a minimal WSI InputItem supplying only the CRC32C and image URL.""" + return platform.InputItem( + external_id=gs_url, + input_artifacts=[ + platform.InputArtifact( + name="whole_slide_image", + download_url=platform.generate_signed_url(url=gs_url, expires_seconds=expires_seconds), + metadata={ + "checksum_base64_crc32c": crc32c, + "media_type": "image/tiff", + }, + ) + ], + ) def _get_single_spot_payload_for_heta(expires_seconds: int) -> list[platform.InputItem]: """Generates a payload using a single spot.""" return [ - platform.InputItem( - external_id=SPOT_0_GS_URL, - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url=platform.generate_signed_url( - url=SPOT_0_GS_URL, - expires_seconds=expires_seconds, - ), - metadata={ - "checksum_base64_crc32c": SPOT_0_CRC32C, - "resolution_mpp": SPOT_0_RESOLUTION_MPP, - "width_px": SPOT_0_WIDTH, - "height_px": SPOT_0_HEIGHT, - "media_type": "image/tiff", - "staining_method": "H&E", - "specimen": { - "tissue": "LUNG", - "disease": "LUNG_CANCER", - }, - }, - ) - ], - ), + _build_wsi_input_item( + SPOT_0_GS_URL, SPOT_0_CRC32C, SPOT_0_WIDTH, SPOT_0_HEIGHT, SPOT_0_RESOLUTION_MPP, expires_seconds + ) ] def _get_three_spots_payload_for_test(expires_seconds: int) -> list[platform.InputItem]: """Generates a payload using three spots.""" return [ - platform.InputItem( - external_id=SPOT_1_GS_URL, - 
input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url=platform.generate_signed_url( - url=SPOT_1_GS_URL, - expires_seconds=expires_seconds, - ), - metadata={ - "checksum_base64_crc32c": SPOT_1_CRC32C, - "width_px": SPOT_1_WIDTH, - "height_px": SPOT_1_HEIGHT, - "resolution_mpp": SPOT_1_RESOLUTION_MPP, - "media_type": "image/tiff", - }, - ) - ], + _build_wsi_input_item( + SPOT_1_GS_URL, + SPOT_1_CRC32C, + SPOT_1_WIDTH, + SPOT_1_HEIGHT, + SPOT_1_RESOLUTION_MPP, + expires_seconds, + tissue=SPOT_1_TISSUE, + disease=SPOT_1_DISEASE, ), - platform.InputItem( - external_id=SPOT_2_GS_URL, - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url=platform.generate_signed_url( - url=SPOT_2_GS_URL, - expires_seconds=expires_seconds, - ), - metadata={ - "checksum_base64_crc32c": SPOT_2_CRC32C, - "width_px": SPOT_2_WIDTH, - "height_px": SPOT_2_HEIGHT, - "resolution_mpp": SPOT_2_RESOLUTION_MPP, - "media_type": "image/tiff", - }, - ) - ], + _build_wsi_input_item( + SPOT_2_GS_URL, SPOT_2_CRC32C, SPOT_2_WIDTH, SPOT_2_HEIGHT, SPOT_2_RESOLUTION_MPP, expires_seconds ), - platform.InputItem( - external_id=SPOT_3_GS_URL, - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url=platform.generate_signed_url( - url=SPOT_3_GS_URL, - expires_seconds=expires_seconds, - ), - metadata={ - "checksum_base64_crc32c": SPOT_3_CRC32C, - "width_px": SPOT_3_WIDTH, - "height_px": SPOT_3_HEIGHT, - "resolution_mpp": SPOT_3_RESOLUTION_MPP, - "media_type": "image/tiff", - }, - ) - ], + _build_wsi_input_item( + SPOT_3_GS_URL, SPOT_3_CRC32C, SPOT_3_WIDTH, SPOT_3_HEIGHT, SPOT_3_RESOLUTION_MPP, expires_seconds ), ] -def _get_spots_payload_for_special(expires_seconds: int, count: int) -> list[platform.InputItem]: - """Generates a payload using count many spots. 
- - Optimized for large counts (e.g., 100k items): - - Generates signed URL once (all items use same source file) - - Pre-builds metadata dicts once (identical across all items) +def _get_spots_payload_for_test_app(expires_seconds: int, count: int) -> list[platform.InputItem]: + """Generates a minimal payload for the test application using count many spots. - Args: - expires_seconds: Expiration time for signed URLs in seconds. - count: Number of items to generate. - - Returns: - List of InputItem objects for the special application. + Optimized for large counts (e.g., 2000 items): + - Generates signed URL once (all items use the same source file) + - Pre-builds metadata dict once (identical across all items) """ if count <= 0: return [] - - signed_url = platform.generate_signed_url( - url=SPOT_1_GS_URL, - expires_seconds=expires_seconds, - ) - wsi_metadata = { - "checksum_base64_crc32c": SPOT_1_CRC32C, - "width_px": SPOT_1_WIDTH, - "height_px": SPOT_1_HEIGHT, - "resolution_mpp": SPOT_1_RESOLUTION_MPP, - "media_type": "image/tiff", - "staining_method": "H&E", - "specimen": { - "tissue": "LUNG", - "disease": "LUNG_CANCER", - }, - } + signed_url = platform.generate_signed_url(url=SPOT_1_GS_URL, expires_seconds=expires_seconds) + metadata = {"checksum_base64_crc32c": SPOT_1_CRC32C, "media_type": "image/tiff"} return [ platform.InputItem( external_id=f"{SPOT_1_GS_URL}&spot_index={index}", input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url=signed_url, - metadata=wsi_metadata, - ), + platform.InputArtifact(name="whole_slide_image", download_url=signed_url, metadata=metadata), ], ) for index in range(count) @@ -608,110 +575,97 @@ def test_platform_heta_app_submit() -> None: @pytest.mark.e2e -@pytest.mark.stress_only @pytest.mark.long_running -@pytest.mark.skipif(SPECIAL_APPLICATION_ID is None, reason="Special application not configured for this environment") 
-@pytest.mark.timeout(timeout=SPECIAL_APPLICATION_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS) -def test_platform_special_app_submit() -> None: - """Test application runs with the special application. +@pytest.mark.scheduled_only +@pytest.mark.timeout(timeout=HETA_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS) +def test_platform_heta_app_find_and_validate() -> None: + """Test application runs with the HETA application. + + This test finds an application run with the HETA application submitted earlier and + validates it completed successfully and in time. + + Raises: + AssertionError: If any of the validation checks fail. + """ + _find_and_validate( + application_id=HETA_APPLICATION_ID, + application_version=HETA_APPLICATION_VERSION, + ) + - This test submits an application run with the special application and validates the submission. +@pytest.mark.e2e +@pytest.mark.stress_only +@pytest.mark.long_running +@pytest.mark.timeout(timeout=TEST_APP_STRESS_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS) +def test_platform_test_app_stress_submit() -> None: + """Test application runs with the test application under stress conditions. - The test behavior varies based on the current minute when triggered by cron (*/10): - - Minutes 0-9 (every 6th run): Uses 1000 items instead of 100 - - Minutes 40-49 (every 4th run): Uses 2h due date / 3h deadline instead of 20h due date / 24h deadline + Submits a large batch of slides and validates the submission. Batch size and + scheduling vary based on the current minute when triggered by cron (*/10): + - Minutes 0-9 (every 6th run): 2000 items + - Minutes 20-29 (every 6th run): 2000 items + - Minutes 40-49 (every 4th run): 2h due date / 3h deadline instead of defaults + - All other minutes: 100 items Raises: AssertionError: If any of the validation checks fail. 
""" - # Determine run configuration based on current minute - # Cron runs every 10 minutes (*/10, in _scheduled-test-stress.yml), - # so we check which 10-minute window we're in current_minute = datetime.now(tz=UTC).minute is_on_00 = 0 <= current_minute <= 9 is_on_20 = 20 <= current_minute <= 29 is_on_40 = 40 <= current_minute <= 49 if is_on_00: - slide_count = SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_00 + slide_count = TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_00 elif is_on_20: - slide_count = SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_20 + slide_count = TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_20 else: - slide_count = SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT + slide_count = TEST_APP_STRESS_SLIDE_PER_RUN_COUNT deadline_seconds = ( - SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40 + TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40 if is_on_40 - else SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS + else TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS ) due_date_seconds = ( - SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40 + TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40 if is_on_40 - else SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS + else TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS ) logger.info( - f"Special app submit config: minute={current_minute}, is_on_00={is_on_00}, is_on_40={is_on_40}, " + f"Test app stress submit: minute={current_minute}, is_on_00={is_on_00}, is_on_40={is_on_40}, " f"slide_count={slide_count}, deadline_seconds={deadline_seconds}, due_date_seconds={due_date_seconds}" ) - logger.trace( - f"Generating special application payload with {slide_count} spots for " - f"{SPECIAL_APPLICATION_ID} version {SPECIAL_APPLICATION_VERSION}" - ) - payload = _get_spots_payload_for_special( + payload = _get_spots_payload_for_test_app( expires_seconds=deadline_seconds + 60 * 5, count=slide_count, ) - logger.debug(f"Generated special application payload: {payload}") _submit_and_validate( - 
application_id=SPECIAL_APPLICATION_ID, - application_version=SPECIAL_APPLICATION_VERSION, + application_id=TEST_APPLICATION_ID, + application_version=TEST_APPLICATION_VERSION, payload=payload, deadline_seconds=deadline_seconds, due_date_seconds=due_date_seconds, - tags={"test_platform_special_app_submit", "special", "stress", "stress_only"}, + tags={"test_platform_test_app_stress_submit", "stress", "stress_only"}, ) - logger.debug("Special application payload submitted successfully") @pytest.mark.e2e @pytest.mark.stress_only @pytest.mark.long_running @pytest.mark.scheduled_only -@pytest.mark.skipif(SPECIAL_APPLICATION_ID is None, reason="Special application not configured for this environment") -@pytest.mark.timeout(timeout=SPECIAL_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS) -def test_platform_special_app_find_and_validate() -> None: - """Test application runs with the special application. - - This test finds an application run with the special application submitted earlier and - validates it completed successfully and in time. +@pytest.mark.timeout(timeout=TEST_APP_STRESS_FIND_AND_VALIDATE_TIMEOUT_SECONDS) +def test_platform_test_app_stress_find_and_validate() -> None: + """Find and validate a previously submitted test application stress run. Raises: AssertionError: If any of the validation checks fail. """ _find_and_validate( - application_id=SPECIAL_APPLICATION_ID, - application_version=SPECIAL_APPLICATION_VERSION, - ) - - -@pytest.mark.e2e -@pytest.mark.long_running -@pytest.mark.scheduled_only -@pytest.mark.timeout(timeout=HETA_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS) -def test_platform_heta_app_find_and_validate() -> None: - """Test application runs with the HETA application. - - This test finds an application run with the HETA application submitted earlier and - validates it completed successfully and in time. - - Raises: - AssertionError: If any of the validation checks fail. 
- """ - _find_and_validate( - application_id=HETA_APPLICATION_ID, - application_version=HETA_APPLICATION_VERSION, + application_id=TEST_APPLICATION_ID, + application_version=TEST_APPLICATION_VERSION, ) diff --git a/tests/constants_test.py b/tests/constants_test.py index 946cceba1..0296cb0d8 100644 --- a/tests/constants_test.py +++ b/tests/constants_test.py @@ -19,15 +19,21 @@ SPOT_0_HEIGHT = 7196 SPOT_1_GS_URL = ( - "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff" + "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/1603ba4c-398a-49db-926b-c14d8f17dc83.tiff" ) -SPOT_1_FILENAME = "9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff" -SPOT_1_CRC32C = "9l3NNQ==" -SPOT_1_FILESIZE = 14681750 -SPOT_1_RESOLUTION_MPP = 0.46499982 -SPOT_1_WIDTH = 3728 -SPOT_1_HEIGHT = 3640 - +SPOT_1_FILENAME = "1603ba4c-398a-49db-926b-c14d8f17dc83.tiff" +SPOT_1_CRC32C = "MKWV1g==" +SPOT_1_FILESIZE = 8942460 +SPOT_1_RESOLUTION_MPP = 0.25 +SPOT_1_WIDTH = 6649 +SPOT_1_HEIGHT = 6578 +SPOT_1_TISSUE = "BREAST" +SPOT_1_DISEASE = "BREAST_CANCER" + +# SPOT_2, SPOT_3 (and the former SPOT_1 / 9375e3ed): these slides have a known 10x resolution +# ambiguity — certain VIPS versions read their MPP as ~0.0465 instead of ~0.465 due to differing +# interpretations of the TIFF ResolutionUnit tag. The values below reflect the correct 0.465 MPP. +# If a test fails with an off-by-10x resolution error, check the VIPS version in use. 
SPOT_2_GS_URL = ( "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/8c7b079e-8b8a-4036-bfde-5818352b503a.tiff" ) @@ -46,13 +52,23 @@ SPOT_3_WIDTH = 4016 SPOT_3_HEIGHT = 3952 +SPOT_4_GS_URL = ( + "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff" +) +SPOT_4_FILENAME = "9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff" +SPOT_4_CRC32C = "9l3NNQ==" +SPOT_4_FILESIZE = 14681750 +SPOT_4_RESOLUTION_MPP = 0.46499982 +SPOT_4_WIDTH = 3728 +SPOT_4_HEIGHT = 3640 + match os.getenv("AIGNOSTICS_PLATFORM_ENVIRONMENT", "production"): case "production": TEST_APPLICATION_ID = "test-app" - TEST_APPLICATION_VERSION = "0.0.6" + TEST_APPLICATION_VERSION = "1.0.0" HETA_APPLICATION_ID = "he-tme" - HETA_APPLICATION_VERSION = "1.1.1" + HETA_APPLICATION_VERSION = "1.2.0" TEST_APPLICATION_VERSION_USE_LATEST_FALLBACK_SKIP = False PIPELINE_GPU_TYPE = "L4" @@ -60,13 +76,12 @@ PIPELINE_GPU_FLEX_START_MAX_RUN_DURATION_MINUTES = None PIPELINE_MAX_GPUS_PER_SLIDE = 1 PIPELINE_CPU_PROVISIONING_MODE = "SPOT" - PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = ( - 30 # Respected starting with 1.0.0-sl.4.1+internal, until then set to 60min by application itself. - ) - - SPECIAL_APPLICATION_ID = "test-app" - SPECIAL_APPLICATION_VERSION = "0.99.0" + PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 25 + # To update file sizes: the tests print every file's actual size before asserting. Run with + # -s to see them, then paste the printed byte values as the second element of each tuple. 
+ # SPOT_0: uv run pytest tests/aignostics/application/gui_test.py::test_gui_run_download -s --no-cov + # SPOT_1: uv run pytest tests/aignostics/application/cli_test.py::test_cli_run_execute -s --no-cov SPOT_0_EXPECTED_RESULT_FILES = [ ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), ("tissue_qc_geojson_polygons.json", 259955, 10), @@ -81,15 +96,15 @@ SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10) SPOT_1_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 469040, 10), - ("tissue_qc_geojson_polygons.json", 177779, 10), - ("tissue_segmentation_geojson_polygons.json", 205951, 10), - ("readout_generation_slide_readouts.csv", 299654, 10), - ("readout_generation_cell_readouts.csv", 2387860, 10), - ("cell_classification_geojson_polygons.json", 16687724, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 536582, 10), - ("tissue_segmentation_csv_class_information.csv", 441, 10), - ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_segmentation_map_image.tiff", 1288632, 10), + ("tissue_qc_geojson_polygons.json", 75293, 10), + ("tissue_segmentation_geojson_polygons.json", 152317, 10), + ("readout_generation_slide_readouts.csv", 299381, 10), + ("readout_generation_cell_readouts.csv", 466725, 10), + ("cell_classification_geojson_polygons.json", 2812005, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10), + ("tissue_segmentation_csv_class_information.csv", 446, 10), + ("tissue_qc_csv_class_information.csv", 290, 10), ] case "staging": @@ -97,20 +112,17 @@ TEST_APPLICATION_VERSION = "1.0.0" HETA_APPLICATION_ID = "he-tme" - HETA_APPLICATION_VERSION = "1.1.1" + HETA_APPLICATION_VERSION = "1.2.0" TEST_APPLICATION_VERSION_USE_LATEST_FALLBACK_SKIP = True PIPELINE_GPU_TYPE = "L4" PIPELINE_GPU_PROVISIONING_MODE = "SPOT" PIPELINE_GPU_FLEX_START_MAX_RUN_DURATION_MINUTES = None - PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30 PIPELINE_MAX_GPUS_PER_SLIDE = 1 PIPELINE_CPU_PROVISIONING_MODE = "SPOT" - 
PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30 - - SPECIAL_APPLICATION_ID = None - SPECIAL_APPLICATION_VERSION = None + PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 25 + # See production block above for instructions on how to update these sizes. SPOT_0_EXPECTED_RESULT_FILES = [ ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), ("tissue_qc_geojson_polygons.json", 259955, 10), @@ -125,15 +137,15 @@ SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10) SPOT_1_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 469040, 10), - ("tissue_qc_geojson_polygons.json", 177779, 10), - ("tissue_segmentation_geojson_polygons.json", 205951, 10), - ("readout_generation_slide_readouts.csv", 299654, 10), - ("readout_generation_cell_readouts.csv", 2387860, 10), - ("cell_classification_geojson_polygons.json", 16687724, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 536582, 10), - ("tissue_segmentation_csv_class_information.csv", 441, 10), - ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_segmentation_map_image.tiff", 1288632, 10), + ("tissue_qc_geojson_polygons.json", 75293, 10), + ("tissue_segmentation_geojson_polygons.json", 152317, 10), + ("readout_generation_slide_readouts.csv", 299381, 10), + ("readout_generation_cell_readouts.csv", 466725, 10), + ("cell_classification_geojson_polygons.json", 2812005, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10), + ("tissue_segmentation_csv_class_information.csv", 446, 10), + ("tissue_qc_csv_class_information.csv", 290, 10), ] case _: From 889620703f3b8177ddeb5b942d85e91099f1c4ed Mon Sep 17 00:00:00 2001 From: Ari Angelo Date: Tue, 19 May 2026 16:18:09 +0200 Subject: [PATCH 6/9] feat(tests): add HETA 1.2.0 parquet size checks and GeoJSON parity validation --- tests/aignostics/application/cli_test.py | 76 +++++++++++------------- tests/aignostics/application/gui_test.py | 25 +++++++- tests/constants_test.py | 48 +++++++++------ 3 files changed, 87 insertions(+), 62 deletions(-) 
diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py index c5d4a2e90..5d9a2fdc4 100644 --- a/tests/aignostics/application/cli_test.py +++ b/tests/aignostics/application/cli_test.py @@ -3,6 +3,7 @@ import contextlib import json import platform +import random import re from collections.abc import Generator from datetime import UTC, datetime, timedelta @@ -10,7 +11,21 @@ from time import sleep from unittest.mock import MagicMock, patch +import pandas as pd import pytest +from aignx.codegen.exceptions import ForbiddenException +from aignx.codegen.exceptions import NotFoundException as ApiNotFound +from aignx.codegen.models import ( + ItemOutput, + ItemResultReadResponse, + ItemState, + ItemTerminationReason, + RunItemStatistics, + RunOutput, + RunReadResponse, + RunState, + RunTerminationReason, +) from loguru import logger from tenacity import Retrying, retry, stop_after_attempt, wait_exponential from typer.testing import CliRunner @@ -847,8 +862,6 @@ def test_cli_run_list_for_organization(runner: CliRunner) -> None: @pytest.mark.unit def test_cli_run_list_forbidden_with_organization(runner: CliRunner) -> None: """Check ForbiddenException with --for-organization shows org-specific access denied message.""" - from aignx.codegen.exceptions import ForbiddenException - with patch.object( ApplicationService, "application_runs", side_effect=ForbiddenException(status=403, reason="Forbidden") ): @@ -862,8 +875,6 @@ def test_cli_run_list_forbidden_with_organization(runner: CliRunner) -> None: @pytest.mark.unit def test_cli_run_list_forbidden_without_organization(runner: CliRunner) -> None: """Check ForbiddenException without --for-organization shows generic access denied message.""" - from aignx.codegen.exceptions import ForbiddenException - with patch.object( ApplicationService, "application_runs", side_effect=ForbiddenException(status=403, reason="Forbidden") ): @@ -897,18 +908,6 @@ def test_cli_run_describe_not_found(runner: 
CliRunner, record_property) -> None: @pytest.mark.integration def test_cli_run_describe_json_includes_items(runner: CliRunner) -> None: """Check run describe --format=json includes items in output.""" - from aignx.codegen.models import ( - ItemOutput, - ItemResultReadResponse, - ItemState, - ItemTerminationReason, - RunItemStatistics, - RunOutput, - RunReadResponse, - RunState, - RunTerminationReason, - ) - mock_run_data = RunReadResponse( run_id="test-run-id-123", application_id="test-app", @@ -1111,8 +1110,8 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) -> results_dir = tmp_path / SPOT_1_FILENAME.replace(".tiff", "") assert results_dir.is_dir(), f"Expected directory {results_dir} not found" files_in_dir = list(results_dir.glob("*")) - assert len(files_in_dir) == 9, ( - f"Expected 9 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}" + assert len(files_in_dir) == 12, ( + f"Expected 12 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}" ) print(f"Found files in {results_dir}:") for filename, expected_size, tolerance_percent in SPOT_1_EXPECTED_RESULT_FILES: @@ -1133,6 +1132,23 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) -> f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})" ) + # Validate parquet <-> GeoJSON row count parity for the 3 paired outputs + parquet_geojson_pairs = [ + ("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"), + ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"), + ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"), + ] + for parquet_filename, geojson_filename in parquet_geojson_pairs: + parquet_path = results_dir / parquet_filename + geojson_path = results_dir / geojson_filename + parquet_row_count = len(pd.read_parquet(parquet_path)) + with geojson_path.open() as f: + 
geojson_feature_count = len(json.load(f)["features"]) + assert parquet_row_count == geojson_feature_count, ( + f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) " + f"and {geojson_filename} ({geojson_feature_count} features)" + ) + # Validate the execute command exited successfully assert result.exit_code == 0 @@ -1222,9 +1238,6 @@ def test_cli_run_update_item_metadata_not_dict(runner: CliRunner) -> None: @pytest.mark.sequential def test_cli_run_dump_and_update_custom_metadata(runner: CliRunner, tmp_path: Path) -> None: """Test dumping and updating custom metadata via CLI commands.""" - import json - import random - unique_tag = f"test_metadata_{datetime.now(tz=UTC).timestamp()}" with submitted_run(runner, tmp_path, CSV_CONTENT_SPOT0, extra_args=["--tags", unique_tag, "--force"]) as run_id: # Step 1: Dump initial custom metadata of run @@ -1313,11 +1326,8 @@ def test_cli_run_dump_and_update_custom_metadata(runner: CliRunner, tmp_path: Pa @pytest.mark.e2e @pytest.mark.timeout(timeout=240) @pytest.mark.sequential -def test_cli_run_dump_and_update_item_custom_metadata(runner: CliRunner, tmp_path: Path) -> None: # noqa: PLR0915 +def test_cli_run_dump_and_update_item_custom_metadata(runner: CliRunner, tmp_path: Path) -> None: """Test dumping and updating item custom metadata via CLI commands.""" - import json - import random - unique_tag = f"test_item_metadata_{datetime.now(tz=UTC).timestamp()}" # CSV_CONTENT_SPOT0 uses SPOT_0_FILENAME as external_id, which the describe output surfaces # as "Item External ID: `...`" — the get_external_id() helper below captures it dynamically. 
@@ -1773,8 +1783,6 @@ def test_cli_application_version_document_describe_success(runner: CliRunner, re def test_cli_application_version_document_describe_not_found(runner: CliRunner, record_property) -> None: """`application version document describe` exits 2 with a clear message on 404.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-03") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_documents = MagicMock() fake_documents.details.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) fake_client = MagicMock() @@ -1870,8 +1878,6 @@ def test_cli_application_version_document_list_json_empty(runner: CliRunner, rec def test_cli_application_version_document_list_resolve_not_found_text(runner: CliRunner, record_property) -> None: """`application version document list` exits 2 when the application version cannot be resolved.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-01") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_client = MagicMock() fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) @@ -1888,8 +1894,6 @@ def test_cli_application_version_document_list_resolve_not_found_text(runner: Cl def test_cli_application_version_document_list_resolve_not_found_json(runner: CliRunner, record_property) -> None: """`application version document list --format json` emits structured error on 404.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-01") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_client = MagicMock() fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) @@ -1976,8 +1980,6 @@ def test_cli_application_version_document_describe_json_success(runner: CliRunne def test_cli_application_version_document_describe_resolve_not_found_text(runner: CliRunner, record_property) -> None: """`describe` exits 2 when the 
application version cannot be resolved (text format).""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-03") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_client = MagicMock() fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) @@ -1996,8 +1998,6 @@ def test_cli_application_version_document_describe_resolve_not_found_text(runner def test_cli_application_version_document_describe_resolve_not_found_json(runner: CliRunner, record_property) -> None: """`describe --format json` emits structured error when version cannot be resolved.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-03") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_client = MagicMock() fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) @@ -2026,8 +2026,6 @@ def test_cli_application_version_document_describe_resolve_not_found_json(runner def test_cli_application_version_document_describe_not_found_json(runner: CliRunner, record_property) -> None: """`describe --format json` emits structured error when the document is missing.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-03") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_documents = MagicMock() fake_documents.details.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) fake_client = MagicMock() @@ -2111,8 +2109,6 @@ def test_cli_application_version_document_download_resolve_not_found( ) -> None: """`download` exits 2 when the application version cannot be resolved.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-04") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_client = MagicMock() fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) @@ -2142,8 +2138,6 @@ def 
test_cli_application_version_document_download_not_found( ) -> None: """`download` exits 2 with a clear message when the document does not exist.""" record_property("tested-item-id", "TC-APPLICATION-CLI-05-04") - from aignx.codegen.exceptions import NotFoundException as ApiNotFound - fake_documents = MagicMock() fake_documents.download_to_path.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND) fake_client = MagicMock() diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py index 59ba189e2..05c799d2a 100644 --- a/tests/aignostics/application/gui_test.py +++ b/tests/aignostics/application/gui_test.py @@ -1,6 +1,7 @@ """Tests to verify the GUI functionality of the application module.""" import contextlib +import json import re import tempfile from asyncio import sleep, to_thread @@ -9,6 +10,7 @@ from typing import TYPE_CHECKING from unittest.mock import AsyncMock, MagicMock, Mock, patch +import pandas as pd import pytest from nicegui.testing import User from typer.testing import CliRunner @@ -354,7 +356,7 @@ async def test_gui_download_dataset_via_application_to_run_cancel_to_find_back( @pytest.mark.flaky(retries=1, delay=5) @pytest.mark.timeout(timeout=60 * 10) @pytest.mark.sequential # Helps on Linux with image analysis step otherwise timing out -async def test_gui_run_download( # noqa: PLR0915 +async def test_gui_run_download( # noqa: PLR0914, PLR0915 user: User, runner: CliRunner, tmp_path: Path, silent_logging: None, record_property ) -> None: """Test that the user can download a run result via the GUI.""" @@ -440,8 +442,8 @@ async def test_gui_run_download( # noqa: PLR0915 # Check for files in the results directory files_in_results_dir = list(results_dir.glob("*")) - assert len(files_in_results_dir) == 9, ( - f"Expected 9 files in {results_dir}, but found {len(files_in_results_dir)}: " + assert len(files_in_results_dir) == 12, ( + f"Expected 12 files in {results_dir}, but found 
{len(files_in_results_dir)}: " f"{[f.name for f in files_in_results_dir]}" ) @@ -464,6 +466,23 @@ async def test_gui_run_download( # noqa: PLR0915 f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})" ) + # Validate parquet <-> GeoJSON row count parity for the 3 paired outputs + parquet_geojson_pairs = [ + ("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"), + ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"), + ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"), + ] + for parquet_filename, geojson_filename in parquet_geojson_pairs: + parquet_path = results_dir / parquet_filename + geojson_path = results_dir / geojson_filename + parquet_row_count = len(pd.read_parquet(parquet_path)) + with geojson_path.open() as f: + geojson_feature_count = len(json.load(f)["features"]) + assert parquet_row_count == geojson_feature_count, ( + f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) " + f"and {geojson_filename} ({geojson_feature_count} features)" + ) + @pytest.mark.integration @pytest.mark.sequential diff --git a/tests/constants_test.py b/tests/constants_test.py index 0296cb0d8..aa18676ee 100644 --- a/tests/constants_test.py +++ b/tests/constants_test.py @@ -83,15 +83,18 @@ # SPOT_0: uv run pytest tests/aignostics/application/gui_test.py::test_gui_run_download -s --no-cov # SPOT_1: uv run pytest tests/aignostics/application/cli_test.py::test_cli_run_execute -s --no-cov SPOT_0_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), - ("tissue_qc_geojson_polygons.json", 259955, 10), - ("tissue_segmentation_geojson_polygons.json", 887003, 10), - ("readout_generation_slide_readouts.csv", 303217, 10), - ("readout_generation_cell_readouts.csv", 1658344, 10), - ("cell_classification_geojson_polygons.json", 11218951, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10), - 
("tissue_segmentation_csv_class_information.csv", 452, 10), - ("tissue_qc_csv_class_information.csv", 285, 10), + ("tissue_qc_segmentation_map_image.tiff", 470150, 10), + ("tissue_qc_geojson_polygons.json", 171251, 10), + ("tissue_segmentation_geojson_polygons.json", 185516, 10), + ("readout_generation_slide_readouts.csv", 300205, 10), + ("readout_generation_cell_readouts.csv", 2417117, 10), + ("cell_classification_geojson_polygons.json", 16673412, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 527264, 10), + ("tissue_segmentation_csv_class_information.csv", 443, 10), + ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_parquet_polygons.parquet", 34346, 10), + ("tissue_segmentation_parquet_polygons.parquet", 39185, 10), + ("cell_classification_parquet_polygons.parquet", 5476364, 10), ] SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10) @@ -105,6 +108,9 @@ ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10), ("tissue_segmentation_csv_class_information.csv", 446, 10), ("tissue_qc_csv_class_information.csv", 290, 10), + ("tissue_qc_parquet_polygons.parquet", 29049, 10), + ("tissue_segmentation_parquet_polygons.parquet", 56682, 10), + ("cell_classification_parquet_polygons.parquet", 838533, 10), ] case "staging": @@ -124,15 +130,18 @@ # See production block above for instructions on how to update these sizes. 
SPOT_0_EXPECTED_RESULT_FILES = [ - ("tissue_qc_segmentation_map_image.tiff", 1642856, 10), - ("tissue_qc_geojson_polygons.json", 259955, 10), - ("tissue_segmentation_geojson_polygons.json", 887003, 10), - ("readout_generation_slide_readouts.csv", 303217, 10), - ("readout_generation_cell_readouts.csv", 1658344, 10), - ("cell_classification_geojson_polygons.json", 11218951, 10), - ("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10), - ("tissue_segmentation_csv_class_information.csv", 452, 10), - ("tissue_qc_csv_class_information.csv", 285, 10), + ("tissue_qc_segmentation_map_image.tiff", 470150, 10), + ("tissue_qc_geojson_polygons.json", 171251, 10), + ("tissue_segmentation_geojson_polygons.json", 185516, 10), + ("readout_generation_slide_readouts.csv", 300205, 10), + ("readout_generation_cell_readouts.csv", 2417117, 10), + ("cell_classification_geojson_polygons.json", 16673412, 10), + ("tissue_segmentation_segmentation_map_image.tiff", 527264, 10), + ("tissue_segmentation_csv_class_information.csv", 443, 10), + ("tissue_qc_csv_class_information.csv", 286, 10), + ("tissue_qc_parquet_polygons.parquet", 34346, 10), + ("tissue_segmentation_parquet_polygons.parquet", 39185, 10), + ("cell_classification_parquet_polygons.parquet", 5476364, 10), ] SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10) @@ -146,6 +155,9 @@ ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10), ("tissue_segmentation_csv_class_information.csv", 446, 10), ("tissue_qc_csv_class_information.csv", 290, 10), + ("tissue_qc_parquet_polygons.parquet", 29049, 10), + ("tissue_segmentation_parquet_polygons.parquet", 56682, 10), + ("cell_classification_parquet_polygons.parquet", 838533, 10), ] case _: From 17d9e1fa0bae5b167af5b0f762961b12d696a4a0 Mon Sep 17 00:00:00 2001 From: Ari Angelo Date: Tue, 19 May 2026 19:58:04 +0200 Subject: [PATCH 7/9] fix(tests): address PR review comments on parquet/GeoJSON parity checks - Use pyarrow.parquet.read_metadata() instead of pd.read_parquet() to get 
row count from Parquet footer without loading polygon data - Use ijson streaming to count GeoJSON features without loading the full feature array into memory - Replace hard-coded file counts with len(SPOT_x_EXPECTED_RESULT_FILES) to avoid drift when the constants change - Sync qupath/gui_test.py to use len(SPOT_0_EXPECTED_RESULT_FILES) instead of the stale literal 9 - Remove unused _build_minimal_wsi_input_item dead code from e2e_test.py --- tests/aignostics/application/cli_test.py | 15 +++++++++------ tests/aignostics/application/gui_test.py | 15 ++++++++------- tests/aignostics/platform/e2e_test.py | 17 ----------------- tests/aignostics/qupath/gui_test.py | 5 +++-- 4 files changed, 20 insertions(+), 32 deletions(-) diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py index 5d9a2fdc4..6cadfdde5 100644 --- a/tests/aignostics/application/cli_test.py +++ b/tests/aignostics/application/cli_test.py @@ -11,7 +11,8 @@ from time import sleep from unittest.mock import MagicMock, patch -import pandas as pd +import ijson +import pyarrow.parquet as pq import pytest from aignx.codegen.exceptions import ForbiddenException from aignx.codegen.exceptions import NotFoundException as ApiNotFound @@ -1110,8 +1111,10 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) -> results_dir = tmp_path / SPOT_1_FILENAME.replace(".tiff", "") assert results_dir.is_dir(), f"Expected directory {results_dir} not found" files_in_dir = list(results_dir.glob("*")) - assert len(files_in_dir) == 12, ( - f"Expected 12 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}" + expected_count = len(SPOT_1_EXPECTED_RESULT_FILES) + assert len(files_in_dir) == expected_count, ( + f"Expected {expected_count} files in {results_dir}, but found {len(files_in_dir)}: " + f"{[f.name for f in files_in_dir]}" ) print(f"Found files in {results_dir}:") for filename, expected_size, tolerance_percent in 
SPOT_1_EXPECTED_RESULT_FILES: @@ -1141,9 +1144,9 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) -> for parquet_filename, geojson_filename in parquet_geojson_pairs: parquet_path = results_dir / parquet_filename geojson_path = results_dir / geojson_filename - parquet_row_count = len(pd.read_parquet(parquet_path)) - with geojson_path.open() as f: - geojson_feature_count = len(json.load(f)["features"]) + parquet_row_count = pq.read_metadata(parquet_path).num_rows + with geojson_path.open("rb") as f: + geojson_feature_count = sum(1 for _ in ijson.items(f, "features.item")) assert parquet_row_count == geojson_feature_count, ( f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) " f"and {geojson_filename} ({geojson_feature_count} features)" diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py index 05c799d2a..361cd419d 100644 --- a/tests/aignostics/application/gui_test.py +++ b/tests/aignostics/application/gui_test.py @@ -1,7 +1,6 @@ """Tests to verify the GUI functionality of the application module.""" import contextlib -import json import re import tempfile from asyncio import sleep, to_thread @@ -10,7 +9,8 @@ from typing import TYPE_CHECKING from unittest.mock import AsyncMock, MagicMock, Mock, patch -import pandas as pd +import ijson +import pyarrow.parquet as pq import pytest from nicegui.testing import User from typer.testing import CliRunner @@ -442,8 +442,9 @@ async def test_gui_run_download( # noqa: PLR0914, PLR0915 # Check for files in the results directory files_in_results_dir = list(results_dir.glob("*")) - assert len(files_in_results_dir) == 12, ( - f"Expected 12 files in {results_dir}, but found {len(files_in_results_dir)}: " + expected_count = len(SPOT_0_EXPECTED_RESULT_FILES) + assert len(files_in_results_dir) == expected_count, ( + f"Expected {expected_count} files in {results_dir}, but found {len(files_in_results_dir)}: " f"{[f.name for f in 
files_in_results_dir]}" ) @@ -475,9 +476,9 @@ async def test_gui_run_download( # noqa: PLR0914, PLR0915 for parquet_filename, geojson_filename in parquet_geojson_pairs: parquet_path = results_dir / parquet_filename geojson_path = results_dir / geojson_filename - parquet_row_count = len(pd.read_parquet(parquet_path)) - with geojson_path.open() as f: - geojson_feature_count = len(json.load(f)["features"]) + parquet_row_count = pq.read_metadata(parquet_path).num_rows + with geojson_path.open("rb") as f: + geojson_feature_count = sum(1 for _ in ijson.items(f, "features.item")) assert parquet_row_count == geojson_feature_count, ( f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) " f"and {geojson_filename} ({geojson_feature_count} features)" diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py index 9324898ce..0f7699600 100644 --- a/tests/aignostics/platform/e2e_test.py +++ b/tests/aignostics/platform/e2e_test.py @@ -136,23 +136,6 @@ def _build_wsi_input_item( # noqa: PLR0913, PLR0917 ) -def _build_minimal_wsi_input_item(gs_url: str, crc32c: str, expires_seconds: int) -> platform.InputItem: - """Build a minimal WSI InputItem supplying only the CRC32C and image URL.""" - return platform.InputItem( - external_id=gs_url, - input_artifacts=[ - platform.InputArtifact( - name="whole_slide_image", - download_url=platform.generate_signed_url(url=gs_url, expires_seconds=expires_seconds), - metadata={ - "checksum_base64_crc32c": crc32c, - "media_type": "image/tiff", - }, - ) - ], - ) - - def _get_single_spot_payload_for_heta(expires_seconds: int) -> list[platform.InputItem]: """Generates a payload using a single spot.""" return [ diff --git a/tests/aignostics/qupath/gui_test.py b/tests/aignostics/qupath/gui_test.py index 01d9a1b6d..0fdd07a7a 100644 --- a/tests/aignostics/qupath/gui_test.py +++ b/tests/aignostics/qupath/gui_test.py @@ -257,8 +257,9 @@ async def test_gui_run_qupath_install_to_inspect( # noqa: C901, 
PLR0912, PLR091 # Check for files in the results directory files_in_results_dir = list(results_dir.glob("*")) - assert len(files_in_results_dir) == 9, ( - f"Expected 9 files in {results_dir}, but found {len(files_in_results_dir)}: " + expected_count = len(SPOT_0_EXPECTED_RESULT_FILES) + assert len(files_in_results_dir) == expected_count, ( + f"Expected {expected_count} files in {results_dir}, but found {len(files_in_results_dir)}: " f"{[f.name for f in files_in_results_dir]}" ) From 8d1ddfeb58417dbbdcfe9068752faf6f2c4ba8da Mon Sep 17 00:00:00 2001 From: Ari Angelo Date: Wed, 20 May 2026 11:23:24 +0200 Subject: [PATCH 8/9] fix(tests): add blank line after lazy pyarrow import for ruff format compliance --- tests/aignostics/application/cli_test.py | 3 ++- tests/aignostics/application/gui_test.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py index 6cadfdde5..3bc3a9cf3 100644 --- a/tests/aignostics/application/cli_test.py +++ b/tests/aignostics/application/cli_test.py @@ -12,7 +12,6 @@ from unittest.mock import MagicMock, patch import ijson -import pyarrow.parquet as pq import pytest from aignx.codegen.exceptions import ForbiddenException from aignx.codegen.exceptions import NotFoundException as ApiNotFound @@ -1141,6 +1140,8 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) -> ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"), ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"), ] + import pyarrow.parquet as pq + for parquet_filename, geojson_filename in parquet_geojson_pairs: parquet_path = results_dir / parquet_filename geojson_path = results_dir / geojson_filename diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py index 361cd419d..205fa3d2d 100644 --- a/tests/aignostics/application/gui_test.py +++ 
b/tests/aignostics/application/gui_test.py @@ -10,7 +10,6 @@ from unittest.mock import AsyncMock, MagicMock, Mock, patch import ijson -import pyarrow.parquet as pq import pytest from nicegui.testing import User from typer.testing import CliRunner @@ -473,6 +472,8 @@ async def test_gui_run_download( # noqa: PLR0914, PLR0915 ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"), ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"), ] + import pyarrow.parquet as pq + for parquet_filename, geojson_filename in parquet_geojson_pairs: parquet_path = results_dir / parquet_filename geojson_path = results_dir / geojson_filename From b8261e0517d2fb889a9d8ef536afbc645b518aaa Mon Sep 17 00:00:00 2001 From: Ari Angelo Date: Wed, 20 May 2026 16:02:34 +0200 Subject: [PATCH 9/9] =?UTF-8?q?fix(tests):=20update=20stale=20assertions?= =?UTF-8?q?=20=E2=80=94=2016=20schemata=20files=20and=20SPOT=5F1=20file=20?= =?UTF-8?q?size=20constant?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/aignostics/application/cli_test.py | 2 +- tests/aignostics/application/gui_test.py | 3 ++- tests/aignostics/dataset/cli_test.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py index 3bc3a9cf3..0a162298a 100644 --- a/tests/aignostics/application/cli_test.py +++ b/tests/aignostics/application/cli_test.py @@ -239,7 +239,7 @@ def test_cli_application_dump_schemata(runner: CliRunner, tmp_path: Path, record ], ) assert result.exit_code == 0 - assert "Zipped 11 files" in normalize_output(result.output) + assert "Zipped 16 files" in normalize_output(result.output) zip_file = sanitize_path(Path(tmp_path / f"{HETA_APPLICATION_ID}_{HETA_APPLICATION_VERSION}_schemata.zip")) assert zip_file.exists(), f"Expected zip file {zip_file} not found" diff --git 
a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py index 205fa3d2d..5c81bcff2 100644 --- a/tests/aignostics/application/gui_test.py +++ b/tests/aignostics/application/gui_test.py @@ -32,6 +32,7 @@ SPOT_0_FILESIZE, SPOT_0_GS_URL, SPOT_1_FILENAME, + SPOT_1_FILESIZE, SPOT_1_GS_URL, ) @@ -215,7 +216,7 @@ async def test_gui_download_dataset_via_application_to_run_cancel_to_find_back( assert SPOT_1_FILENAME in normalize_output(result.stdout) expected_file = Path(tmp_path) / SPOT_1_FILENAME assert expected_file.exists(), f"Expected file {expected_file} not found" - assert expected_file.stat().st_size == 14681750 + assert expected_file.stat().st_size == SPOT_1_FILESIZE # Open the GUI and navigate to Atlas H&E-TME application await user.open("/") diff --git a/tests/aignostics/dataset/cli_test.py b/tests/aignostics/dataset/cli_test.py index 23e1f5236..0c9fdfe3c 100644 --- a/tests/aignostics/dataset/cli_test.py +++ b/tests/aignostics/dataset/cli_test.py @@ -11,7 +11,7 @@ from aignostics.cli import cli from tests.conftest import normalize_output -from tests.constants_test import SPOT_1_FILENAME, SPOT_1_GS_URL +from tests.constants_test import SPOT_1_FILENAME, SPOT_1_FILESIZE, SPOT_1_GS_URL SERIES_UID = "1.3.6.1.4.1.5962.99.1.1069745200.1645485340.1637452317744.2.0" THUMBNAIL_UID = "1.3.6.1.4.1.5962.99.1.1038911754.1238045814.1637421484298.15.0" @@ -149,7 +149,7 @@ def test_cli_aignostics_download_sample(runner: CliRunner, tmp_path: Path, recor expected_file = tmp_path / SPOT_1_FILENAME assert expected_file.exists(), f"Expected file {expected_file} not found" - assert expected_file.stat().st_size == 14681750 + assert expected_file.stat().st_size == SPOT_1_FILESIZE @pytest.mark.integration