From fba6ec8f12e911aa559946d5fe0a7662803c8939 Mon Sep 17 00:00:00 2001
From: Ari Angelo <hello@ari.nz>
Date: Tue, 28 Apr 2026 09:33:27 +0200
Subject: [PATCH 1/9] chore(tests): bump staging app versions and drop
 special-app constant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- test-app: 0.0.6 → 1.0.0 (new version uses same he-tme input schema)
- he-tme: 1.1.0 → 1.1.1 on staging (already present in the tree; this
  patch itself leaves HETA_APPLICATION_VERSION untouched)
- Remove SPECIAL_APPLICATION_ID/VERSION from staging (no longer needed)

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
---
 tests/constants_test.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/constants_test.py b/tests/constants_test.py
index f9385b290..0d98bec0b 100644
--- a/tests/constants_test.py
+++ b/tests/constants_test.py
@@ -94,7 +94,7 @@
 
     case "staging":
         TEST_APPLICATION_ID = "test-app"
-        TEST_APPLICATION_VERSION = "0.0.6"
+        TEST_APPLICATION_VERSION = "1.0.0"
 
         HETA_APPLICATION_ID = "he-tme"
         HETA_APPLICATION_VERSION = "1.1.1"
@@ -108,9 +108,6 @@
         PIPELINE_CPU_PROVISIONING_MODE = "SPOT"
         PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30
 
-        SPECIAL_APPLICATION_ID = "test-app"
-        SPECIAL_APPLICATION_VERSION = "0.99.0"
-
         SPOT_0_EXPECTED_RESULT_FILES = [
             ("tissue_qc_segmentation_map_image.tiff", 1642856, 10),
             ("tissue_qc_geojson_polygons.json", 259955, 10),

From 0e4630b2c9a6d68a87a210c366ccd4aeb2158e8d Mon Sep 17 00:00:00 2001
From: Ari Angelo <hello@ari.nz>
Date: Tue, 28 Apr 2026 15:54:06 +0200
Subject: [PATCH 2/9] chore(tests): fix staging SPECIAL_APPLICATION constants
 and drop normalization artifact

- Re-add SPECIAL_APPLICATION_ID/VERSION to staging pointing to test-app 1.0.0
  so e2e_test.py imports resolve on staging
- Remove normalization:wsi input artifact from _get_spots_payload_for_special;
  test-app 1.0.0 only requires whole_slide_image, matching the he-tme schema

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
---
 tests/aignostics/platform/e2e_test.py | 12 ------------
 tests/constants_test.py               |  3 +++
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py
index 634e85da0..3696fc675 100644
--- a/tests/aignostics/platform/e2e_test.py
+++ b/tests/aignostics/platform/e2e_test.py
@@ -224,13 +224,6 @@ def _get_spots_payload_for_special(expires_seconds: int, count: int) -> list[pla
             "disease": "LUNG_CANCER",
         },
     }
-    normalization_metadata = {
-        "checksum_base64_crc32c": SPOT_1_CRC32C,
-        "width_px": SPOT_1_WIDTH,
-        "height_px": SPOT_1_HEIGHT,
-        "resolution_mpp": SPOT_1_RESOLUTION_MPP,
-        "media_type": "image/tiff",
-    }
     return [
         platform.InputItem(
             external_id=f"{SPOT_1_GS_URL}&spot_index={index}",
@@ -240,11 +233,6 @@ def _get_spots_payload_for_special(expires_seconds: int, count: int) -> list[pla
                     download_url=signed_url,
                     metadata=wsi_metadata,
                 ),
-                platform.InputArtifact(
-                    name="normalization:wsi",
-                    download_url=signed_url,
-                    metadata=normalization_metadata,
-                ),
             ],
         )
         for index in range(count)
diff --git a/tests/constants_test.py b/tests/constants_test.py
index 0d98bec0b..78dcf9dfd 100644
--- a/tests/constants_test.py
+++ b/tests/constants_test.py
@@ -108,6 +108,9 @@
         PIPELINE_CPU_PROVISIONING_MODE = "SPOT"
         PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30
 
+        SPECIAL_APPLICATION_ID = "test-app"
+        SPECIAL_APPLICATION_VERSION = "1.0.0"
+
         SPOT_0_EXPECTED_RESULT_FILES = [
             ("tissue_qc_segmentation_map_image.tiff", 1642856, 10),
             ("tissue_qc_geojson_polygons.json", 259955, 10),

From 27ca00e39e9cba4b319c944c74c42256f02ef5c1 Mon Sep 17 00:00:00 2001
From: Ari Angelo <hello@ari.nz>
Date: Tue, 28 Apr 2026 16:03:49 +0200
Subject: [PATCH 3/9] chore(tests): skip special-app tests on staging; guard
 import

- Remove SPECIAL_APPLICATION_ID/VERSION from staging constants entirely
- Guard the import in e2e_test.py with try/except so the missing names
  don't raise ImportError on staging
- Add skipif(SPECIAL_APPLICATION_ID is None) to both special-app tests
  so they are silently skipped on staging but still run on production (0.99.0)

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
---
 tests/aignostics/platform/e2e_test.py | 10 ++++++++--
 tests/constants_test.py               |  3 ---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py
index 3696fc675..c620d7a2d 100644
--- a/tests/aignostics/platform/e2e_test.py
+++ b/tests/aignostics/platform/e2e_test.py
@@ -36,8 +36,6 @@
     PIPELINE_GPU_TYPE,
     PIPELINE_MAX_GPUS_PER_SLIDE,
     PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES,
-    SPECIAL_APPLICATION_ID,
-    SPECIAL_APPLICATION_VERSION,
     SPOT_0_CRC32C,
     SPOT_0_GS_URL,
     SPOT_0_HEIGHT,
@@ -62,6 +60,12 @@
     TEST_APPLICATION_VERSION,
 )
 
+try:
+    from tests.constants_test import SPECIAL_APPLICATION_ID, SPECIAL_APPLICATION_VERSION
+except ImportError:
+    SPECIAL_APPLICATION_ID = None  # type: ignore[assignment]
+    SPECIAL_APPLICATION_VERSION = None  # type: ignore[assignment]
+
 TEST_APPLICATION_SUBMIT_AND_WAIT_DEADLINE_SECONDS = 60 * 45  # 45 minutes
 TEST_APPLICATION_SUBMIT_AND_WAIT_DUE_DATE_SECONDS = 60 * 10  # 10 minutes
 TEST_APPLICATION_SUBMIT_AND_WAIT_TIMEOUT_SECONDS = (
@@ -610,6 +614,7 @@ def test_platform_heta_app_submit() -> None:
 @pytest.mark.e2e
 @pytest.mark.stress_only
 @pytest.mark.long_running
+@pytest.mark.skipif(SPECIAL_APPLICATION_ID is None, reason="Special application not configured for this environment")
 @pytest.mark.timeout(timeout=SPECIAL_APPLICATION_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS)
 def test_platform_special_app_submit() -> None:
     """Test application runs with the special application.
@@ -678,6 +683,7 @@ def test_platform_special_app_submit() -> None:
 @pytest.mark.stress_only
 @pytest.mark.long_running
 @pytest.mark.scheduled_only
+@pytest.mark.skipif(SPECIAL_APPLICATION_ID is None, reason="Special application not configured for this environment")
 @pytest.mark.timeout(timeout=SPECIAL_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS)
 def test_platform_special_app_find_and_validate() -> None:
     """Test application runs with the special application.
diff --git a/tests/constants_test.py b/tests/constants_test.py
index 78dcf9dfd..0d98bec0b 100644
--- a/tests/constants_test.py
+++ b/tests/constants_test.py
@@ -108,9 +108,6 @@
         PIPELINE_CPU_PROVISIONING_MODE = "SPOT"
         PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30
 
-        SPECIAL_APPLICATION_ID = "test-app"
-        SPECIAL_APPLICATION_VERSION = "1.0.0"
-
         SPOT_0_EXPECTED_RESULT_FILES = [
             ("tissue_qc_segmentation_map_image.tiff", 1642856, 10),
             ("tissue_qc_geojson_polygons.json", 259955, 10),

From c06f7732ff7ecc5e3cb7b8ff80621100b61773aa Mon Sep 17 00:00:00 2001
From: Ari Angelo <hello@ari.nz>
Date: Tue, 28 Apr 2026 16:24:20 +0200
Subject: [PATCH 4/9] chore(tests): use None sentinel for SPECIAL_APPLICATION
 on staging

Simpler than a try/except guard: staging defines SPECIAL_APPLICATION_ID
and SPECIAL_APPLICATION_VERSION as None, the regular import works, and
the existing skipif(SPECIAL_APPLICATION_ID is None) handles the rest.

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
---
 tests/aignostics/platform/e2e_test.py | 8 ++------
 tests/constants_test.py               | 3 +++
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py
index c620d7a2d..26fea08ae 100644
--- a/tests/aignostics/platform/e2e_test.py
+++ b/tests/aignostics/platform/e2e_test.py
@@ -36,6 +36,8 @@
     PIPELINE_GPU_TYPE,
     PIPELINE_MAX_GPUS_PER_SLIDE,
     PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES,
+    SPECIAL_APPLICATION_ID,
+    SPECIAL_APPLICATION_VERSION,
     SPOT_0_CRC32C,
     SPOT_0_GS_URL,
     SPOT_0_HEIGHT,
@@ -60,12 +62,6 @@
     TEST_APPLICATION_VERSION,
 )
 
-try:
-    from tests.constants_test import SPECIAL_APPLICATION_ID, SPECIAL_APPLICATION_VERSION
-except ImportError:
-    SPECIAL_APPLICATION_ID = None  # type: ignore[assignment]
-    SPECIAL_APPLICATION_VERSION = None  # type: ignore[assignment]
-
 TEST_APPLICATION_SUBMIT_AND_WAIT_DEADLINE_SECONDS = 60 * 45  # 45 minutes
 TEST_APPLICATION_SUBMIT_AND_WAIT_DUE_DATE_SECONDS = 60 * 10  # 10 minutes
 TEST_APPLICATION_SUBMIT_AND_WAIT_TIMEOUT_SECONDS = (
diff --git a/tests/constants_test.py b/tests/constants_test.py
index 0d98bec0b..946cceba1 100644
--- a/tests/constants_test.py
+++ b/tests/constants_test.py
@@ -108,6 +108,9 @@
         PIPELINE_CPU_PROVISIONING_MODE = "SPOT"
         PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30
 
+        SPECIAL_APPLICATION_ID = None
+        SPECIAL_APPLICATION_VERSION = None
+
         SPOT_0_EXPECTED_RESULT_FILES = [
             ("tissue_qc_segmentation_map_image.tiff", 1642856, 10),
             ("tissue_qc_geojson_polygons.json", 259955, 10),

From d1397d856789a35d79ad2316aba05a4bcb119b24 Mon Sep 17 00:00:00 2001
From: Ari Angelo <hello@ari.nz>
Date: Tue, 12 May 2026 01:15:41 +0200
Subject: [PATCH 5/9] chore(tests): update SPOT_1 slide, add SPOT_4, bump app
 versions to he-tme 1.2.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace SPOT_1 with breast cancer slide 1603ba4c (BREAST/BREAST_CANCER,
  6649×6578 at 0.25 MPP); preserve old 9375e3ed data as SPOT_4
- Add VIPS 10x resolution ambiguity note for SPOT_2, SPOT_3, SPOT_4
- Bump HETA_APPLICATION_VERSION to 1.2.0, TEST_APPLICATION_VERSION to 1.0.0
- Remove SPECIAL_APPLICATION concept; restore stress tests against test-app 1.0.0
- Unify payload builders via _build_wsi_input_item / _build_minimal_wsi_input_item
- Update SPOT_1_EXPECTED_RESULT_FILES sizes from staging run 43a3bcd2
- Reduce PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES to 25
---
 tests/aignostics/platform/e2e_test.py | 316 +++++++++++---------------
 tests/constants_test.py               |  92 ++++----
 2 files changed, 187 insertions(+), 221 deletions(-)

diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py
index 26fea08ae..9324898ce 100644
--- a/tests/aignostics/platform/e2e_test.py
+++ b/tests/aignostics/platform/e2e_test.py
@@ -36,17 +36,17 @@
     PIPELINE_GPU_TYPE,
     PIPELINE_MAX_GPUS_PER_SLIDE,
     PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES,
-    SPECIAL_APPLICATION_ID,
-    SPECIAL_APPLICATION_VERSION,
     SPOT_0_CRC32C,
     SPOT_0_GS_URL,
     SPOT_0_HEIGHT,
     SPOT_0_RESOLUTION_MPP,
     SPOT_0_WIDTH,
     SPOT_1_CRC32C,
+    SPOT_1_DISEASE,
     SPOT_1_GS_URL,
     SPOT_1_HEIGHT,
     SPOT_1_RESOLUTION_MPP,
+    SPOT_1_TISSUE,
     SPOT_1_WIDTH,
     SPOT_2_CRC32C,
     SPOT_2_GS_URL,
@@ -87,152 +87,119 @@
 # Plan to have 100.000 slides processed in total, with 100 slides per application run,
 # one application run starting every 5 minutes, with a throughput of 1 slide per minute,
 # given no GPU.
-SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT = 100
-SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_00 = 2000  # Minute 0..9
-SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_20 = 2000  # Minute 20..29
-SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS = 60 * 60 * 20  # 20 hours
-SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS = 60 * 60 * 24  # 24 hours
-SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40 = 60 * 60 * 2  # 2 hours
-SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40 = 60 * 60 * 3  # 3 hours
-SPECIAL_APPLICATION_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS = 60 * 30  # 30 minutes
-SPECIAL_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS = 60 * 60  # 60 minutes
+TEST_APP_STRESS_SLIDE_PER_RUN_COUNT = 100
+TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_00 = 2000  # Minute 0..9
+TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_20 = 2000  # Minute 20..29
+TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS = 60 * 60 * 20  # 20 hours
+TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS = 60 * 60 * 24  # 24 hours
+TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40 = 60 * 60 * 2  # 2 hours
+TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40 = 60 * 60 * 3  # 3 hours
+TEST_APP_STRESS_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS = 60 * 30  # 30 minutes
+TEST_APP_STRESS_FIND_AND_VALIDATE_TIMEOUT_SECONDS = 60 * 60  # 60 minutes
+
+
+def _build_wsi_input_item(  # noqa: PLR0913, PLR0917
+    gs_url: str,
+    crc32c: str,
+    width: int,
+    height: int,
+    resolution_mpp: float,
+    expires_seconds: int,
+    *,
+    tissue: str = "LUNG",
+    disease: str = "LUNG_CANCER",
+) -> platform.InputItem:
+    """Build a single WSI InputItem from spot metadata."""
+    return platform.InputItem(
+        external_id=gs_url,
+        input_artifacts=[
+            platform.InputArtifact(
+                name="whole_slide_image",
+                download_url=platform.generate_signed_url(
+                    url=gs_url,
+                    expires_seconds=expires_seconds,
+                ),
+                metadata={
+                    "checksum_base64_crc32c": crc32c,
+                    "width_px": width,
+                    "height_px": height,
+                    "resolution_mpp": resolution_mpp,
+                    "media_type": "image/tiff",
+                    "staining_method": "H&E",
+                    "specimen": {
+                        "tissue": tissue,
+                        "disease": disease,
+                    },
+                },
+            )
+        ],
+    )
+
+
+def _build_minimal_wsi_input_item(gs_url: str, crc32c: str, expires_seconds: int) -> platform.InputItem:
+    """Build a minimal WSI InputItem supplying only the CRC32C and image URL."""
+    return platform.InputItem(
+        external_id=gs_url,
+        input_artifacts=[
+            platform.InputArtifact(
+                name="whole_slide_image",
+                download_url=platform.generate_signed_url(url=gs_url, expires_seconds=expires_seconds),
+                metadata={
+                    "checksum_base64_crc32c": crc32c,
+                    "media_type": "image/tiff",
+                },
+            )
+        ],
+    )
 
 
 def _get_single_spot_payload_for_heta(expires_seconds: int) -> list[platform.InputItem]:
     """Generates a payload using a single spot."""
     return [
-        platform.InputItem(
-            external_id=SPOT_0_GS_URL,
-            input_artifacts=[
-                platform.InputArtifact(
-                    name="whole_slide_image",
-                    download_url=platform.generate_signed_url(
-                        url=SPOT_0_GS_URL,
-                        expires_seconds=expires_seconds,
-                    ),
-                    metadata={
-                        "checksum_base64_crc32c": SPOT_0_CRC32C,
-                        "resolution_mpp": SPOT_0_RESOLUTION_MPP,
-                        "width_px": SPOT_0_WIDTH,
-                        "height_px": SPOT_0_HEIGHT,
-                        "media_type": "image/tiff",
-                        "staining_method": "H&E",
-                        "specimen": {
-                            "tissue": "LUNG",
-                            "disease": "LUNG_CANCER",
-                        },
-                    },
-                )
-            ],
-        ),
+        _build_wsi_input_item(
+            SPOT_0_GS_URL, SPOT_0_CRC32C, SPOT_0_WIDTH, SPOT_0_HEIGHT, SPOT_0_RESOLUTION_MPP, expires_seconds
+        )
     ]
 
 
 def _get_three_spots_payload_for_test(expires_seconds: int) -> list[platform.InputItem]:
     """Generates a payload using three spots."""
     return [
-        platform.InputItem(
-            external_id=SPOT_1_GS_URL,
-            input_artifacts=[
-                platform.InputArtifact(
-                    name="whole_slide_image",
-                    download_url=platform.generate_signed_url(
-                        url=SPOT_1_GS_URL,
-                        expires_seconds=expires_seconds,
-                    ),
-                    metadata={
-                        "checksum_base64_crc32c": SPOT_1_CRC32C,
-                        "width_px": SPOT_1_WIDTH,
-                        "height_px": SPOT_1_HEIGHT,
-                        "resolution_mpp": SPOT_1_RESOLUTION_MPP,
-                        "media_type": "image/tiff",
-                    },
-                )
-            ],
+        _build_wsi_input_item(
+            SPOT_1_GS_URL,
+            SPOT_1_CRC32C,
+            SPOT_1_WIDTH,
+            SPOT_1_HEIGHT,
+            SPOT_1_RESOLUTION_MPP,
+            expires_seconds,
+            tissue=SPOT_1_TISSUE,
+            disease=SPOT_1_DISEASE,
         ),
-        platform.InputItem(
-            external_id=SPOT_2_GS_URL,
-            input_artifacts=[
-                platform.InputArtifact(
-                    name="whole_slide_image",
-                    download_url=platform.generate_signed_url(
-                        url=SPOT_2_GS_URL,
-                        expires_seconds=expires_seconds,
-                    ),
-                    metadata={
-                        "checksum_base64_crc32c": SPOT_2_CRC32C,
-                        "width_px": SPOT_2_WIDTH,
-                        "height_px": SPOT_2_HEIGHT,
-                        "resolution_mpp": SPOT_2_RESOLUTION_MPP,
-                        "media_type": "image/tiff",
-                    },
-                )
-            ],
+        _build_wsi_input_item(
+            SPOT_2_GS_URL, SPOT_2_CRC32C, SPOT_2_WIDTH, SPOT_2_HEIGHT, SPOT_2_RESOLUTION_MPP, expires_seconds
         ),
-        platform.InputItem(
-            external_id=SPOT_3_GS_URL,
-            input_artifacts=[
-                platform.InputArtifact(
-                    name="whole_slide_image",
-                    download_url=platform.generate_signed_url(
-                        url=SPOT_3_GS_URL,
-                        expires_seconds=expires_seconds,
-                    ),
-                    metadata={
-                        "checksum_base64_crc32c": SPOT_3_CRC32C,
-                        "width_px": SPOT_3_WIDTH,
-                        "height_px": SPOT_3_HEIGHT,
-                        "resolution_mpp": SPOT_3_RESOLUTION_MPP,
-                        "media_type": "image/tiff",
-                    },
-                )
-            ],
+        _build_wsi_input_item(
+            SPOT_3_GS_URL, SPOT_3_CRC32C, SPOT_3_WIDTH, SPOT_3_HEIGHT, SPOT_3_RESOLUTION_MPP, expires_seconds
         ),
     ]
 
 
-def _get_spots_payload_for_special(expires_seconds: int, count: int) -> list[platform.InputItem]:
-    """Generates a payload using count many spots.
-
-    Optimized for large counts (e.g., 100k items):
-    - Generates signed URL once (all items use same source file)
-    - Pre-builds metadata dicts once (identical across all items)
+def _get_spots_payload_for_test_app(expires_seconds: int, count: int) -> list[platform.InputItem]:
+    """Generates a minimal payload for the test application using count many spots.
 
-    Args:
-        expires_seconds: Expiration time for signed URLs in seconds.
-        count: Number of items to generate.
-
-    Returns:
-        List of InputItem objects for the special application.
+    Optimized for large counts (e.g., 2000 items):
+    - Generates signed URL once (all items use the same source file)
+    - Pre-builds metadata dict once (identical across all items)
     """
     if count <= 0:
         return []
-
-    signed_url = platform.generate_signed_url(
-        url=SPOT_1_GS_URL,
-        expires_seconds=expires_seconds,
-    )
-    wsi_metadata = {
-        "checksum_base64_crc32c": SPOT_1_CRC32C,
-        "width_px": SPOT_1_WIDTH,
-        "height_px": SPOT_1_HEIGHT,
-        "resolution_mpp": SPOT_1_RESOLUTION_MPP,
-        "media_type": "image/tiff",
-        "staining_method": "H&E",
-        "specimen": {
-            "tissue": "LUNG",
-            "disease": "LUNG_CANCER",
-        },
-    }
+    signed_url = platform.generate_signed_url(url=SPOT_1_GS_URL, expires_seconds=expires_seconds)
+    metadata = {"checksum_base64_crc32c": SPOT_1_CRC32C, "media_type": "image/tiff"}
     return [
         platform.InputItem(
             external_id=f"{SPOT_1_GS_URL}&spot_index={index}",
             input_artifacts=[
-                platform.InputArtifact(
-                    name="whole_slide_image",
-                    download_url=signed_url,
-                    metadata=wsi_metadata,
-                ),
+                platform.InputArtifact(name="whole_slide_image", download_url=signed_url, metadata=metadata),
             ],
         )
         for index in range(count)
@@ -608,110 +575,97 @@ def test_platform_heta_app_submit() -> None:
 
 
 @pytest.mark.e2e
-@pytest.mark.stress_only
 @pytest.mark.long_running
-@pytest.mark.skipif(SPECIAL_APPLICATION_ID is None, reason="Special application not configured for this environment")
-@pytest.mark.timeout(timeout=SPECIAL_APPLICATION_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS)
-def test_platform_special_app_submit() -> None:
-    """Test application runs with the special application.
+@pytest.mark.scheduled_only
+@pytest.mark.timeout(timeout=HETA_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS)
+def test_platform_heta_app_find_and_validate() -> None:
+    """Test application runs with the HETA application.
+
+    This test finds an application run with the HETA application submitted earlier and
+    validates it completed successfully and in time.
+
+    Raises:
+        AssertionError: If any of the validation checks fail.
+    """
+    _find_and_validate(
+        application_id=HETA_APPLICATION_ID,
+        application_version=HETA_APPLICATION_VERSION,
+    )
+
 
-    This test submits an application run with the special application and validates the submission.
+@pytest.mark.e2e
+@pytest.mark.stress_only
+@pytest.mark.long_running
+@pytest.mark.timeout(timeout=TEST_APP_STRESS_SUBMIT_AND_FIND_SUBMIT_TIMEOUT_SECONDS)
+def test_platform_test_app_stress_submit() -> None:
+    """Test application runs with the test application under stress conditions.
 
-    The test behavior varies based on the current minute when triggered by cron (*/10):
-    - Minutes 0-9 (every 6th run): Uses 1000 items instead of 100
-    - Minutes 40-49 (every 4th run): Uses 2h due date / 3h deadline instead of 20h due date / 24h deadline
+    Submits a large batch of slides and validates the submission. Batch size and
+    scheduling vary based on the current minute when triggered by cron (*/10):
+    - Minutes 0-9 (every 6th run): 2000 items
+    - Minutes 20-29 (every 6th run): 2000 items
+    - Minutes 40-49 (every 4th run): 2h due date / 3h deadline instead of defaults
+    - All other minutes: 100 items
 
     Raises:
         AssertionError: If any of the validation checks fail.
     """
-    # Determine run configuration based on current minute
-    # Cron runs every 10 minutes (*/10, in _scheduled-test-stress.yml),
-    # so we check which 10-minute window we're in
     current_minute = datetime.now(tz=UTC).minute
     is_on_00 = 0 <= current_minute <= 9
     is_on_20 = 20 <= current_minute <= 29
     is_on_40 = 40 <= current_minute <= 49
 
     if is_on_00:
-        slide_count = SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_00
+        slide_count = TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_00
     elif is_on_20:
-        slide_count = SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT_ON_20
+        slide_count = TEST_APP_STRESS_SLIDE_PER_RUN_COUNT_ON_20
     else:
-        slide_count = SPECIAL_APPLICATION_SLIDE_PER_RUN_COUNT
+        slide_count = TEST_APP_STRESS_SLIDE_PER_RUN_COUNT
 
     deadline_seconds = (
-        SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40
+        TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS_ON_40
         if is_on_40
-        else SPECIAL_APPLICATION_SUBMIT_AND_FIND_DEADLINE_SECONDS
+        else TEST_APP_STRESS_SUBMIT_AND_FIND_DEADLINE_SECONDS
     )
     due_date_seconds = (
-        SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40
+        TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS_ON_40
         if is_on_40
-        else SPECIAL_APPLICATION_SUBMIT_AND_FIND_DUE_DATE_SECONDS
+        else TEST_APP_STRESS_SUBMIT_AND_FIND_DUE_DATE_SECONDS
     )
 
     logger.info(
-        f"Special app submit config: minute={current_minute}, is_on_00={is_on_00}, is_on_40={is_on_40}, "
+        f"Test app stress submit: minute={current_minute}, is_on_00={is_on_00}, is_on_40={is_on_40}, "
         f"slide_count={slide_count}, deadline_seconds={deadline_seconds}, due_date_seconds={due_date_seconds}"
     )
 
-    logger.trace(
-        f"Generating special application payload with {slide_count} spots for "
-        f"{SPECIAL_APPLICATION_ID} version {SPECIAL_APPLICATION_VERSION}"
-    )
-    payload = _get_spots_payload_for_special(
+    payload = _get_spots_payload_for_test_app(
         expires_seconds=deadline_seconds + 60 * 5,
         count=slide_count,
     )
-    logger.debug(f"Generated special application payload: {payload}")
     _submit_and_validate(
-        application_id=SPECIAL_APPLICATION_ID,
-        application_version=SPECIAL_APPLICATION_VERSION,
+        application_id=TEST_APPLICATION_ID,
+        application_version=TEST_APPLICATION_VERSION,
         payload=payload,
         deadline_seconds=deadline_seconds,
         due_date_seconds=due_date_seconds,
-        tags={"test_platform_special_app_submit", "special", "stress", "stress_only"},
+        tags={"test_platform_test_app_stress_submit", "stress", "stress_only"},
     )
-    logger.debug("Special application payload submitted successfully")
 
 
 @pytest.mark.e2e
 @pytest.mark.stress_only
 @pytest.mark.long_running
 @pytest.mark.scheduled_only
-@pytest.mark.skipif(SPECIAL_APPLICATION_ID is None, reason="Special application not configured for this environment")
-@pytest.mark.timeout(timeout=SPECIAL_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS)
-def test_platform_special_app_find_and_validate() -> None:
-    """Test application runs with the special application.
-
-    This test finds an application run with the special application submitted earlier and
-    validates it completed successfully and in time.
+@pytest.mark.timeout(timeout=TEST_APP_STRESS_FIND_AND_VALIDATE_TIMEOUT_SECONDS)
+def test_platform_test_app_stress_find_and_validate() -> None:
+    """Find and validate a previously submitted test application stress run.
 
     Raises:
         AssertionError: If any of the validation checks fail.
     """
     _find_and_validate(
-        application_id=SPECIAL_APPLICATION_ID,
-        application_version=SPECIAL_APPLICATION_VERSION,
-    )
-
-
-@pytest.mark.e2e
-@pytest.mark.long_running
-@pytest.mark.scheduled_only
-@pytest.mark.timeout(timeout=HETA_APPLICATION_FIND_AND_VALIDATE_TIMEOUT_SECONDS)
-def test_platform_heta_app_find_and_validate() -> None:
-    """Test application runs with the HETA application.
-
-    This test finds an application run with the HETA application submitted earlier and
-    validates it completed successfully and in time.
-
-    Raises:
-        AssertionError: If any of the validation checks fail.
-    """
-    _find_and_validate(
-        application_id=HETA_APPLICATION_ID,
-        application_version=HETA_APPLICATION_VERSION,
+        application_id=TEST_APPLICATION_ID,
+        application_version=TEST_APPLICATION_VERSION,
     )
 
 
diff --git a/tests/constants_test.py b/tests/constants_test.py
index 946cceba1..0296cb0d8 100644
--- a/tests/constants_test.py
+++ b/tests/constants_test.py
@@ -19,15 +19,21 @@
 SPOT_0_HEIGHT = 7196
 
 SPOT_1_GS_URL = (
-    "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff"
+    "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/1603ba4c-398a-49db-926b-c14d8f17dc83.tiff"
 )
-SPOT_1_FILENAME = "9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff"
-SPOT_1_CRC32C = "9l3NNQ=="
-SPOT_1_FILESIZE = 14681750
-SPOT_1_RESOLUTION_MPP = 0.46499982
-SPOT_1_WIDTH = 3728
-SPOT_1_HEIGHT = 3640
-
+SPOT_1_FILENAME = "1603ba4c-398a-49db-926b-c14d8f17dc83.tiff"
+SPOT_1_CRC32C = "MKWV1g=="
+SPOT_1_FILESIZE = 8942460
+SPOT_1_RESOLUTION_MPP = 0.25
+SPOT_1_WIDTH = 6649
+SPOT_1_HEIGHT = 6578
+SPOT_1_TISSUE = "BREAST"
+SPOT_1_DISEASE = "BREAST_CANCER"
+
+# SPOT_2, SPOT_3 (and the former SPOT_1 / 9375e3ed): these slides have a known 10x resolution
+# ambiguity — certain VIPS versions read their MPP as ~0.0465 instead of ~0.465 due to differing
+# interpretations of the TIFF ResolutionUnit tag. The values below reflect the correct 0.465 MPP.
+# If a test fails with an off-by-10x resolution error, check the VIPS version in use.
 SPOT_2_GS_URL = (
     "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/8c7b079e-8b8a-4036-bfde-5818352b503a.tiff"
 )
@@ -46,13 +52,23 @@
 SPOT_3_WIDTH = 4016
 SPOT_3_HEIGHT = 3952
 
+SPOT_4_GS_URL = (
+    "gs://aignostics-platform-ext-a4f7e9/python-sdk-tests/he-tme/slides/9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff"
+)
+SPOT_4_FILENAME = "9375e3ed-28d2-4cf3-9fb9-8df9d11a6627.tiff"
+SPOT_4_CRC32C = "9l3NNQ=="
+SPOT_4_FILESIZE = 14681750
+SPOT_4_RESOLUTION_MPP = 0.46499982
+SPOT_4_WIDTH = 3728
+SPOT_4_HEIGHT = 3640
+
 match os.getenv("AIGNOSTICS_PLATFORM_ENVIRONMENT", "production"):
     case "production":
         TEST_APPLICATION_ID = "test-app"
-        TEST_APPLICATION_VERSION = "0.0.6"
+        TEST_APPLICATION_VERSION = "1.0.0"
 
         HETA_APPLICATION_ID = "he-tme"
-        HETA_APPLICATION_VERSION = "1.1.1"
+        HETA_APPLICATION_VERSION = "1.2.0"
         TEST_APPLICATION_VERSION_USE_LATEST_FALLBACK_SKIP = False
 
         PIPELINE_GPU_TYPE = "L4"
@@ -60,13 +76,12 @@
         PIPELINE_GPU_FLEX_START_MAX_RUN_DURATION_MINUTES = None
         PIPELINE_MAX_GPUS_PER_SLIDE = 1
         PIPELINE_CPU_PROVISIONING_MODE = "SPOT"
-        PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = (
-            30  # Respected starting with 1.0.0-sl.4.1+internal, until then set to 60min by application itself.
-        )
-
-        SPECIAL_APPLICATION_ID = "test-app"
-        SPECIAL_APPLICATION_VERSION = "0.99.0"
+        PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 25
 
+        # To update file sizes: the tests print every file's actual size before asserting. Run with
+        # -s to see them, then paste the printed byte values as the second element of each tuple.
+        # SPOT_0: uv run pytest tests/aignostics/application/gui_test.py::test_gui_run_download -s --no-cov
+        # SPOT_1: uv run pytest tests/aignostics/application/cli_test.py::test_cli_run_execute -s --no-cov
         SPOT_0_EXPECTED_RESULT_FILES = [
             ("tissue_qc_segmentation_map_image.tiff", 1642856, 10),
             ("tissue_qc_geojson_polygons.json", 259955, 10),
@@ -81,15 +96,15 @@
         SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10)
 
         SPOT_1_EXPECTED_RESULT_FILES = [
-            ("tissue_qc_segmentation_map_image.tiff", 469040, 10),
-            ("tissue_qc_geojson_polygons.json", 177779, 10),
-            ("tissue_segmentation_geojson_polygons.json", 205951, 10),
-            ("readout_generation_slide_readouts.csv", 299654, 10),
-            ("readout_generation_cell_readouts.csv", 2387860, 10),
-            ("cell_classification_geojson_polygons.json", 16687724, 10),
-            ("tissue_segmentation_segmentation_map_image.tiff", 536582, 10),
-            ("tissue_segmentation_csv_class_information.csv", 441, 10),
-            ("tissue_qc_csv_class_information.csv", 286, 10),
+            ("tissue_qc_segmentation_map_image.tiff", 1288632, 10),
+            ("tissue_qc_geojson_polygons.json", 75293, 10),
+            ("tissue_segmentation_geojson_polygons.json", 152317, 10),
+            ("readout_generation_slide_readouts.csv", 299381, 10),
+            ("readout_generation_cell_readouts.csv", 466725, 10),
+            ("cell_classification_geojson_polygons.json", 2812005, 10),
+            ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10),
+            ("tissue_segmentation_csv_class_information.csv", 446, 10),
+            ("tissue_qc_csv_class_information.csv", 290, 10),
         ]
 
     case "staging":
@@ -97,20 +112,17 @@
         TEST_APPLICATION_VERSION = "1.0.0"
 
         HETA_APPLICATION_ID = "he-tme"
-        HETA_APPLICATION_VERSION = "1.1.1"
+        HETA_APPLICATION_VERSION = "1.2.0"
         TEST_APPLICATION_VERSION_USE_LATEST_FALLBACK_SKIP = True
 
         PIPELINE_GPU_TYPE = "L4"
         PIPELINE_GPU_PROVISIONING_MODE = "SPOT"
         PIPELINE_GPU_FLEX_START_MAX_RUN_DURATION_MINUTES = None
-        PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30
         PIPELINE_MAX_GPUS_PER_SLIDE = 1
         PIPELINE_CPU_PROVISIONING_MODE = "SPOT"
-        PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 30
-
-        SPECIAL_APPLICATION_ID = None
-        SPECIAL_APPLICATION_VERSION = None
+        PIPELINE_NODE_ACQUISITION_TIMEOUT_MINUTES = 25
 
+        # See production block above for instructions on how to update these sizes.
         SPOT_0_EXPECTED_RESULT_FILES = [
             ("tissue_qc_segmentation_map_image.tiff", 1642856, 10),
             ("tissue_qc_geojson_polygons.json", 259955, 10),
@@ -125,15 +137,15 @@
         SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10)
 
         SPOT_1_EXPECTED_RESULT_FILES = [
-            ("tissue_qc_segmentation_map_image.tiff", 469040, 10),
-            ("tissue_qc_geojson_polygons.json", 177779, 10),
-            ("tissue_segmentation_geojson_polygons.json", 205951, 10),
-            ("readout_generation_slide_readouts.csv", 299654, 10),
-            ("readout_generation_cell_readouts.csv", 2387860, 10),
-            ("cell_classification_geojson_polygons.json", 16687724, 10),
-            ("tissue_segmentation_segmentation_map_image.tiff", 536582, 10),
-            ("tissue_segmentation_csv_class_information.csv", 441, 10),
-            ("tissue_qc_csv_class_information.csv", 286, 10),
+            ("tissue_qc_segmentation_map_image.tiff", 1288632, 10),
+            ("tissue_qc_geojson_polygons.json", 75293, 10),
+            ("tissue_segmentation_geojson_polygons.json", 152317, 10),
+            ("readout_generation_slide_readouts.csv", 299381, 10),
+            ("readout_generation_cell_readouts.csv", 466725, 10),
+            ("cell_classification_geojson_polygons.json", 2812005, 10),
+            ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10),
+            ("tissue_segmentation_csv_class_information.csv", 446, 10),
+            ("tissue_qc_csv_class_information.csv", 290, 10),
         ]
 
     case _:

From 889620703f3b8177ddeb5b942d85e91099f1c4ed Mon Sep 17 00:00:00 2001
From: Ari Angelo <hello@ari.nz>
Date: Tue, 19 May 2026 16:18:09 +0200
Subject: [PATCH 6/9] feat(tests): add HETA 1.2.0 parquet size checks and
 GeoJSON parity validation

---
 tests/aignostics/application/cli_test.py | 76 +++++++++++-------------
 tests/aignostics/application/gui_test.py | 25 +++++++-
 tests/constants_test.py                  | 48 +++++++++------
 3 files changed, 87 insertions(+), 62 deletions(-)

diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py
index c5d4a2e90..5d9a2fdc4 100644
--- a/tests/aignostics/application/cli_test.py
+++ b/tests/aignostics/application/cli_test.py
@@ -3,6 +3,7 @@
 import contextlib
 import json
 import platform
+import random
 import re
 from collections.abc import Generator
 from datetime import UTC, datetime, timedelta
@@ -10,7 +11,21 @@
 from time import sleep
 from unittest.mock import MagicMock, patch
 
+import pandas as pd
 import pytest
+from aignx.codegen.exceptions import ForbiddenException
+from aignx.codegen.exceptions import NotFoundException as ApiNotFound
+from aignx.codegen.models import (
+    ItemOutput,
+    ItemResultReadResponse,
+    ItemState,
+    ItemTerminationReason,
+    RunItemStatistics,
+    RunOutput,
+    RunReadResponse,
+    RunState,
+    RunTerminationReason,
+)
 from loguru import logger
 from tenacity import Retrying, retry, stop_after_attempt, wait_exponential
 from typer.testing import CliRunner
@@ -847,8 +862,6 @@ def test_cli_run_list_for_organization(runner: CliRunner) -> None:
 @pytest.mark.unit
 def test_cli_run_list_forbidden_with_organization(runner: CliRunner) -> None:
     """Check ForbiddenException with --for-organization shows org-specific access denied message."""
-    from aignx.codegen.exceptions import ForbiddenException
-
     with patch.object(
         ApplicationService, "application_runs", side_effect=ForbiddenException(status=403, reason="Forbidden")
     ):
@@ -862,8 +875,6 @@ def test_cli_run_list_forbidden_with_organization(runner: CliRunner) -> None:
 @pytest.mark.unit
 def test_cli_run_list_forbidden_without_organization(runner: CliRunner) -> None:
     """Check ForbiddenException without --for-organization shows generic access denied message."""
-    from aignx.codegen.exceptions import ForbiddenException
-
     with patch.object(
         ApplicationService, "application_runs", side_effect=ForbiddenException(status=403, reason="Forbidden")
     ):
@@ -897,18 +908,6 @@ def test_cli_run_describe_not_found(runner: CliRunner, record_property) -> None:
 @pytest.mark.integration
 def test_cli_run_describe_json_includes_items(runner: CliRunner) -> None:
     """Check run describe --format=json includes items in output."""
-    from aignx.codegen.models import (
-        ItemOutput,
-        ItemResultReadResponse,
-        ItemState,
-        ItemTerminationReason,
-        RunItemStatistics,
-        RunOutput,
-        RunReadResponse,
-        RunState,
-        RunTerminationReason,
-    )
-
     mock_run_data = RunReadResponse(
         run_id="test-run-id-123",
         application_id="test-app",
@@ -1111,8 +1110,8 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) ->
     results_dir = tmp_path / SPOT_1_FILENAME.replace(".tiff", "")
     assert results_dir.is_dir(), f"Expected directory {results_dir} not found"
     files_in_dir = list(results_dir.glob("*"))
-    assert len(files_in_dir) == 9, (
-        f"Expected 9 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}"
+    assert len(files_in_dir) == 12, (
+        f"Expected 12 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}"
     )
     print(f"Found files in {results_dir}:")
     for filename, expected_size, tolerance_percent in SPOT_1_EXPECTED_RESULT_FILES:
@@ -1133,6 +1132,23 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) ->
             f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})"
         )
 
+    # Validate parquet <-> GeoJSON row count parity for the 3 paired outputs
+    parquet_geojson_pairs = [
+        ("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"),
+        ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"),
+        ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"),
+    ]
+    for parquet_filename, geojson_filename in parquet_geojson_pairs:
+        parquet_path = results_dir / parquet_filename
+        geojson_path = results_dir / geojson_filename
+        parquet_row_count = len(pd.read_parquet(parquet_path))
+        with geojson_path.open() as f:
+            geojson_feature_count = len(json.load(f)["features"])
+        assert parquet_row_count == geojson_feature_count, (
+            f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) "
+            f"and {geojson_filename} ({geojson_feature_count} features)"
+        )
+
     # Validate the execute command exited successfully
     assert result.exit_code == 0
 
@@ -1222,9 +1238,6 @@ def test_cli_run_update_item_metadata_not_dict(runner: CliRunner) -> None:
 @pytest.mark.sequential
 def test_cli_run_dump_and_update_custom_metadata(runner: CliRunner, tmp_path: Path) -> None:
     """Test dumping and updating custom metadata via CLI commands."""
-    import json
-    import random
-
     unique_tag = f"test_metadata_{datetime.now(tz=UTC).timestamp()}"
     with submitted_run(runner, tmp_path, CSV_CONTENT_SPOT0, extra_args=["--tags", unique_tag, "--force"]) as run_id:
         # Step 1: Dump initial custom metadata of run
@@ -1313,11 +1326,8 @@ def test_cli_run_dump_and_update_custom_metadata(runner: CliRunner, tmp_path: Pa
 @pytest.mark.e2e
 @pytest.mark.timeout(timeout=240)
 @pytest.mark.sequential
-def test_cli_run_dump_and_update_item_custom_metadata(runner: CliRunner, tmp_path: Path) -> None:  # noqa: PLR0915
+def test_cli_run_dump_and_update_item_custom_metadata(runner: CliRunner, tmp_path: Path) -> None:
     """Test dumping and updating item custom metadata via CLI commands."""
-    import json
-    import random
-
     unique_tag = f"test_item_metadata_{datetime.now(tz=UTC).timestamp()}"
     # CSV_CONTENT_SPOT0 uses SPOT_0_FILENAME as external_id, which the describe output surfaces
     # as "Item External ID: `...`" — the get_external_id() helper below captures it dynamically.
@@ -1773,8 +1783,6 @@ def test_cli_application_version_document_describe_success(runner: CliRunner, re
 def test_cli_application_version_document_describe_not_found(runner: CliRunner, record_property) -> None:
     """`application version document describe` exits 2 with a clear message on 404."""
     record_property("tested-item-id", "TC-APPLICATION-CLI-05-03")
-    from aignx.codegen.exceptions import NotFoundException as ApiNotFound
-
     fake_documents = MagicMock()
     fake_documents.details.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
     fake_client = MagicMock()
@@ -1870,8 +1878,6 @@ def test_cli_application_version_document_list_json_empty(runner: CliRunner, rec
 def test_cli_application_version_document_list_resolve_not_found_text(runner: CliRunner, record_property) -> None:
     """`application version document list` exits 2 when the application version cannot be resolved."""
     record_property("tested-item-id", "TC-APPLICATION-CLI-05-01")
-    from aignx.codegen.exceptions import NotFoundException as ApiNotFound
-
     fake_client = MagicMock()
     fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
 
@@ -1888,8 +1894,6 @@ def test_cli_application_version_document_list_resolve_not_found_text(runner: Cl
 def test_cli_application_version_document_list_resolve_not_found_json(runner: CliRunner, record_property) -> None:
     """`application version document list --format json` emits structured error on 404."""
     record_property("tested-item-id", "TC-APPLICATION-CLI-05-01")
-    from aignx.codegen.exceptions import NotFoundException as ApiNotFound
-
     fake_client = MagicMock()
     fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
 
@@ -1976,8 +1980,6 @@ def test_cli_application_version_document_describe_json_success(runner: CliRunne
 def test_cli_application_version_document_describe_resolve_not_found_text(runner: CliRunner, record_property) -> None:
     """`describe` exits 2 when the application version cannot be resolved (text format)."""
     record_property("tested-item-id", "TC-APPLICATION-CLI-05-03")
-    from aignx.codegen.exceptions import NotFoundException as ApiNotFound
-
     fake_client = MagicMock()
     fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
 
@@ -1996,8 +1998,6 @@ def test_cli_application_version_document_describe_resolve_not_found_text(runner
 def test_cli_application_version_document_describe_resolve_not_found_json(runner: CliRunner, record_property) -> None:
     """`describe --format json` emits structured error when version cannot be resolved."""
     record_property("tested-item-id", "TC-APPLICATION-CLI-05-03")
-    from aignx.codegen.exceptions import NotFoundException as ApiNotFound
-
     fake_client = MagicMock()
     fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
 
@@ -2026,8 +2026,6 @@ def test_cli_application_version_document_describe_resolve_not_found_json(runner
 def test_cli_application_version_document_describe_not_found_json(runner: CliRunner, record_property) -> None:
     """`describe --format json` emits structured error when the document is missing."""
     record_property("tested-item-id", "TC-APPLICATION-CLI-05-03")
-    from aignx.codegen.exceptions import NotFoundException as ApiNotFound
-
     fake_documents = MagicMock()
     fake_documents.details.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
     fake_client = MagicMock()
@@ -2111,8 +2109,6 @@ def test_cli_application_version_document_download_resolve_not_found(
 ) -> None:
     """`download` exits 2 when the application version cannot be resolved."""
     record_property("tested-item-id", "TC-APPLICATION-CLI-05-04")
-    from aignx.codegen.exceptions import NotFoundException as ApiNotFound
-
     fake_client = MagicMock()
     fake_client.applications.versions.documents.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
 
@@ -2142,8 +2138,6 @@ def test_cli_application_version_document_download_not_found(
 ) -> None:
     """`download` exits 2 with a clear message when the document does not exist."""
     record_property("tested-item-id", "TC-APPLICATION-CLI-05-04")
-    from aignx.codegen.exceptions import NotFoundException as ApiNotFound
-
     fake_documents = MagicMock()
     fake_documents.download_to_path.side_effect = ApiNotFound(status=404, reason=API_REASON_NOT_FOUND)
     fake_client = MagicMock()
diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py
index 59ba189e2..05c799d2a 100644
--- a/tests/aignostics/application/gui_test.py
+++ b/tests/aignostics/application/gui_test.py
@@ -1,6 +1,7 @@
 """Tests to verify the GUI functionality of the application module."""
 
 import contextlib
+import json
 import re
 import tempfile
 from asyncio import sleep, to_thread
@@ -9,6 +10,7 @@
 from typing import TYPE_CHECKING
 from unittest.mock import AsyncMock, MagicMock, Mock, patch
 
+import pandas as pd
 import pytest
 from nicegui.testing import User
 from typer.testing import CliRunner
@@ -354,7 +356,7 @@ async def test_gui_download_dataset_via_application_to_run_cancel_to_find_back(
 @pytest.mark.flaky(retries=1, delay=5)
 @pytest.mark.timeout(timeout=60 * 10)
 @pytest.mark.sequential  # Helps on Linux with image analysis step otherwise timing out
-async def test_gui_run_download(  # noqa: PLR0915
+async def test_gui_run_download(  # noqa: PLR0914, PLR0915
     user: User, runner: CliRunner, tmp_path: Path, silent_logging: None, record_property
 ) -> None:
     """Test that the user can download a run result via the GUI."""
@@ -440,8 +442,8 @@ async def test_gui_run_download(  # noqa: PLR0915
 
         # Check for files in the results directory
         files_in_results_dir = list(results_dir.glob("*"))
-        assert len(files_in_results_dir) == 9, (
-            f"Expected 9 files in {results_dir}, but found {len(files_in_results_dir)}: "
+        assert len(files_in_results_dir) == 12, (
+            f"Expected 12 files in {results_dir}, but found {len(files_in_results_dir)}: "
             f"{[f.name for f in files_in_results_dir]}"
         )
 
@@ -464,6 +466,23 @@ async def test_gui_run_download(  # noqa: PLR0915
                 f"({min_size} to {max_size} bytes, ±{tolerance_percent}% of {expected_size})"
             )
 
+        # Validate parquet <-> GeoJSON row count parity for the 3 paired outputs
+        parquet_geojson_pairs = [
+            ("tissue_qc_parquet_polygons.parquet", "tissue_qc_geojson_polygons.json"),
+            ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"),
+            ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"),
+        ]
+        for parquet_filename, geojson_filename in parquet_geojson_pairs:
+            parquet_path = results_dir / parquet_filename
+            geojson_path = results_dir / geojson_filename
+            parquet_row_count = len(pd.read_parquet(parquet_path))
+            with geojson_path.open() as f:
+                geojson_feature_count = len(json.load(f)["features"])
+            assert parquet_row_count == geojson_feature_count, (
+                f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) "
+                f"and {geojson_filename} ({geojson_feature_count} features)"
+            )
+
 
 @pytest.mark.integration
 @pytest.mark.sequential
diff --git a/tests/constants_test.py b/tests/constants_test.py
index 0296cb0d8..aa18676ee 100644
--- a/tests/constants_test.py
+++ b/tests/constants_test.py
@@ -83,15 +83,18 @@
         # SPOT_0: uv run pytest tests/aignostics/application/gui_test.py::test_gui_run_download -s --no-cov
         # SPOT_1: uv run pytest tests/aignostics/application/cli_test.py::test_cli_run_execute -s --no-cov
         SPOT_0_EXPECTED_RESULT_FILES = [
-            ("tissue_qc_segmentation_map_image.tiff", 1642856, 10),
-            ("tissue_qc_geojson_polygons.json", 259955, 10),
-            ("tissue_segmentation_geojson_polygons.json", 887003, 10),
-            ("readout_generation_slide_readouts.csv", 303217, 10),
-            ("readout_generation_cell_readouts.csv", 1658344, 10),
-            ("cell_classification_geojson_polygons.json", 11218951, 10),
-            ("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10),
-            ("tissue_segmentation_csv_class_information.csv", 452, 10),
-            ("tissue_qc_csv_class_information.csv", 285, 10),
+            ("tissue_qc_segmentation_map_image.tiff", 470150, 10),
+            ("tissue_qc_geojson_polygons.json", 171251, 10),
+            ("tissue_segmentation_geojson_polygons.json", 185516, 10),
+            ("readout_generation_slide_readouts.csv", 300205, 10),
+            ("readout_generation_cell_readouts.csv", 2417117, 10),
+            ("cell_classification_geojson_polygons.json", 16673412, 10),
+            ("tissue_segmentation_segmentation_map_image.tiff", 527264, 10),
+            ("tissue_segmentation_csv_class_information.csv", 443, 10),
+            ("tissue_qc_csv_class_information.csv", 286, 10),
+            ("tissue_qc_parquet_polygons.parquet", 34346, 10),
+            ("tissue_segmentation_parquet_polygons.parquet", 39185, 10),
+            ("cell_classification_parquet_polygons.parquet", 5476364, 10),
         ]
         SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10)
 
@@ -105,6 +108,9 @@
             ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10),
             ("tissue_segmentation_csv_class_information.csv", 446, 10),
             ("tissue_qc_csv_class_information.csv", 290, 10),
+            ("tissue_qc_parquet_polygons.parquet", 29049, 10),
+            ("tissue_segmentation_parquet_polygons.parquet", 56682, 10),
+            ("cell_classification_parquet_polygons.parquet", 838533, 10),
         ]
 
     case "staging":
@@ -124,15 +130,18 @@
 
         # See production block above for instructions on how to update these sizes.
         SPOT_0_EXPECTED_RESULT_FILES = [
-            ("tissue_qc_segmentation_map_image.tiff", 1642856, 10),
-            ("tissue_qc_geojson_polygons.json", 259955, 10),
-            ("tissue_segmentation_geojson_polygons.json", 887003, 10),
-            ("readout_generation_slide_readouts.csv", 303217, 10),
-            ("readout_generation_cell_readouts.csv", 1658344, 10),
-            ("cell_classification_geojson_polygons.json", 11218951, 10),
-            ("tissue_segmentation_segmentation_map_image.tiff", 2945078, 10),
-            ("tissue_segmentation_csv_class_information.csv", 452, 10),
-            ("tissue_qc_csv_class_information.csv", 285, 10),
+            ("tissue_qc_segmentation_map_image.tiff", 470150, 10),
+            ("tissue_qc_geojson_polygons.json", 171251, 10),
+            ("tissue_segmentation_geojson_polygons.json", 185516, 10),
+            ("readout_generation_slide_readouts.csv", 300205, 10),
+            ("readout_generation_cell_readouts.csv", 2417117, 10),
+            ("cell_classification_geojson_polygons.json", 16673412, 10),
+            ("tissue_segmentation_segmentation_map_image.tiff", 527264, 10),
+            ("tissue_segmentation_csv_class_information.csv", 443, 10),
+            ("tissue_qc_csv_class_information.csv", 286, 10),
+            ("tissue_qc_parquet_polygons.parquet", 34346, 10),
+            ("tissue_segmentation_parquet_polygons.parquet", 39185, 10),
+            ("cell_classification_parquet_polygons.parquet", 5476364, 10),
         ]
         SPOT_0_EXPECTED_CELLS_CLASSIFIED = (39798, 10)
 
@@ -146,6 +155,9 @@
             ("tissue_segmentation_segmentation_map_image.tiff", 1783952, 10),
             ("tissue_segmentation_csv_class_information.csv", 446, 10),
             ("tissue_qc_csv_class_information.csv", 290, 10),
+            ("tissue_qc_parquet_polygons.parquet", 29049, 10),
+            ("tissue_segmentation_parquet_polygons.parquet", 56682, 10),
+            ("cell_classification_parquet_polygons.parquet", 838533, 10),
         ]
 
     case _:

From 17d9e1fa0bae5b167af5b0f762961b12d696a4a0 Mon Sep 17 00:00:00 2001
From: Ari Angelo <hello@ari.nz>
Date: Tue, 19 May 2026 19:58:04 +0200
Subject: [PATCH 7/9] fix(tests): address PR review comments on parquet/GeoJSON
 parity checks

- Use pyarrow.parquet.read_metadata() instead of pd.read_parquet() to
  get row count from Parquet footer without loading polygon data
- Use ijson streaming to count GeoJSON features without loading the
  full feature array into memory
- Replace hard-coded file counts with len(SPOT_x_EXPECTED_RESULT_FILES)
  to avoid drift when the constants change
- Sync qupath/gui_test.py to use len(SPOT_0_EXPECTED_RESULT_FILES)
  instead of the stale literal 9
- Remove unused _build_minimal_wsi_input_item dead code from e2e_test.py
---
 tests/aignostics/application/cli_test.py | 15 +++++++++------
 tests/aignostics/application/gui_test.py | 15 ++++++++-------
 tests/aignostics/platform/e2e_test.py    | 17 -----------------
 tests/aignostics/qupath/gui_test.py      |  5 +++--
 4 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py
index 5d9a2fdc4..6cadfdde5 100644
--- a/tests/aignostics/application/cli_test.py
+++ b/tests/aignostics/application/cli_test.py
@@ -11,7 +11,8 @@
 from time import sleep
 from unittest.mock import MagicMock, patch
 
-import pandas as pd
+import ijson
+import pyarrow.parquet as pq
 import pytest
 from aignx.codegen.exceptions import ForbiddenException
 from aignx.codegen.exceptions import NotFoundException as ApiNotFound
@@ -1110,8 +1111,10 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) ->
     results_dir = tmp_path / SPOT_1_FILENAME.replace(".tiff", "")
     assert results_dir.is_dir(), f"Expected directory {results_dir} not found"
     files_in_dir = list(results_dir.glob("*"))
-    assert len(files_in_dir) == 12, (
-        f"Expected 12 files in {results_dir}, but found {len(files_in_dir)}: {[f.name for f in files_in_dir]}"
+    expected_count = len(SPOT_1_EXPECTED_RESULT_FILES)
+    assert len(files_in_dir) == expected_count, (
+        f"Expected {expected_count} files in {results_dir}, but found {len(files_in_dir)}: "
+        f"{[f.name for f in files_in_dir]}"
     )
     print(f"Found files in {results_dir}:")
     for filename, expected_size, tolerance_percent in SPOT_1_EXPECTED_RESULT_FILES:
@@ -1141,9 +1144,9 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) ->
     for parquet_filename, geojson_filename in parquet_geojson_pairs:
         parquet_path = results_dir / parquet_filename
         geojson_path = results_dir / geojson_filename
-        parquet_row_count = len(pd.read_parquet(parquet_path))
-        with geojson_path.open() as f:
-            geojson_feature_count = len(json.load(f)["features"])
+        parquet_row_count = pq.read_metadata(parquet_path).num_rows
+        with geojson_path.open("rb") as f:
+            geojson_feature_count = sum(1 for _ in ijson.items(f, "features.item"))
         assert parquet_row_count == geojson_feature_count, (
             f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) "
             f"and {geojson_filename} ({geojson_feature_count} features)"
diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py
index 05c799d2a..361cd419d 100644
--- a/tests/aignostics/application/gui_test.py
+++ b/tests/aignostics/application/gui_test.py
@@ -1,7 +1,6 @@
 """Tests to verify the GUI functionality of the application module."""
 
 import contextlib
-import json
 import re
 import tempfile
 from asyncio import sleep, to_thread
@@ -10,7 +9,8 @@
 from typing import TYPE_CHECKING
 from unittest.mock import AsyncMock, MagicMock, Mock, patch
 
-import pandas as pd
+import ijson
+import pyarrow.parquet as pq
 import pytest
 from nicegui.testing import User
 from typer.testing import CliRunner
@@ -442,8 +442,9 @@ async def test_gui_run_download(  # noqa: PLR0914, PLR0915
 
         # Check for files in the results directory
         files_in_results_dir = list(results_dir.glob("*"))
-        assert len(files_in_results_dir) == 12, (
-            f"Expected 12 files in {results_dir}, but found {len(files_in_results_dir)}: "
+        expected_count = len(SPOT_0_EXPECTED_RESULT_FILES)
+        assert len(files_in_results_dir) == expected_count, (
+            f"Expected {expected_count} files in {results_dir}, but found {len(files_in_results_dir)}: "
             f"{[f.name for f in files_in_results_dir]}"
         )
 
@@ -475,9 +476,9 @@ async def test_gui_run_download(  # noqa: PLR0914, PLR0915
         for parquet_filename, geojson_filename in parquet_geojson_pairs:
             parquet_path = results_dir / parquet_filename
             geojson_path = results_dir / geojson_filename
-            parquet_row_count = len(pd.read_parquet(parquet_path))
-            with geojson_path.open() as f:
-                geojson_feature_count = len(json.load(f)["features"])
+            parquet_row_count = pq.read_metadata(parquet_path).num_rows
+            with geojson_path.open("rb") as f:
+                geojson_feature_count = sum(1 for _ in ijson.items(f, "features.item"))
             assert parquet_row_count == geojson_feature_count, (
                 f"Row count mismatch between {parquet_filename} ({parquet_row_count} rows) "
                 f"and {geojson_filename} ({geojson_feature_count} features)"
diff --git a/tests/aignostics/platform/e2e_test.py b/tests/aignostics/platform/e2e_test.py
index 9324898ce..0f7699600 100644
--- a/tests/aignostics/platform/e2e_test.py
+++ b/tests/aignostics/platform/e2e_test.py
@@ -136,23 +136,6 @@ def _build_wsi_input_item(  # noqa: PLR0913, PLR0917
     )
 
 
-def _build_minimal_wsi_input_item(gs_url: str, crc32c: str, expires_seconds: int) -> platform.InputItem:
-    """Build a minimal WSI InputItem supplying only the CRC32C and image URL."""
-    return platform.InputItem(
-        external_id=gs_url,
-        input_artifacts=[
-            platform.InputArtifact(
-                name="whole_slide_image",
-                download_url=platform.generate_signed_url(url=gs_url, expires_seconds=expires_seconds),
-                metadata={
-                    "checksum_base64_crc32c": crc32c,
-                    "media_type": "image/tiff",
-                },
-            )
-        ],
-    )
-
-
 def _get_single_spot_payload_for_heta(expires_seconds: int) -> list[platform.InputItem]:
     """Generates a payload using a single spot."""
     return [
diff --git a/tests/aignostics/qupath/gui_test.py b/tests/aignostics/qupath/gui_test.py
index 01d9a1b6d..0fdd07a7a 100644
--- a/tests/aignostics/qupath/gui_test.py
+++ b/tests/aignostics/qupath/gui_test.py
@@ -257,8 +257,9 @@ async def test_gui_run_qupath_install_to_inspect(  # noqa: C901, PLR0912, PLR091
 
         # Check for files in the results directory
         files_in_results_dir = list(results_dir.glob("*"))
-        assert len(files_in_results_dir) == 9, (
-            f"Expected 9 files in {results_dir}, but found {len(files_in_results_dir)}: "
+        expected_count = len(SPOT_0_EXPECTED_RESULT_FILES)
+        assert len(files_in_results_dir) == expected_count, (
+            f"Expected {expected_count} files in {results_dir}, but found {len(files_in_results_dir)}: "
             f"{[f.name for f in files_in_results_dir]}"
         )
 

From 8d1ddfeb58417dbbdcfe9068752faf6f2c4ba8da Mon Sep 17 00:00:00 2001
From: Ari Angelo <hello@ari.nz>
Date: Wed, 20 May 2026 11:23:24 +0200
Subject: [PATCH 8/9] fix(tests): import pyarrow lazily inside the tests and
 add blank line after the import for ruff format compliance

---
 tests/aignostics/application/cli_test.py | 3 ++-
 tests/aignostics/application/gui_test.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py
index 6cadfdde5..3bc3a9cf3 100644
--- a/tests/aignostics/application/cli_test.py
+++ b/tests/aignostics/application/cli_test.py
@@ -12,7 +12,6 @@
 from unittest.mock import MagicMock, patch
 
 import ijson
-import pyarrow.parquet as pq
 import pytest
 from aignx.codegen.exceptions import ForbiddenException
 from aignx.codegen.exceptions import NotFoundException as ApiNotFound
@@ -1141,6 +1140,8 @@ def test_cli_run_execute(runner: CliRunner, tmp_path: Path, record_property) ->
         ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"),
         ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"),
     ]
+    import pyarrow.parquet as pq
+
     for parquet_filename, geojson_filename in parquet_geojson_pairs:
         parquet_path = results_dir / parquet_filename
         geojson_path = results_dir / geojson_filename
diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py
index 361cd419d..205fa3d2d 100644
--- a/tests/aignostics/application/gui_test.py
+++ b/tests/aignostics/application/gui_test.py
@@ -10,7 +10,6 @@
 from unittest.mock import AsyncMock, MagicMock, Mock, patch
 
 import ijson
-import pyarrow.parquet as pq
 import pytest
 from nicegui.testing import User
 from typer.testing import CliRunner
@@ -473,6 +472,8 @@ async def test_gui_run_download(  # noqa: PLR0914, PLR0915
             ("tissue_segmentation_parquet_polygons.parquet", "tissue_segmentation_geojson_polygons.json"),
             ("cell_classification_parquet_polygons.parquet", "cell_classification_geojson_polygons.json"),
         ]
+        import pyarrow.parquet as pq
+
         for parquet_filename, geojson_filename in parquet_geojson_pairs:
             parquet_path = results_dir / parquet_filename
             geojson_path = results_dir / geojson_filename

From b8261e0517d2fb889a9d8ef536afbc645b518aaa Mon Sep 17 00:00:00 2001
From: Ari Angelo <hello@ari.nz>
Date: Wed, 20 May 2026 16:02:34 +0200
Subject: [PATCH 9/9] =?UTF-8?q?fix(tests):=20update=20stale=20assertions?=
 =?UTF-8?q?=20=E2=80=94=2016=20schemata=20files=20and=20SPOT=5F1=20file=20?=
 =?UTF-8?q?size=20constant?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/aignostics/application/cli_test.py | 2 +-
 tests/aignostics/application/gui_test.py | 3 ++-
 tests/aignostics/dataset/cli_test.py     | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/aignostics/application/cli_test.py b/tests/aignostics/application/cli_test.py
index 3bc3a9cf3..0a162298a 100644
--- a/tests/aignostics/application/cli_test.py
+++ b/tests/aignostics/application/cli_test.py
@@ -239,7 +239,7 @@ def test_cli_application_dump_schemata(runner: CliRunner, tmp_path: Path, record
         ],
     )
     assert result.exit_code == 0
-    assert "Zipped 11 files" in normalize_output(result.output)
+    assert "Zipped 16 files" in normalize_output(result.output)
     zip_file = sanitize_path(Path(tmp_path / f"{HETA_APPLICATION_ID}_{HETA_APPLICATION_VERSION}_schemata.zip"))
     assert zip_file.exists(), f"Expected zip file {zip_file} not found"
 
diff --git a/tests/aignostics/application/gui_test.py b/tests/aignostics/application/gui_test.py
index 205fa3d2d..5c81bcff2 100644
--- a/tests/aignostics/application/gui_test.py
+++ b/tests/aignostics/application/gui_test.py
@@ -32,6 +32,7 @@
     SPOT_0_FILESIZE,
     SPOT_0_GS_URL,
     SPOT_1_FILENAME,
+    SPOT_1_FILESIZE,
     SPOT_1_GS_URL,
 )
 
@@ -215,7 +216,7 @@ async def test_gui_download_dataset_via_application_to_run_cancel_to_find_back(
             assert SPOT_1_FILENAME in normalize_output(result.stdout)
             expected_file = Path(tmp_path) / SPOT_1_FILENAME
             assert expected_file.exists(), f"Expected file {expected_file} not found"
-            assert expected_file.stat().st_size == 14681750
+            assert expected_file.stat().st_size == SPOT_1_FILESIZE
 
             # Open the GUI and navigate to Atlas H&E-TME application
             await user.open("/")
diff --git a/tests/aignostics/dataset/cli_test.py b/tests/aignostics/dataset/cli_test.py
index 23e1f5236..0c9fdfe3c 100644
--- a/tests/aignostics/dataset/cli_test.py
+++ b/tests/aignostics/dataset/cli_test.py
@@ -11,7 +11,7 @@
 
 from aignostics.cli import cli
 from tests.conftest import normalize_output
-from tests.constants_test import SPOT_1_FILENAME, SPOT_1_GS_URL
+from tests.constants_test import SPOT_1_FILENAME, SPOT_1_FILESIZE, SPOT_1_GS_URL
 
 SERIES_UID = "1.3.6.1.4.1.5962.99.1.1069745200.1645485340.1637452317744.2.0"
 THUMBNAIL_UID = "1.3.6.1.4.1.5962.99.1.1038911754.1238045814.1637421484298.15.0"
@@ -149,7 +149,7 @@ def test_cli_aignostics_download_sample(runner: CliRunner, tmp_path: Path, recor
 
     expected_file = tmp_path / SPOT_1_FILENAME
     assert expected_file.exists(), f"Expected file {expected_file} not found"
-    assert expected_file.stat().st_size == 14681750
+    assert expected_file.stat().st_size == SPOT_1_FILESIZE
 
 
 @pytest.mark.integration