System tests (globaldothealth#36)

* Add system tests * Add chromedriver to ci action * Check if insightboard config exists in system tests * Add sleep before asserts in system tests * Refine system tests * Wait for InsightBoard to start in system tests * Migrate from gunicorn to waitress for cross-platform compatibility * Wait for InsightBoard to start before opening webbrowser / running system tests * Take screenshot if system test fails * Resolve temporary file access permissions (Windows)
jsbrittain · Oct 9, 2024 · c5741d0 · c5741d0
1 parent 0c3598f
commit c5741d0
Show file tree

Hide file tree

Showing 25 changed files with 737 additions and 149 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -24,10 +24,19 @@ jobs:
             3.12
       - name: Install the latest version of uv
         uses: astral-sh/setup-uv@v2
-      - name: Run tests
+
+      - name: Setup test environment
         run: |
           uv sync --all-extras
-          uv run pytest --cov=InsightBoard --cov-report=html tests/
+          uv pip install "adtl[parquet] @ git+https://github.com/globaldothealth/adtl"
+
+      - name: Unit tests (and coverage report)
+        run: |
+          uv run pytest --cov=InsightBoard --cov-report=html tests/unit
+      - name: System tests
+        run: |
+          uv run pytest tests/system
+
       - name: Upload coverage report
         uses: actions/upload-artifact@v4
         if: ${{ matrix.os == 'ubuntu-latest' }}
@@ -36,4 +45,22 @@ jobs:
           path: htmlcov
       - name: Add coverage report to action summary
         if: ${{ matrix.os == 'ubuntu-latest' }}
-        run: cat htmlcov/index.html > $GITHUB_STEP_SUMMARY
+        run: cat htmlcov/index.html >> $GITHUB_STEP_SUMMARY
+
+      # Runs only when an earlier step in the job has failed. If a
+      # screenshot.png exists in the working directory it is base64-encoded
+      # and appended to the job summary as an inline <img> element.
+      # NOTE(review): confirm that the job summary renders data: URI images
+      # and that the encoded payload fits the step-summary size limit.
+      - name: Return screenshot (on system test failure)
+        if: failure()
+        shell: bash
+        run: |
+          # Check if screenshot exists, then convert to base64 and embed in summary
+          if [ -f screenshot.png ]; then
+            python -c "import base64; print(base64.b64encode(open('screenshot.png', 'rb').read()).decode('utf-8'))" > screenshot_base64.txt
+            echo "### Screenshot of Failed Test" >> $GITHUB_STEP_SUMMARY
+            echo "" >> $GITHUB_STEP_SUMMARY
+            echo '<img src="data:image/png;base64,'$(cat screenshot_base64.txt)'" alt="Screenshot"/>' >> $GITHUB_STEP_SUMMARY
+          fi
+      # Uploads the failure screenshot so it can be downloaded from the run.
+      # Uses the same upload-artifact major version as the coverage-report
+      # upload step; v4 requires artifact names to be unique within a run, so
+      # the name is suffixed with the matrix OS.
+      - name: Upload screenshot as artifact
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: failed-test-screenshot-${{ matrix.os }}
+          path: screenshot.png
diff --git a/pyproject.toml b/pyproject.toml
@@ -8,12 +8,12 @@ dependencies = [
     "dash-table>=5.0.0",
     "pandas>=2.2.3",
     "dash-bootstrap-components>=1.6.0",
-    "gunicorn>=23.0.0",
     "pyarrow>=17.0.0",
     "openpyxl>=3.1.5",
     "jsonschema>=4.23.0",
     "dash-dangerously-set-inner-html>=0.0.2",
     "tomli-w>=1.0.0",
+    "waitress>=3.0.0",
 ]
 scripts = { InsightBoard = "InsightBoard:main" }
 dynamic = ["version"]
@@ -29,6 +29,8 @@ build-backend = "setuptools.build_meta"
 [tool.uv]
 package = true
 dev-dependencies = [
+    "chromedriver-py>=129.0.6668.89",
     "pytest-cov>=5.0.0",
     "pytest>=8.3.3",
+    "selenium>=4.25.0",
 ]
diff --git a/src/InsightBoard/__init__.py b/src/InsightBoard/__init__.py
@@ -1,17 +1,38 @@
+import os
+import time
+import socket
 import subprocess
 import webbrowser
 from .app import app  # noqa: F401
 
 from .version import __version__  # noqa: F401
 
+# Environment variables are always strings when set, so convert explicitly:
+# the timeout is compared numerically in wait_for_server().
+INSIGHTBOARD_PORT = int(os.getenv("INSIGHTBOARD_PORT", "8050"))
+INSIGHTBOARD_TIMEOUT = int(os.getenv("INSIGHTBOARD_TIMEOUT", "30"))
 
-def main(debug=False):
-    port = 8050
+
+def launch_app() -> subprocess.Popen:
+    """Start the InsightBoard server in a child process.
+
+    Runs ``waitress-serve`` against ``InsightBoard.app:server``, listening on
+    all interfaces (``0.0.0.0``) at ``INSIGHTBOARD_PORT``.
+
+    Returns:
+        The ``subprocess.Popen`` handle of the launched server. The process
+        is not waited on here.
+    """
     cmd = [
-        "gunicorn",
+        "waitress-serve",
+        f"--listen=0.0.0.0:{INSIGHTBOARD_PORT}",
         "InsightBoard.app:server",
-        *["--bind", f"0.0.0.0:{port}"],
     ]
-    process = subprocess.Popen(cmd)
-    webbrowser.open(f"http://127.0.0.1:{port}")
+    return subprocess.Popen(cmd)
+
+
+def wait_for_server(port=INSIGHTBOARD_PORT, timeout=INSIGHTBOARD_TIMEOUT) -> bool:
+    """Block until a TCP connection to the local server succeeds.
+
+    Args:
+        port: Port on 127.0.0.1 to probe.
+        timeout: Maximum number of seconds to keep retrying. A numeric
+            string (e.g. a value read from an environment variable) is
+            accepted as well as a number.
+
+    Returns:
+        True as soon as a connection is established.
+
+    Raises:
+        TimeoutError: If no connection succeeds within ``timeout`` seconds.
+    """
+    # Use a monotonic clock so system clock adjustments cannot shorten or
+    # lengthen the wait; float() tolerates a timeout supplied as a string.
+    deadline = time.monotonic() + float(timeout)
+    while time.monotonic() < deadline:
+        try:
+            with socket.create_connection(("127.0.0.1", port), timeout=1):
+                return True  # Server is up
+        except OSError:  # covers ConnectionRefusedError and socket timeouts
+            time.sleep(1)
+    raise TimeoutError(f"Server did not start within {timeout} seconds")
+
+
+def main(debug=False):
+    """Launch the server, open it in the web browser and wait for it to exit.
+
+    Args:
+        debug: Currently unused by this function.
+
+    Raises:
+        TimeoutError: If the server does not accept connections within
+            ``INSIGHTBOARD_TIMEOUT`` seconds.
+    """
+    process = launch_app()
+    try:
+        wait_for_server(INSIGHTBOARD_PORT, INSIGHTBOARD_TIMEOUT)
+    except BaseException:
+        # Clean up and re-raise: do not leave an orphaned server process
+        # behind if start-up fails or is interrupted.
+        process.terminate()
+        process.wait()
+        raise
+    webbrowser.open(f"http://127.0.0.1:{INSIGHTBOARD_PORT}")
     process.wait()
diff --git a/src/InsightBoard/parsers/__init__.py b/src/InsightBoard/parsers/__init__.py
@@ -1,6 +1,7 @@
 import shutil
 import subprocess
 import pandas as pd
+from pathlib import Path
 from tempfile import NamedTemporaryFile
 
 try:
@@ -35,9 +36,10 @@ def adtl(df: pd.DataFrame, specification: str, *cl_args) -> dict:
     """
     adtl_check_command()
     # Write pandas dataframe to temp file, then run adtl
-    with NamedTemporaryFile(suffix=".csv") as input_csv:
+    with NamedTemporaryFile(suffix=".csv", delete=False) as input_csv:
         df.to_csv(input_csv.name, index=False)
-        result = subprocess.run(["adtl", specification, input_csv.name, *cl_args])
+    result = subprocess.run(["adtl", specification, input_csv.name, *cl_args])
+    Path(input_csv.name).unlink()
 
     return {
         "stdout": result.stdout,
@@ -52,20 +54,23 @@ def parse_adtl(df: pd.DataFrame, spec_file, table_names) -> list[dict]:
     parser = adtl_parser.Parser(spec_file)
 
     # Write the dataframe to a temporary file and load it into ADTL
-    with NamedTemporaryFile(suffix=".csv") as source_temp_file:
+    with NamedTemporaryFile(suffix=".csv", delete=False) as source_temp_file:
         df.to_csv(source_temp_file.name)
-        parsed = parser.parse(source_temp_file.name)
+    parsed = parser.parse(source_temp_file.name)
+    Path(source_temp_file.name).unlink()
 
     # Write the parsed data to a temporary file and load it into a pandas dataframe
     dfs = []
     for table_name in table_names:
-        with NamedTemporaryFile(suffix=".csv") as parsed_temp_file:
+        with NamedTemporaryFile(suffix=".csv", delete=False) as parsed_temp_file:
             parsed.write_csv(table_name, parsed_temp_file.name)
-            df = pd.read_csv(parsed_temp_file.name)
-            # Drop ADTL-specific columns
-            df.drop(columns=["adtl_valid", "adtl_error"], inplace=True)
-            # Append the dataframe to the list
-            dfs.append(df)
+        df = pd.read_csv(parsed_temp_file.name)
+        # Drop ADTL-specific columns
+        df.drop(columns=["adtl_valid", "adtl_error"], inplace=True)
+        # Append the dataframe to the list
+        dfs.append(df)
+        # Remove temporary file
+        Path(parsed_temp_file.name).unlink()
 
     return [
         {

diff --git a/tests/system/.gitignore b/tests/system/.gitignore
@@ -0,0 +1 @@
+chromedriver
diff --git a/tests/system/InsightBoard/projects/a_first_project_in_list/touch b/tests/system/InsightBoard/projects/a_first_project_in_list/touch
diff --git a/tests/system/InsightBoard/projects/sample_project/data/sample_data.csv b/tests/system/InsightBoard/projects/sample_project/data/sample_data.csv
diff --git a/tests/system/InsightBoard/projects/sample_project/data/sample_data_source1.csv b/tests/system/InsightBoard/projects/sample_project/data/sample_data_source1.csv
@@ -0,0 +1,21 @@
+CaseNumber,PersonAge,Sex,City,OnsetDate,ReportedSymptoms,HealthOutcome,VaxStatus,RecoveryDays,PreexistingConditions
+001,34,Male,New York,08/10/2023,"fever, cough, headache",recovered,yes,10,None
+002,27,F,L.A.,09/08/2023,"fever, cough, fatigue",recovered,No,15,Athma
+003,55,Female,Chicago,10/08/2023,"short breath, fatigue",Deceased,no,N/A,Diabeties
+004,40,M,Houston,11/08/2023,"fever, cough",Recovered,Yes,12,High BP
+005,62,Other,Miami,12/08/2023,"fatigue, muscle pain, fever",Died,no,N/A,None
+006,18,F,Seattle,"13/08/2023","fever, cough",Recov,Partial,8,None
+007,70,M,Atlanta,14/08/2023,"fever, shortness of breath",Dead,None,N/A,Lung issue
+008,50,Female,SF,15/08/2023,"fever, headache",Recovered,yes,11,HBP
+009,44,Female,Boston,16/08/2023,"fatigue, muscle ache",recovered,Yes,13,None
+010,29,Male,Denver,17/08/2023,"fever, coughing",recovered,Partial,9,None
+011,23,Male,LV,18/08/2023,"fatigue, muscle pain",Recov,none,14,None
+012,37,F,Dallas,19/08/2023,"fever, cough, fatigue",Recovd,YES,10,None
+013,60,Male,NewYork,20/08/2023,"fever, short breath",Died,NO,N/A,Heart problems
+014,47,O,S.D.,21/08/2023,"fever, coughing",recovered,yes,12,None
+015,33,Female,Chicago,22/08/2023,"fever, headache",recovered,no,11,None
+016,25,Male,Philly,23/08/2023,"fatigue, muscle pain",Recov,Partial,8,None
+017,71,F,Detroit,24/08/2023,"short breath",Dead,No,N/A,Kidney Disease
+018,38,F,Houston,25/08/2023,"fever, coughing",recovered,yes,9,None
+019,55,Male,L.A.,26/08/2023,"fever, fatigue",Recov,None,15,Asthma
+020,65,Female,NYC,27/08/2023,"short breath",Dead,No,N/A,Diabetes
diff --git a/tests/system/InsightBoard/projects/sample_project/parsers/adtl-source1.py b/tests/system/InsightBoard/projects/sample_project/parsers/adtl-source1.py
@@ -0,0 +1,27 @@
+from tempfile import NamedTemporaryFile
+import pandas as pd
+from pathlib import Path
+import adtl
+from InsightBoard.parsers import parse_adtl
+
+SPECIFICATION_FILE = Path("adtl") / "source1.toml"
+TABLE_NAME = "linelist"
+
+
+def parse(df: pd.DataFrame) -> list[dict]:
+    """Parse *df* with the ADTL specification stored next to this module.
+
+    The specification path is resolved relative to this file's directory and
+    the result of ``parse_adtl`` for the single ``TABLE_NAME`` table is
+    returned unchanged.
+    """
+    parser_dir = Path(__file__).parent
+    return parse_adtl(df, parser_dir / SPECIFICATION_FILE, [TABLE_NAME])
+
+
+def test_parse():
+    """Smoke test: parse the bundled sample CSV and check a DataFrame results."""
+    print("Test: Parse")
+    project_dir = Path(__file__).parent.parent
+    source_df = pd.read_csv(project_dir / "data" / "sample_data_source1.csv")
+    tables = parse(source_df)
+    # The first returned table must carry its rows as a pandas DataFrame.
+    assert isinstance(tables[0]["data"], pd.DataFrame)
+    print("Test: Parse - Passed")
+
+
+if __name__ == "__main__":
+    test_parse()
diff --git a/tests/system/InsightBoard/projects/sample_project/parsers/adtl/source1.toml b/tests/system/InsightBoard/projects/sample_project/parsers/adtl/source1.toml
@@ -0,0 +1,75 @@
+[adtl]
+  name = "source1"
+  description = "Parses the sample dataset"
+  defaultDateFormat = "%d/%m/%Y"
+
+  [adtl.tables]
+    linelist = { kind = "oneToOne", schema = "../../schemas/linelist.schema.json" }
+
+[linelist]
+
+  [linelist."Case ID"]
+    field = "CaseNumber"
+
+  [linelist.Age]
+    field = "PersonAge"
+
+  [linelist.Gender]
+    field = "Sex"
+    ignoreMissingKey = true
+
+    [linelist.Gender.values]
+        "Male" = "Male"
+        "Female" = "Female"
+        "Other" = "Other"
+        "M" = "Male"
+        "F" = "Female"
+        "O" = "Other"
+
+  [linelist.Location]
+    field = "City"
+
+  [linelist."Date of Onset"]
+    field = "OnsetDate"
+    ignoreMissingKey = true
+
+  [linelist.Symptoms]
+    field = "ReportedSymptoms"
+
+  [linelist.Outcome]
+    field = "HealthOutcome"
+    ignoreMissingKey = true
+
+    [linelist.Outcome.values]
+        "Recovered" = "Recovered"
+        "Deceased" = "Deceased"
+        "Death" = "Deceased"
+        "Dead" = "Deceased"
+        "Died" = "Deceased"
+        "recovered" = "Recovered"
+        "Recov" = "Recovered"
+
+  [linelist."Vaccination Status"]
+    field = "VaxStatus"
+    ignoreMissingKey = true
+
+    [linelist."Vaccination Status".values]
+        "yes" = "Yes"
+        "no" = "No"
+        "Yes" = "Yes"
+        "No" = "No"
+        "YES" = "Yes"
+        "NO" = "No"
+        "none" = "No"
+        "Partial" = "Partial"
+        "Vaccinated" = "Yes"
+        "Not Vaccinated" = "No"
+        "V" = "Yes"
+        "N" = "No"
+        "null" = "Unknown"
+
+  [linelist."Days to Recovery"]
+    field = "RecoveryDays"
+
+  [linelist."Underlying Conditions"]
+    field = "PreexistingConditions"
diff --git a/tests/system/InsightBoard/projects/sample_project/parsers/test_parser2.py b/tests/system/InsightBoard/projects/sample_project/parsers/test_parser2.py
diff --git a/tests/system/InsightBoard/projects/sample_project/schemas/linelist.schema.json b/tests/system/InsightBoard/projects/sample_project/schemas/linelist.schema.json
@@ -0,0 +1,58 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "properties": {
+    "Case ID": {
+      "type": "integer",
+      "description": "Unique identifier for each case",
+      "PrimaryKey": true
+    },
+    "Age": {
+      "type": "integer",
+      "description": "Age of the patient",
+      "minimum": 0
+    },
+    "Gender": {
+      "type": "string",
+      "description": "Gender of the patient",
+      "enum": ["Male", "Female", "Other"]
+    },
+    "Location": {
+      "type": "string",
+      "description": "City or region where the case was reported"
+    },
+    "Date of Onset": {
+      "type": "string",
+      "format": "date",
+      "description": "Date when symptoms first appeared"
+    },
+    "Symptoms": {
+      "type": ["array", "null"],
+      "description": "List of symptoms exhibited by the patient",
+      "items": {
+        "type": "string"
+      }
+    },
+    "Outcome": {
+      "type": ["string", "null"],
+      "description": "Final outcome for the patient (recovered or deceased)",
+      "enum": ["Recovered", "Deceased", null]
+    },
+    "Vaccination Status": {
+      "type": ["string", "null"],
+      "description": "Vaccination status of the patient",
+      "enum": ["Yes", "No", "Partial", "Unknown"]
+    },
+    "Days to Recovery": {
+      "type": ["integer", "null"],
+      "description": "Number of days to recover, null for deceased cases"
+    },
+    "Underlying Conditions": {
+      "type": ["string", "null"],
+      "description": "Pre-existing health conditions of the patient",
+      "enum": ["None", "Asthma", "Diabetes", "Hypertension", "Heart Disease", "Chronic Lung Disease", "Chronic Kidney Disease", null]
+    }
+  },
+  "required": ["Case ID", "Age", "Gender", "Location", "Date of Onset"],
+  "additionalProperties": false
+}
diff --git a/tests/system/InsightBoard/projects/z_last_project_in_list/touch b/tests/system/InsightBoard/projects/z_last_project_in_list/touch
diff --git a/tests/system/test_e2e.py b/tests/system/test_e2e.py
@@ -0,0 +1,41 @@
+import time
+import pytest
+from pathlib import Path
+from utils import (
+    driver,
+    page_upload,
+    chromedriver_present,
+    save_screenshot,
+)
+
+
+@pytest.mark.skipif(not chromedriver_present, reason="chromedriver not present")
+def test_insightboard(driver):
+    """End-to-end check of the upload page using the sample project data.
+
+    Selects the ``adtl-source1`` parser, uploads the sample CSV, parses it and
+    checks the DataTable row count with the "only show validation errors"
+    filter off (20), on (10) and off again (20). On any failure a screenshot
+    is saved and its path is reported in the re-raised exception.
+    """
+    upload = page_upload(driver)
+    upload.clear_data()
+    try:
+        upload.select_parser("adtl-source1")
+        data_file = Path(__file__).parent.joinpath(
+            "InsightBoard",
+            "projects",
+            "sample_project",
+            "data",
+            "sample_data_source1.csv",
+        )
+        assert data_file.exists()
+        upload.select_data_file(str(data_file))
+        upload.parse()
+        time.sleep(1)  # pause before asserting
+        upload.check_DataTable_row_count(20)
+        # Toggle "only show validation errors" on (validated rows are hidden),
+        # then off again (all rows are shown).
+        for expected_rows in (10, 20):
+            upload.toggle_only_show_validation_errors()
+            time.sleep(1)
+            upload.check_DataTable_row_count(expected_rows)
+    except Exception as err:
+        screenshot_path = save_screenshot(driver)
+        raise Exception(f"Screenshot saved to: {screenshot_path}") from err