From ddf5d4fd7e29d36fe7dcb133fef79fbd066d2a81 Mon Sep 17 00:00:00 2001
From: Michael Terry
Date: Fri, 5 Jul 2024 14:50:25 -0400
Subject: [PATCH] tests: add some CLI tests to improve coverage

---
 .github/workflows/ci.yaml             |  2 +-
 cumulus_etl/cli.py                    |  4 +--
 tests/etl/base.py                     |  4 ++-
 tests/etl/test_etl_cli.py             | 14 ++++++++
 tests/test_cli.py                     | 29 ++++++++++++++++++++++++++----
 tests/upload_notes/test_upload_cli.py | 49 +++++++++++++++++++++++++--
 6 files changed, 93 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 81645751..8b5a186a 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -78,7 +78,7 @@ jobs:
         uses: docker/setup-buildx-action@v3
 
       - name: Build ETL image
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         with:
           load: true  # just build, no push
          tags: smartonfhir/cumulus-etl:latest
diff --git a/cumulus_etl/cli.py b/cumulus_etl/cli.py
index 865e6dd5..c5a1654b 100644
--- a/cumulus_etl/cli.py
+++ b/cumulus_etl/cli.py
@@ -80,8 +80,8 @@ async def main(argv: list[str]) -> None:
 
 
 def main_cli():
-    asyncio.run(main(sys.argv[1:]))
+    asyncio.run(main(sys.argv[1:]))  # pragma: no cover
 
 
 if __name__ == "__main__":
-    main_cli()
+    main_cli()  # pragma: no cover
diff --git a/tests/etl/base.py b/tests/etl/base.py
index 85647ac2..c19addac 100644
--- a/tests/etl/base.py
+++ b/tests/etl/base.py
@@ -58,15 +58,17 @@ async def run_etl(
         export_group: str = "test-group",
         export_timestamp: str = "2020-10-13T12:00:20-05:00",
         write_completion: bool = True,
+        skip_init_checks: bool = True,
     ) -> None:
         args = [
             input_path or self.input_path,
             output_path or self.output_path,
             phi_path or self.phi_path,
-            "--skip-init-checks",
             f"--input-format={input_format}",
             f"--ctakes-overrides={self.ctakes_overrides.name}",
         ]
+        if skip_init_checks:
+            args.append("--skip-init-checks")
         if export_group is not None:
             args.append(f"--export-group={export_group}")
         if export_timestamp:
diff --git a/tests/etl/test_etl_cli.py b/tests/etl/test_etl_cli.py
index 7927e153..da5149cf 100644
--- a/tests/etl/test_etl_cli.py
+++ b/tests/etl/test_etl_cli.py
@@ -174,6 +174,20 @@ async def test_bulk_no_auth(self):
             await self.run_etl(input_path="https://localhost:12345/", tasks=["patient"])
         self.assertEqual(errors.BULK_EXPORT_FAILED, cm.exception.code)
 
+    async def test_no_ms_tool(self):
+        """Verify that we require the MS tool to be in PATH."""
+        self.patch_dict(os.environ, {"PATH": "/nothing-here"})
+        with self.assertRaises(SystemExit) as cm:
+            await self.run_etl(skip_init_checks=False)
+        self.assertEqual(cm.exception.code, errors.MSTOOL_MISSING)
+
+    @mock.patch("cumulus_etl.etl.tasks.basic_tasks.ProcedureTask.init_check")
+    async def test_task_init_checks(self, mock_check):
+        """Verify that we check task requirements."""
+        mock_check.side_effect = ZeroDivisionError
+        with self.assertRaises(ZeroDivisionError):
+            await self.run_etl(skip_init_checks=False)
+
     @ddt.data(
         # First line is CLI args
         # Second line is what loader will represent as the group/time
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 90c9b08f..b6a41f9b 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1,12 +1,13 @@
-"""Tests for cli.py"""
+"""Tests for cli.py and cli_utils.py"""
 
 import contextlib
 import io
+import socket
 from unittest import mock
 
 import ddt
 
-from cumulus_etl import cli
+from cumulus_etl import cli, cli_utils
 
 from tests.utils import AsyncTestCase
 
@@ -47,3 +48,27 @@ async def test_routing(self, argv, main_method):
             await cli.main(argv)
 
         self.assertTrue(mock_main.called)
+
+
+class TestCumulusCLIUtils(AsyncTestCase):
+    """
+    Unit tests for our CLI helpers.
+    """
+
+    async def test_url_is_unresolvable(self):
+        """Verify that a hostname that doesn't exist fails immediately."""
+        # First test that the plumbing is real and works
+        self.assertFalse(cli_utils.is_url_available("http://nope.invalid"))
+
+        # Now mock the plumbing to confirm we don't retry
+        with mock.patch("socket.create_connection") as mock_conn:
+            mock_conn.side_effect = socket.gaierror
+            self.assertFalse(cli_utils.is_url_available("http://nope.invalid"))
+            self.assertEqual(mock_conn.call_count, 1)
+
+    @mock.patch("socket.create_connection", side_effect=ConnectionRefusedError)
+    @mock.patch("time.sleep", new=lambda x: None)  # don't sleep during retries
+    async def test_url_is_not_ready(self, mock_conn):
+        """Verify that a refused connection is retried."""
+        self.assertFalse(cli_utils.is_url_available("http://nope.invalid"))
+        self.assertEqual(mock_conn.call_count, 6)
diff --git a/tests/upload_notes/test_upload_cli.py b/tests/upload_notes/test_upload_cli.py
index 1ec949c1..e638e3d8 100644
--- a/tests/upload_notes/test_upload_cli.py
+++ b/tests/upload_notes/test_upload_cli.py
@@ -81,6 +81,7 @@ async def run_upload_notes(
         philter=None,
         no_philter=None,
         overwrite=False,
+        skip_init_checks=True,
     ) -> None:
         args = [
             "upload-notes",
@@ -92,8 +93,9 @@
             f"--export-to={self.export_path}",
             "--ls-project=21",
             f"--ls-token={self.token_path}",
-            "--skip-init-checks",
         ]
+        if skip_init_checks:
+            args += ["--skip-init-checks"]
         if anon_docrefs:
             args += ["--anon-docrefs", anon_docrefs]
         if docrefs:
@@ -160,8 +162,10 @@ def mock_search_url(respx_mock: respx.MockRouter, patient: str, doc_ids: Iterabl
         respx_mock.get(f"https://localhost/DocumentReference?patient={patient}&_elements=content").respond(json=bundle)
 
     @staticmethod
-    def mock_read_url(respx_mock: respx.MockRouter, doc_id: str, code: int = 200, **kwargs) -> None:
-        docref = TestUploadNotes.make_docref(doc_id, **kwargs)
+    def mock_read_url(
+        respx_mock: respx.MockRouter, doc_id: str, code: int = 200, docref: dict = None, **kwargs
+    ) -> None:
+        docref = docref or TestUploadNotes.make_docref(doc_id, **kwargs)
         respx_mock.get(f"https://localhost/DocumentReference/{doc_id}").respond(status_code=code, json=docref)
 
     @staticmethod
@@ -451,3 +455,42 @@ async def test_grouped_encounter_offsets(self):
         self.assertEqual("doc?", note.text[match2c[0] : match2c[1]])
         spans = {x.span().key() for x in note.ctakes_matches}
         self.assertEqual({match1a, match1b, match1c, match2a, match2b, match2c}, spans)
+
+    async def test_docrefs_without_encounters_are_skipped(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            with common.NdjsonWriter(f"{tmpdir}/DocumentReference.ndjson") as writer:
+                # One docref without any context/encounter info
+                docref1 = self.make_docref("D1")
+                del docref1["context"]
+                writer.write(docref1)
+
+                # And one with all the normal goodies
+                writer.write(self.make_docref("D2"))
+
+            await self.run_upload_notes(input_path=tmpdir)
+
+        self.assertEqual({"D1", "D2"}, self.get_exported_ids())
+        self.assertEqual({"D2"}, self.get_pushed_ids())
+
+    @mock.patch("cumulus_etl.nlp.restart_ctakes_with_bsv", new=lambda *_: False)
+    async def test_nlp_restart_failure_is_noticed(self):
+        with self.assertRaises(SystemExit) as cm:
+            await self.run_upload_notes()
+        self.assertEqual(cm.exception.code, errors.CTAKES_OVERRIDES_INVALID)
+
+    @mock.patch("cumulus_etl.nlp.check_ctakes")
+    @mock.patch("cumulus_etl.nlp.check_negation_cnlpt")
@mock.patch("cumulus_etl.cli_utils.is_url_available") + async def test_init_checks(self, mock_url, mock_cnlpt, mock_ctakes): + # Start with error case for our URL check (against label studio) + mock_url.return_value = False + with self.assertRaises(SystemExit) as cm: + await self.run_upload_notes(skip_init_checks=False) + self.assertEqual(mock_ctakes.call_count, 1) + self.assertEqual(mock_cnlpt.call_count, 1) + self.assertEqual(mock_url.call_count, 1) + self.assertEqual(cm.exception.code, errors.LABEL_STUDIO_MISSING) + + # Let the URL pass and confirm we now run successfully + mock_url.return_value = True + await self.run_upload_notes(skip_init_checks=False)