Skip to content

Commit

Permalink
Merge pull request #295 from smart-on-fhir/mikix/nlp-regression
Browse files Browse the repository at this point in the history
ci: add full stack NLP regression test
  • Loading branch information
mikix authored Jan 12, 2024
2 parents 15b3f51 + b143b5d commit 0d0db81
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 9 deletions.
47 changes: 44 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
python-version: ["3.10"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
Expand All @@ -37,7 +37,7 @@ jobs:
pip install .[tests]
- name: Check out MS tool
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
repository: microsoft/Tools-for-Health-Data-Anonymization
path: mstool
Expand All @@ -57,10 +57,51 @@ jobs:
run: |
python -m pytest
nlp-regression:
  # Full-stack NLP regression job: builds the ETL image, runs the
  # covid_symptom__nlp_results task through docker compose, and diffs the
  # produced ndjson against the checked-in expected output.
  runs-on: ubuntu-latest
  env:
    # compose.yaml passes $UMLS_API_KEY through to the cTAKES service.
    UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}
  steps:
    - uses: actions/checkout@v4

    # This action sets up Docker Buildx, which the build step below uses.
    - name: Install Docker
      uses: docker/setup-buildx-action@v3

    - name: Build ETL image
      uses: docker/build-push-action@v5
      with:
        load: true  # just build, no push
        tags: smartonfhir/cumulus-etl:latest

    # `up -d` pulls the covid-symptom profile's images and also starts
    # those services in the background for the next step.
    - name: Download NLP images
      run: docker compose --profile covid-symptom up -d --quiet-pull

    - name: Run NLP
      run: |
        # Assign first, export second, so a failing realpath is not hidden
        # behind export's own (always zero) exit status.
        DATADIR="$(realpath tests/data/nlp-regression)"
        export DATADIR
        # Run the NLP task
        docker compose run --rm \
          --volume "$DATADIR:/in" \
          cumulus-etl \
          /in/input \
          /in/run-output \
          /in/phi \
          --output-format=ndjson \
          --task covid_symptom__nlp_results
        # Compare results
        export OUTDIR="$DATADIR/run-output/covid_symptom__nlp_results"
        # Take ownership of the output so sed can edit it in place
        # (presumably the container wrote it as another user - confirm).
        sudo chown -R "$(id -u)" "$OUTDIR"
        # Drop the "generated_on" field before diffing; its value is
        # presumably different on every run.
        sed -i 's/"generated_on": "[^"]*", //g' "$OUTDIR"/*.ndjson
        diff -upr "$DATADIR/expected-output" "$OUTDIR"
        echo "All Good!"
lint:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Install linters
# black is synced with the .pre-commit-hooks version
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/docker-hub.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,23 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
uses: docker/setup-buildx-action@v3

- name: Get Docker metadata
id: meta
uses: docker/metadata-action@v4
uses: docker/metadata-action@v5
with:
flavor: latest=true
images: smartonfhir/cumulus-etl

- name: Log in to Docker Hub
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Build and push image to Docker Hub
uses: docker/build-push-action@v4
uses: docker/build-push-action@v5
with:
push: true
platforms: |
Expand Down
2 changes: 1 addition & 1 deletion compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ services:
ctakes-covid-base:
image: smartonfhir/ctakes-covid:1.1.0
environment:
- ctakes_umlsuser=umls_api_key
- ctakes_umlsuser=umls_api_key
- ctakes_umlspw=$UMLS_API_KEY
networks:
- cumulus-etl
Expand Down
7 changes: 6 additions & 1 deletion cumulus_etl/etl/studies/covid_symptom/covid_ctakes.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@ async def covid_symptoms_extract(
def is_covid_match(m: ctakesclient.typesystem.MatchText):
    """Report whether any concept attribute of ``m`` has a CUI that is in ``covid_symptom_cuis``."""
    # Collect every CUI attached to this match, then test for overlap with the symptom set.
    match_cuis = {attr.cui for attr in m.conceptAttributes}
    return not covid_symptom_cuis.isdisjoint(match_cuis)

matches = list(filter(is_covid_match, matches))
matches = filter(is_covid_match, matches)

# For more deterministic output when regression/unit testing, sort matches by their begin offset,
# breaking ties by the code of the first concept attribute.
# (Sorting is stable, so the secondary key is sorted first and the primary key last.)
matches = sorted(matches, key=lambda x: x.conceptAttributes and x.conceptAttributes[0].code)
matches = sorted(matches, key=lambda x: x.begin)

# OK we have cTAKES symptoms. But let's also filter through cNLP transformers to remove any that are negated
# there too. We have found this to yield better results than cTAKES alone.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"groups": [
"032b2ff6af8c883760d5a44e32ff80454d69551de6438c46be64604ddc744156",
"05d0686aec0a65069a1e5b1a4937f5196b75ae336b7fbe10300882184523f95e",
"13e748c21a7c50f6c59fc4613683cd5d7f76bd5d68fda20f4e81ccce74ea7930",
"364aa545eca0a9744bc67c5ad914e2e9e35dd39a5c1f1a8f902e533a8641238d",
"36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{"id": "032b2ff6af8c883760d5a44e32ff80454d69551de6438c46be64604ddc744156.0", "docref_id": "032b2ff6af8c883760d5a44e32ff80454d69551de6438c46be64604ddc744156", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 608, "end": 615, "text": "fatigue", "polarity": 0, "conceptAttributes": [{"code": "248274002", "cui": "C0015672", "codingScheme": "SNOMEDCT_US", "tui": "T184"}, {"code": "84229001", "cui": "C0015672", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "032b2ff6af8c883760d5a44e32ff80454d69551de6438c46be64604ddc744156.1", "docref_id": "032b2ff6af8c883760d5a44e32ff80454d69551de6438c46be64604ddc744156", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 608, "end": 615, "text": "fatigue", "polarity": 0, "conceptAttributes": [{"code": "n/a", "cui": "C0015672", "codingScheme": "custom", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "032b2ff6af8c883760d5a44e32ff80454d69551de6438c46be64604ddc744156.2", "docref_id": "032b2ff6af8c883760d5a44e32ff80454d69551de6438c46be64604ddc744156", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 812, "end": 821, "text": "headaches", "polarity": 0, "conceptAttributes": [{"code": "n/a", "cui": "C0018681", "codingScheme": "custom", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "05d0686aec0a65069a1e5b1a4937f5196b75ae336b7fbe10300882184523f95e.0", "docref_id": "05d0686aec0a65069a1e5b1a4937f5196b75ae336b7fbe10300882184523f95e", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 6, "end": 14, "text": "Headache", "polarity": 0, "conceptAttributes": [{"code": "25064002", "cui": "C0018681", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "05d0686aec0a65069a1e5b1a4937f5196b75ae336b7fbe10300882184523f95e.1", "docref_id": "05d0686aec0a65069a1e5b1a4937f5196b75ae336b7fbe10300882184523f95e", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 6, "end": 14, "text": "Headache", "polarity": 0, "conceptAttributes": [{"code": "n/a", "cui": "C0018681", "codingScheme": "custom", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "05d0686aec0a65069a1e5b1a4937f5196b75ae336b7fbe10300882184523f95e.2", "docref_id": "05d0686aec0a65069a1e5b1a4937f5196b75ae336b7fbe10300882184523f95e", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 114, "end": 133, "text": "nausea and vomiting", "polarity": 0, "conceptAttributes": [{"code": "16932000", "cui": "C0027498", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "05d0686aec0a65069a1e5b1a4937f5196b75ae336b7fbe10300882184523f95e.3", "docref_id": "05d0686aec0a65069a1e5b1a4937f5196b75ae336b7fbe10300882184523f95e", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 114, "end": 133, "text": "nausea and vomiting", "polarity": 0, "conceptAttributes": [{"code": "n/a", "cui": "C0027498", "codingScheme": "custom", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "05d0686aec0a65069a1e5b1a4937f5196b75ae336b7fbe10300882184523f95e.4", "docref_id": "05d0686aec0a65069a1e5b1a4937f5196b75ae336b7fbe10300882184523f95e", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 603, "end": 611, "text": "fatigued", "polarity": 0, "conceptAttributes": [{"code": "n/a", "cui": "C0015672", "codingScheme": "custom", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "13e748c21a7c50f6c59fc4613683cd5d7f76bd5d68fda20f4e81ccce74ea7930.2", "docref_id": "13e748c21a7c50f6c59fc4613683cd5d7f76bd5d68fda20f4e81ccce74ea7930", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 303, "end": 318, "text": "short of breath", "polarity": 0, "conceptAttributes": [{"code": "n/a", "cui": "C0013404", "codingScheme": "custom", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8.0", "docref_id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 343, "end": 348, "text": "cough", "polarity": 0, "conceptAttributes": [{"code": "263731006", "cui": "C0010200", "codingScheme": "SNOMEDCT_US", "tui": "T184"}, {"code": "49727002", "cui": "C0010200", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8.1", "docref_id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 343, "end": 348, "text": "cough", "polarity": 0, "conceptAttributes": [{"code": "n/a", "cui": "C0010200", "codingScheme": "custom", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8.2", "docref_id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 350, "end": 356, "text": "fevers", "polarity": 0, "conceptAttributes": [{"code": "n/a", "cui": "C0015967", "codingScheme": "custom", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8.3", "docref_id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 372, "end": 378, "text": "chills", "polarity": 0, "conceptAttributes": [{"code": "n/a", "cui": "C0085593", "codingScheme": "custom", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8.4", "docref_id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 1536, "end": 1541, "text": "fever", "polarity": 0, "conceptAttributes": [{"code": "386661006", "cui": "C0015967", "codingScheme": "SNOMEDCT_US", "tui": "T184"}, {"code": "50177009", "cui": "C0015967", "codingScheme": "SNOMEDCT_US", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8.5", "docref_id": "36ecd07bc327bba4e5ea36e34e66ca7f4f54360aef5bbcafc745c9f144aa87f8", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4, "match": {"begin": 1536, "end": 1541, "text": "fever", "polarity": 0, "conceptAttributes": [{"code": "n/a", "cui": "C0015967", "codingScheme": "custom", "tui": "T184"}], "type": "SignSymptomMention"}}
{"id": "364aa545eca0a9744bc67c5ad914e2e9e35dd39a5c1f1a8f902e533a8641238d.0", "docref_id": "364aa545eca0a9744bc67c5ad914e2e9e35dd39a5c1f1a8f902e533a8641238d", "encounter_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "subject_id": "827db3458e3d956437c2b43f441eca441851c2f2e937e2c5467fdd0c5f980db5", "task_version": 4}
Loading

0 comments on commit 0d0db81

Please sign in to comment.