Skip to content

ci: add full stack NLP regression test #869

ci: add full stack NLP regression test

ci: add full stack NLP regression test #869

Workflow file for this run

name: CI
on:
  # Run for every pull request (no branch or path filters).
  pull_request:
  push:
    branches:
      - main
    # Skip push-triggered runs when a push only touches files under docs/
    # or top-level Markdown files. This filter applies to `push` only.
    paths-ignore:
      - 'docs/**'
      - '*.md'

# The goal here is to cancel older workflows when a PR is updated (because it's pointless work)
concurrency:
  # One group per workflow + branch: `head_ref` is only set for pull request
  # events, so other events fall back to `ref_name`.
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}
  cancel-in-progress: true
jobs:
  # unittest:
  #   name: unit tests
  #   runs-on: ubuntu-22.04
  #   strategy:
  #     matrix:
  #       # while we are still private, don't go crazy with the Python versions as they eat up CI minutes
  #       python-version: ["3.10"]
  #
  #   steps:
  #     - uses: actions/checkout@v4
  #
  #     - name: Set up Python ${{ matrix.python-version }}
  #       uses: actions/setup-python@v4
  #       with:
  #         python-version: ${{ matrix.python-version }}
  #
  #     - name: Install dependencies
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install pytest
  #         pip install .[tests]
  #
  #     - name: Check out MS tool
  #       uses: actions/checkout@v4
  #       with:
  #         repository: microsoft/Tools-for-Health-Data-Anonymization
  #         path: mstool
  #
  #     - name: Build MS tool
  #       run: |
  #         sudo apt-get update
  #         sudo apt-get install dotnet6
  #         dotnet publish \
  #           --runtime=linux-x64 \
  #           --configuration=Release \
  #           -p:PublishSingleFile=true \
  #           --output=$HOME/.local/bin \
  #           mstool/FHIR/src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool
  #
  #     - name: Test with pytest
  #       run: |
  #         python -m pytest

  # Full-stack NLP regression test: builds the ETL image, brings up the
  # `covid-symptom` compose profile, runs one NLP task against the fixtures in
  # tests/data/nlp-regression, and diffs the result against expected-output.
  nlp-regression:
    runs-on: ubuntu-latest
    env:
      # Exposed to every step of this job.
      # NOTE(review): repository secrets are normally not available to
      # pull_request runs from forks -- confirm this job is expected to run there.
      UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}
    steps:
      - uses: actions/checkout@v4

      - name: Install Docker
        uses: docker/setup-buildx-action@v3

      - name: Build ETL image
        uses: docker/build-push-action@v5
        with:
          load: true  # just build, no push
          tags: smartonfhir/cumulus-etl:latest

      # `up -d` also starts the profile's services in the background, so they
      # are already running when the ETL container is launched below.
      - name: Download NLP images
        run: docker compose --profile covid-symptom up -d --quiet-pull

      - name: Run NLP
        run: |
          export DATADIR="$(realpath tests/data/nlp-regression)"
          # Run the NLP task
          # (positional args look like input / output / PHI dirs -- confirm against the ETL CLI)
          docker compose run --rm \
            --volume "$DATADIR:/in" \
            cumulus-etl \
            /in/input \
            /in/run-output \
            /in/phi \
            --output-format=ndjson \
            --task covid_symptom__nlp_results
          # Compare results
          export OUTDIR="$DATADIR/run-output/covid_symptom__nlp_results"
          # Take ownership so sed can edit in place (presumably the container wrote these as another user)
          sudo chown -R "$(id -u)" "$OUTDIR"
          # Drop the run-specific "generated_on" field so the diff only shows real changes
          sed -i 's/"generated_on": "[^"]*", //g' "$OUTDIR"/*.ndjson
          diff -upr "$DATADIR/expected-output" "$OUTDIR"
  # lint:
  #   runs-on: ubuntu-22.04
  #   steps:
  #     - uses: actions/checkout@v4
  #
  #     - name: Install linters
  #       # black is synced with the .pre-commit-hooks version
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install bandit[toml] pycodestyle pylint black==23.11.0
  #
  #     - name: Run pycodestyle
  #       # E203: pycodestyle is a little too rigid about slices & whitespace
  #       #  See https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#slices
  #       # W503: a default ignore that we are restoring
  #       run: |
  #         pycodestyle --max-line-length=120 --ignore=E203,W503 .
  #
  #     - name: Run pylint
  #       if: success() || failure()  # still run pylint if above checks fail
  #       run: |
  #         pylint cumulus_etl tests
  #
  #     - name: Run bandit
  #       if: success() || failure()  # still run bandit if above checks fail
  #       run: |
  #         bandit -c pyproject.toml -r .
  #
  #     - name: Run black
  #       if: success() || failure()  # still run black if above checks fail
  #       run: |
  #         black --check --verbose --line-length 120 .