ci: add full stack NLP regression test #869

Workflow file for this run

	name: CI
	on:
	pull_request:
	push:
	branches:
	- main
	paths-ignore:
	- 'docs/**'
	- '*.md'

	# The goal here is to cancel older workflows when a PR is updated (because it's pointless work)
	concurrency:
	group: ${{ github.workflow }}-${{ github.head_ref \|\| github.ref_name }}
	cancel-in-progress: true

	jobs:
	# unittest:
	# name: unit tests
	# runs-on: ubuntu-22.04
	# strategy:
	# matrix:
	# # while we are still private, don't go crazy with the Python versions as they eat up CI minutes
	# python-version: ["3.10"]
	#
	# steps:
	# - uses: actions/checkout@v4
	#
	# - name: Set up Python ${{ matrix.python-version }}
	# uses: actions/setup-python@v4
	# with:
	# python-version: ${{ matrix.python-version }}
	#
	# - name: Install dependencies
	# run: \|
	# python -m pip install --upgrade pip
	# pip install pytest
	# pip install .[tests]
	#
	# - name: Check out MS tool
	# uses: actions/checkout@v4
	# with:
	# repository: microsoft/Tools-for-Health-Data-Anonymization
	# path: mstool
	#
	# - name: Build MS tool
	# run: \|
	# sudo apt-get update
	# sudo apt-get install dotnet6
	# dotnet publish \
	# --runtime=linux-x64 \
	# --configuration=Release \
	# -p:PublishSingleFile=true \
	# --output=$HOME/.local/bin \
	# mstool/FHIR/src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool
	#
	# - name: Test with pytest
	# run: \|
	# python -m pytest

	nlp-regression:
	runs-on: ubuntu-latest
	env:
	UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}
	steps:
	- uses: actions/checkout@v4

	- name: Install Docker
	uses: docker/setup-buildx-action@v3

	- name: Build ETL image
	uses: docker/build-push-action@v5
	with:
	load: true # just build, no push
	tags: smartonfhir/cumulus-etl:latest

	- name: Download NLP images
	run: docker compose --profile covid-symptom up -d --quiet-pull

	- name: Run NLP
	run: \|
	export DATADIR=$(realpath tests/data/nlp-regression)

	# Run the NLP task
	docker compose run --rm \
	--volume $DATADIR:/in \
	cumulus-etl \
	/in/input \
	/in/run-output \
	/in/phi \
	--output-format=ndjson \
	--task covid_symptom__nlp_results

	# Compare results
	export OUTDIR=$DATADIR/run-output/covid_symptom__nlp_results
	sudo chown -R $(id -u) $OUTDIR
	sed -i 's/"generated_on": "[^"]", //g' $OUTDIR/.ndjson
	diff -upr $DATADIR/expected-output $OUTDIR

	# lint:
	# runs-on: ubuntu-22.04
	# steps:
	# - uses: actions/checkout@v4
	#
	# - name: Install linters
	# # black is synced with the .pre-commit-hooks version
	# run: \|
	# python -m pip install --upgrade pip
	# pip install bandit[toml] pycodestyle pylint black==23.11.0
	#
	# - name: Run pycodestyle
	# # E203: pycodestyle is a little too rigid about slices & whitespace
	# # See https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#slices
	# # W503: a default ignore that we are restoring
	# run: \|
	# pycodestyle --max-line-length=120 --ignore=E203,W503 .
	#
	# - name: Run pylint
	# if: success() \|\| failure() # still run pylint if above checks fail
	# run: \|
	# pylint cumulus_etl tests
	#
	# - name: Run bandit
	# if: success() \|\| failure() # still run bandit if above checks fail
	# run: \|
	# bandit -c pyproject.toml -r .
	#
	# - name: Run black
	# if: success() \|\| failure() # still run black if above checks fails
	# run: \|
	# black --check --verbose --line-length 120 .

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ci: add full stack NLP regression test #869

Workflow file

ci: add full stack NLP regression test #869

Jobs

Run details

Workflow file for this run