diff --git a/cumulus_etl/etl/studies/covid_symptom/covid_ctakes.py b/cumulus_etl/etl/studies/covid_symptom/covid_ctakes.py index 30bebd41..377ef088 100644 --- a/cumulus_etl/etl/studies/covid_symptom/covid_ctakes.py +++ b/cumulus_etl/etl/studies/covid_symptom/covid_ctakes.py @@ -46,9 +46,8 @@ async def covid_symptoms_extract( # cTAKES cache namespace history (and thus, cache invalidation history): # v1: original cTAKES processing - # TODO: Ideally we'd also be able to ask ctakesclient for NLP algorithm information as part of this namespace. - # For now, we'll manually update this namespace if/when the cTAKES algorithm we use changes. - ctakes_namespace = "covid_symptom_v1" + # v2+: see CovidSymptomNlpResultsTask's version history + ctakes_namespace = f"covid_symptom_v{task_version}" # cNLP cache namespace history (and thus, cache invalidation history): # v1: original addition of cNLP filtering diff --git a/cumulus_etl/etl/studies/covid_symptom/covid_tasks.py b/cumulus_etl/etl/studies/covid_symptom/covid_tasks.py index f192fcc9..95db4791 100644 --- a/cumulus_etl/etl/studies/covid_symptom/covid_tasks.py +++ b/cumulus_etl/etl/studies/covid_symptom/covid_tasks.py @@ -73,9 +73,14 @@ class CovidSymptomNlpResultsTask(tasks.EtlTask): # We could combine all that info into a field we save with the results. But it's more human-friendly to have a # simple version to refer to. So anytime these properties get changed, bump the version and record the old bundle # of metadata too. - task_version = 1 + task_version = 2 # Task Version History: + # ** 2 (2023-08): Corrected the cache location (version 1 results might be using stale cache) ** + # cTAKES: smartonfhir/ctakes-covid:1.1 + # cNLP: smartonfhir/cnlp-transformers:negation-0.4 + # ctakesclient: 5.0 + # # ** 1 (2023-08): Updated ICD10 codes from ctakesclient ** # cTAKES: smartonfhir/ctakes-covid:1.1 # cNLP: smartonfhir/cnlp-transformers:negation-0.4 diff --git a/tests/data/i2b2/output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson b/tests/data/i2b2/output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson index 91526a38..289d2a0c 100644 --- a/tests/data/i2b2/output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson +++ b/tests/data/i2b2/output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson @@ -1,4 +1,4 @@ -{"id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0.0","docref_id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0","encounter_id":"5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f","subject_id":"26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} -{"id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0.1","docref_id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0","encounter_id":"5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f","subject_id":"26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} -{"id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588.0","docref_id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588","encounter_id":"fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687","subject_id":"49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} -{"id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588.1","docref_id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588","encounter_id":"fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687","subject_id":"49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0.0","docref_id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0","encounter_id":"5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f","subject_id":"26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0.1","docref_id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0","encounter_id":"5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f","subject_id":"26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588.0","docref_id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588","encounter_id":"fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687","subject_id":"49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588.1","docref_id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588","encounter_id":"fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687","subject_id":"49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} diff --git a/tests/data/simple/batched-output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson b/tests/data/simple/batched-output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson index 4c18e6df..ce565cd8 100644 --- a/tests/data/simple/batched-output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson +++ b/tests/data/simple/batched-output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson @@ -1,2 +1,2 @@ -{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.0","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} -{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.1","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.0","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.1","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} diff --git a/tests/data/simple/batched-output/covid_symptom__nlp_results/covid_symptom__nlp_results.001.ndjson b/tests/data/simple/batched-output/covid_symptom__nlp_results/covid_symptom__nlp_results.001.ndjson index 595b4e1b..3b014925 100644 --- a/tests/data/simple/batched-output/covid_symptom__nlp_results/covid_symptom__nlp_results.001.ndjson +++ b/tests/data/simple/batched-output/covid_symptom__nlp_results/covid_symptom__nlp_results.001.ndjson @@ -1,2 +1,2 @@ -{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.0","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} -{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.1","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.0","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.1","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} diff --git a/tests/data/simple/output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson b/tests/data/simple/output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson index 4aedc6da..0d9514a0 100644 --- a/tests/data/simple/output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson +++ b/tests/data/simple/output/covid_symptom__nlp_results/covid_symptom__nlp_results.000.ndjson @@ -1,4 +1,4 @@ -{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.0","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} -{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.1","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} -{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.0","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} -{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.1","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.0","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.1","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.0","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} +{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.1","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}} diff --git a/tests/etl/test_etl_cli.py b/tests/etl/test_etl_cli.py index 1fcbda97..ef601330 100644 --- a/tests/etl/test_etl_cli.py +++ b/tests/etl/test_etl_cli.py @@ -379,13 +379,13 @@ async def test_stores_cached_json(self): for index, checksum in enumerate(self.expected_checksums): ner = fake_ctakes_extract(facts[index]) - self.assertEqual(ner.as_json(), common.read_json(self.path_for_checksum("covid_symptom_v1", checksum))) - self.assertEqual([0, 0], common.read_json(self.path_for_checksum("covid_symptom_v1-cnlp_v2", checksum))) + self.assertEqual(ner.as_json(), common.read_json(self.path_for_checksum("covid_symptom_v2", checksum))) + self.assertEqual([0, 0], common.read_json(self.path_for_checksum("covid_symptom_v2-cnlp_v2", checksum))) async def test_does_not_hit_server_if_cache_exists(self): for index, checksum in enumerate(self.expected_checksums): # Write out some fake results to the cache location - filename = self.path_for_checksum("covid_symptom_v1", checksum) + filename = self.path_for_checksum("covid_symptom_v2", checksum) os.makedirs(os.path.dirname(filename)) common.write_json( filename, @@ -405,7 +405,7 @@ async def test_does_not_hit_server_if_cache_exists(self): }, ) - cnlp_filename = self.path_for_checksum("covid_symptom_v1-cnlp_v2", checksum) + cnlp_filename = self.path_for_checksum("covid_symptom_v2-cnlp_v2", checksum) os.makedirs(os.path.dirname(cnlp_filename)) common.write_json(cnlp_filename, [0])