Skip to content

Commit

Permalink
Merge pull request #269 from smart-on-fhir/mikix/ctakes-cache
Browse files (browse the repository at this point in the history)
fix: change covid task cache dir due to recent dictionary change
  • Loading branch information
mikix authored Aug 28, 2023
2 parents d08b595 + a8754d2 commit c93e95a
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 20 deletions.
5 changes: 2 additions & 3 deletions cumulus_etl/etl/studies/covid_symptom/covid_ctakes.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,8 @@ async def covid_symptoms_extract(

# cTAKES cache namespace history (and thus, cache invalidation history):
# v1: original cTAKES processing
# TODO: Ideally we'd also be able to ask ctakesclient for NLP algorithm information as part of this namespace.
# For now, we'll manually update this namespace if/when the cTAKES algorithm we use changes.
ctakes_namespace = "covid_symptom_v1"
# v2+: see CovidSymptomNlpResultsTask's version history
ctakes_namespace = f"covid_symptom_v{task_version}"

# cNLP cache namespace history (and thus, cache invalidation history):
# v1: original addition of cNLP filtering
Expand Down
7 changes: 6 additions & 1 deletion cumulus_etl/etl/studies/covid_symptom/covid_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,14 @@ class CovidSymptomNlpResultsTask(tasks.EtlTask):
# We could combine all that info into a field we save with the results. But it's more human-friendly to have a
# simple version to refer to. So anytime these properties get changed, bump the version and record the old bundle
# of metadata too.
task_version = 1
task_version = 2

# Task Version History:
# ** 2 (2023-08): Corrected the cache location (version 1 results might be using stale cache) **
# cTAKES: smartonfhir/ctakes-covid:1.1
# cNLP: smartonfhir/cnlp-transformers:negation-0.4
# ctakesclient: 5.0
#
# ** 1 (2023-08): Updated ICD10 codes from ctakesclient **
# cTAKES: smartonfhir/ctakes-covid:1.1
# cNLP: smartonfhir/cnlp-transformers:negation-0.4
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{"id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0.0","docref_id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0","encounter_id":"5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f","subject_id":"26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0.1","docref_id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0","encounter_id":"5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f","subject_id":"26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588.0","docref_id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588","encounter_id":"fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687","subject_id":"49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588.1","docref_id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588","encounter_id":"fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687","subject_id":"49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0.0","docref_id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0","encounter_id":"5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f","subject_id":"26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0.1","docref_id":"228b982ddae20b8da26a212666995acde914b941a4ff7c314adf89d02c3831f0","encounter_id":"5388b42b262276bfbcb659b1ff937b0e3e5b0ec8901ed3ad53fa387fd6f2589f","subject_id":"26f4d6d38eaa3347b8bd22bb4bc66ecbff5384926152738d282e841a247bfefb","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588.0","docref_id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588","encounter_id":"fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687","subject_id":"49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588.1","docref_id":"dfc45702900136d5fb09b8737853f5c727132882bd6ba0871942685c0b1df588","encounter_id":"fb29ea2a68ca2e1e4bbe22bdeedf021d94ec89f7e3d38ecbe908a8f2b3d89687","subject_id":"49fbb06b4b49eb49a096cf2a96674fb84a4d52ee74ec25c8f6f26023cb4764a7","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.0","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.1","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.0","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.1","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.0","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.1","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.0","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.1","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.0","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.1","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.0","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.1","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":1,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.0","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd.1","docref_id":"f29736c29af5b962b3947fd40bed6b8c3e97c642b72aaa08e082fec05148e7dd","encounter_id":"d30aad4b-4503-8e22-0bc4-621b94398520","subject_id":"118dc10e-7745-20d7-e98d-7c358a84c15c","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":6,"end":9,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.0","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"386661006","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"},{"code":"50177009","cui":"C0015967","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
{"id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971.1","docref_id":"c601849ceffe49dba22ee952533ac87928cd7a472dee6d0390d53c9130519971","encounter_id":"af1e6186-3f9a-1fa9-3c73-cfa56c84a056","subject_id":"1de9ea66-70d3-da1f-c735-df5ef7697fb9","generated_on":"2021-09-14T21:23:45+00:00","task_version":2,"match":{"begin":7,"end":10,"text":"for","polarity":0,"conceptAttributes":[{"code":"422587007","cui":"C0027497","codingScheme":"SNOMEDCT_US","tui":"T184"}],"type":"SignSymptomMention"}}
8 changes: 4 additions & 4 deletions tests/etl/test_etl_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,13 +379,13 @@ async def test_stores_cached_json(self):

for index, checksum in enumerate(self.expected_checksums):
ner = fake_ctakes_extract(facts[index])
self.assertEqual(ner.as_json(), common.read_json(self.path_for_checksum("covid_symptom_v1", checksum)))
self.assertEqual([0, 0], common.read_json(self.path_for_checksum("covid_symptom_v1-cnlp_v2", checksum)))
self.assertEqual(ner.as_json(), common.read_json(self.path_for_checksum("covid_symptom_v2", checksum)))
self.assertEqual([0, 0], common.read_json(self.path_for_checksum("covid_symptom_v2-cnlp_v2", checksum)))

async def test_does_not_hit_server_if_cache_exists(self):
for index, checksum in enumerate(self.expected_checksums):
# Write out some fake results to the cache location
filename = self.path_for_checksum("covid_symptom_v1", checksum)
filename = self.path_for_checksum("covid_symptom_v2", checksum)
os.makedirs(os.path.dirname(filename))
common.write_json(
filename,
Expand All @@ -405,7 +405,7 @@ async def test_does_not_hit_server_if_cache_exists(self):
},
)

cnlp_filename = self.path_for_checksum("covid_symptom_v1-cnlp_v2", checksum)
cnlp_filename = self.path_for_checksum("covid_symptom_v2-cnlp_v2", checksum)
os.makedirs(os.path.dirname(cnlp_filename))
common.write_json(cnlp_filename, [0])

Expand Down

0 comments on commit c93e95a

Please sign in to comment.