Skip to content

Commit 6110d51

Browse files
committed
start example
1 parent b7fceb6 commit 6110d51

File tree

3 files changed

+111
-0
lines changed

3 files changed

+111
-0
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen receptor (AR). SBMA can be caused by this easily.
2+
Keystone plant species such as fig trees are good for the soil.
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import sys
2+
from termcolor import colored
3+
4+
from forte.data.data_pack import DataPack
5+
from forte.data.readers import PlainTextReader
6+
from forte.pipeline import Pipeline
7+
from forte.processors.writers import PackIdJsonPackWriter
8+
9+
from ft.onto.base_ontology import (
10+
Token,
11+
)
12+
from fortex.spacy import SpacyProcessor
13+
14+
from ftx.medical.clinical_ontology import Hyponym, Abbreviation, Phrase
15+
from fortex.health.processors.scispacy_processor import (
16+
ScispaCyProcessor,
17+
)
18+
19+
20+
def main(
    input_path: str,
    output_path: str,
    max_packs: int = -1,
    use_mimic3_reader: bool = True,
):
    """Build the ICD-coding example pipeline, run it, and show every pack.

    The pipeline consists of a reader (MIMIC-III discharge notes or plain
    text), a ``SpacyProcessor`` configured for sentence processing, an
    ``ICDCodingProcessor`` and a ``PackIdJsonPackWriter`` pointed at
    ``output_path``. Each pack produced by the pipeline is passed to
    ``show_data``.

    Args:
        input_path: Path to the mimic3 data if ``use_mimic3_reader`` is true,
            else path to the notes directory.
        output_path: Path to the output directory.
        max_packs: Max number of notes to read from the mimic3 dataset. Set
            to -1 to read all. Only forwarded to the MIMIC-III reader.
        use_mimic3_reader: Read from the mimic3 dataset (true) or from plain
            text (false).
    """
    # NOTE(review): ``Mimic3DischargeNoteReader`` and ``ICDCodingProcessor``
    # are not imported anywhere in this file; the matching imports have to be
    # added before this function can run. Module paths need confirming.
    pl = Pipeline[DataPack]()

    # Plain truthiness instead of ``is False``: an identity check against the
    # ``False`` singleton sends falsy non-bool values (0, None) down the
    # MIMIC-III branch.
    if use_mimic3_reader:
        pl.set_reader(
            Mimic3DischargeNoteReader(),
            config={"max_num_notes": max_packs},
        )
    else:
        pl.set_reader(PlainTextReader())

    pl.add(
        SpacyProcessor(),
        {"processors": ["sentence"], "lang": "en_ner_bionlp13cg_md"},
    )
    pl.add(
        ICDCodingProcessor(),
        {
            "entry_type": "ft.onto.base_ontology.Document",
            "attribute_name": "classification",
            "multi_class": True,
            # You can use other ICD predictors here.
            "model_name": "AkshatSurolia/ICD-10-Code-Prediction",
            "cuda_devices": -1,
        },
    )
    pl.add(
        PackIdJsonPackWriter(),
        {
            "output_dir": output_path,
            "indent": 2,
            "overwrite": True,
            "drop_record": True,
            "zip_pack": True,
        },
    )

    pl.initialize()

    for pack in pl.process_dataset(input_path):
        show_data(pack)
63+
64+
65+
def show_data(pack: DataPack):
    """Print each medical article in ``pack`` together with its ICD code.

    The ICD processor predicts an ICD code for each article and the result is
    stored as ``article.icd_code`` (with the coding version in
    ``article.icd_version``). The articles are packed into the DataPack, so
    they are extracted from it first and their ICD codes are read afterwards.

    Args:
        pack: The processed DataPack to display.
    """
    # Imported locally because the file-level imports bring in other names
    # from this ontology module but not ``MedicalArticle``, which would leave
    # the loop below failing with NameError.
    from ftx.medical.clinical_ontology import MedicalArticle

    for article in pack.get(MedicalArticle):
        article_text = article.text

        # Get the ICD code and its coding version.
        icd_code = article.icd_code
        icd_version = article.icd_version

        print(colored("Article:", "red"), article_text, "\n")
        print(colored(f"ICD-{icd_version} Code:", "cyan"), icd_code, "\n")

        # Pause so the reader can inspect the output before the next article.
        input(colored("Press ENTER to continue...\n", "green"))
82+
83+
84+
# Examples:
#
# Read from MIMIC3:
# python icd_coding.py /path/to/mimiciii/1.4/NOTEEVENTS.csv.gz /path_to_sample_output 1000 True
#
# Read from sample_data:
# python icd_coding.py sample_data/ /path_to_sample_output 1000 False
def parse_cli_args(argv):
    """Translate a ``sys.argv``-style list into the arguments of ``main``.

    Args:
        argv: Program name followed by ``input_path``, ``output_path`` and,
            optionally, ``max_packs`` and ``use_mimic3_reader``.

    Returns:
        A tuple ``(input_path, output_path, max_packs, use_mimic3_reader)``.
        ``max_packs`` falls back to -1 and ``use_mimic3_reader`` to True when
        they are not given on the command line.

    Raises:
        SystemExit: If fewer than two positional arguments are supplied.
        ValueError: If ``max_packs`` is not an integer literal.
    """
    args = list(argv[1:])
    if len(args) < 2:
        raise SystemExit(
            "usage: python icd_coding.py INPUT_PATH OUTPUT_PATH "
            "[MAX_PACKS] [USE_MIMIC3_READER]"
        )

    max_packs = int(args[2]) if len(args) > 2 else -1
    # Only the literal "true" (case-insensitive) enables the MIMIC-III reader.
    use_mimic3_reader = args[3].lower() == "true" if len(args) > 3 else True
    return args[0], args[1], max_packs, use_mimic3_reader


# Guarded so that importing this module does not start the pipeline.
if __name__ == "__main__":
    main(*parse_cli_args(sys.argv))

fortex/health/processors/test.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import spacy
2+
from timexy import Timexy
3+
4+
# Demo: load the small English spaCy model, register the timexy component
# ahead of the NER component, and print every entity found in a sample text.
nlp = spacy.load("en_core_web_sm")

# Optional component configuration; only needed when varying from the
# default values.
config = dict(
    kb_id_type="timex3",  # possible values: 'timex3'(default), 'timestamp'
    label="timexy",  # default: 'timexy'
    overwrite=False,  # default: False
)
nlp.add_pipe("timexy", config=config, before="ner")

doc = nlp("Today is the 10.10.2010. I was in Paris for six years. 2 pm 3 days ago")

# One line per entity: text, label and knowledge-base id, tab separated.
for ent in doc.ents:
    print(ent.text, ent.label_, ent.kb_id_, sep="\t")

0 commit comments

Comments
 (0)