Skip to content

Commit

Permalink
final changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
Padraig20 committed Jan 16, 2024
1 parent cd6bf44 commit fa8a8db
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 47 deletions.
4 changes: 3 additions & 1 deletion frontend/src/app/components/home/home.component.html
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
<h1 class="w-fit mx-auto my-4 sm:my-8 md:my-10 text-2xl sm:text-3xl md:text-5xl md:h-14 text-center font-bold bg-clip-text text-transparent bg-gradient-to-r from-sky-500 dark:from-sky-300 to-sky-800 dark:to-sky-600 tracking-wider">Admission Note Analyzer</h1>
<h1 class="w-fit mx-auto my-4 sm:my-8 md:my-10 text-2xl sm:text-3xl md:text-5xl md:h-14 text-center font-bold bg-clip-text text-transparent bg-gradient-to-r from-sky-500 dark:from-sky-300 to-sky-800 dark:to-sky-600 tracking-wider">
Medical Condition Extractor
</h1>

<main class="max-w-5xl mx-auto my-4 sm:my-8 md:my-10 flex flex-col">
<textarea [(ngModel)]="text" rows="5" placeholder="Enter your admission note here" autofocus
Expand Down
6 changes: 1 addition & 5 deletions frontend/src/app/components/home/home.component.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,7 @@ export class HomeComponent {
'c pain. The tumor is located in the T-L spine, unresectable anaplastic astrocytoma s/p radiation. Complicated by ' +
'progressive lower extremity weakness and urinary retention. Patient initially presented with RLE weakness where h' +
'is right knee gave out with difficulty walking and right anterior thigh numbness. MRI showed a spinal cord conus ' +
'mass which was biopsied and found to be anaplastic astrocytoma. Therapy included field radiation t10-l1 followed ' +
'by 11 cycles of temozolomide 7 days on and 7 days off. This was followed by CPT-11 Weekly x4 with Avastin Q2 week' +
's/ 2 weeks rest and repeat cycle. On ROS, pt denies pain, lightheadedness, headache, neck pain, sore throat, recent ' +
'illness or sick contacts, cough, shortness of breath, chest discomfort, heartburn, abd pain, n/v, diarrhea, ' +
'constipation, dysuria. '
'mass which was biopsied and found to be anaplastic astrocytoma.'
}

analyzeNote() {
Expand Down
2 changes: 1 addition & 1 deletion src/api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import argparse
parser = argparse.ArgumentParser(description='The backend of the specified frontend. Service obtains sentences and predicts entities.')

parser.add_argument('-l', '--length', type=bool, default=128,
parser.add_argument('-l', '--length', type=int, default=128,
help='Choose the maximum length of the model\'s input layer.')
parser.add_argument('-m', '--model', type=str, default='../models/medcondbert.pth',
help='Choose the directory of the model to be used for prediction.')
Expand Down
42 changes: 23 additions & 19 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,3 @@
from utils.dataloader import Dataloader
from utils.BertArchitecture import BertNER
from utils.BertArchitecture import BioBertNER
from utils.metric_tracking import MetricsTracking
from utils.training import train_loop

import torch
from torch.optim import SGD
from torch.optim import Adam
from torch.utils.data import DataLoader

import numpy as np
import pandas as pd

from tqdm import tqdm

#-------MAIN-------#

import argparse

parser = argparse.ArgumentParser(
Expand All @@ -35,9 +17,29 @@
help='Choose whether the BioBERT model should be used as baseline or not.')
parser.add_argument('-v', '--verbose', type=bool, default=False,
help='Choose whether the model should be evaluated after each epoch or only after the training.')
parser.add_argument('-l', '--input_length', type=int, default=128,
help='Choose the maximum length of the model\'s input layer.')

args = parser.parse_args()

from utils.dataloader import Dataloader
from utils.BertArchitecture import BertNER
from utils.BertArchitecture import BioBertNER
from utils.metric_tracking import MetricsTracking
from utils.training import train_loop

import torch
from torch.optim import SGD
from torch.optim import Adam
from torch.utils.data import DataLoader

import numpy as np
import pandas as pd

from tqdm import tqdm

#-------MAIN-------#

if not args.transfer_learning:
print("Training base BERT model...")
model = BertNER(3) #O, B-MEDCOND, I-MEDCOND -> 3 entities
Expand Down Expand Up @@ -69,10 +71,12 @@
2:'O'
}

dataloader = Dataloader(label_to_ids, ids_to_label, args.transfer_learning)
dataloader = Dataloader(label_to_ids, ids_to_label, args.transfer_learning, args.input_length)

train, test = dataloader.load_dataset()

print(len(train.__getitem__(0)['input_ids']))

if args.optimizer == 'SGD':
print("Using SGD optimizer...")
optimizer = SGD(model.parameters(), lr=args.learning_rate, momentum = 0.9)
Expand Down
30 changes: 15 additions & 15 deletions src/predict_single_sentence.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,7 @@
from utils.dataloader import Dataloader
from utils.BertArchitecture import BertNER, BioBertNER
from utils.metric_tracking import MetricsTracking

import torch
from torch.optim import SGD
from torch.utils.data import DataLoader

import numpy as np
import pandas as pd

from tqdm import tqdm
from transformers import BertTokenizer,BertForTokenClassification

import argparse
parser = argparse.ArgumentParser(description='Enter a sentence for the model to work on.')

parser.add_argument('-l', '--length', type=bool, default=128,
parser.add_argument('-l', '--length', type=int, default=128,
help='Choose the maximum length of the model\'s input layer.')
parser.add_argument('-m', '--model', type=str, default='../models/medcondbert.pth',
help='Choose the directory of the model to be used for prediction.')
Expand All @@ -29,6 +15,20 @@
model_path = args.model
sentence = args.sentence

from utils.dataloader import Dataloader
from utils.BertArchitecture import BertNER, BioBertNER
from utils.metric_tracking import MetricsTracking

import torch
from torch.optim import SGD
from torch.utils.data import DataLoader

import numpy as np
import pandas as pd

from tqdm import tqdm
from transformers import BertTokenizer,BertForTokenClassification

if not args.transfer_learning:
model = BertNER(3)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
Expand Down
12 changes: 6 additions & 6 deletions src/utils/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Dataloader():
tokenization of the data.
"""

def __init__(self, label_to_ids, ids_to_label, transfer_learning=False, max_tokens=128):
def __init__(self, label_to_ids, ids_to_label, transfer_learning, max_tokens):
self.label_to_ids = label_to_ids
self.ids_to_label = ids_to_label
self.max_tokens = max_tokens
Expand Down Expand Up @@ -49,12 +49,12 @@ def load_dataset(self, full = False):
train_data = data.sample((int) (len(data)*0.8), random_state=7).reset_index(drop=True)
test_data = data.drop(train_data.index).reset_index(drop=True)

train_dataset = Custom_Dataset(train_data, tokenizer, self.label_to_ids, self.ids_to_label)
test_dataset = Custom_Dataset(test_data, tokenizer, self.label_to_ids, self.ids_to_label)
train_dataset = Custom_Dataset(train_data, tokenizer, self.label_to_ids, self.ids_to_label, self.max_tokens)
test_dataset = Custom_Dataset(test_data, tokenizer, self.label_to_ids, self.ids_to_label, self.max_tokens)

return train_dataset, test_dataset
else:
dataset = Custom_Dataset(data, tokenizer, self.label_to_ids, self.ids_to_label)
dataset = Custom_Dataset(data, tokenizer, self.label_to_ids, self.ids_to_label, self.max_tokens)
return dataset

def load_custom(self, data):
Expand All @@ -75,7 +75,7 @@ def load_custom(self, data):
else:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenizer.add_tokens(['B-MEDCOND', 'I-MEDCOND'])
dataset = Custom_Dataset(data, tokenizer, self.label_to_ids, self.ids_to_label)
dataset = Custom_Dataset(data, tokenizer, self.label_to_ids, self.ids_to_label, self.max_tokens)
return dataset

def convert_id_to_label(self, ids):
Expand Down Expand Up @@ -124,7 +124,7 @@ class Custom_Dataset(Dataset):
Dataset used for loading and tokenizing sentences on-the-fly.
"""

def __init__(self, data, tokenizer, label_to_ids, ids_to_label, max_tokens=128):
def __init__(self, data, tokenizer, label_to_ids, ids_to_label, max_tokens):
self.data = data
self.tokenizer = tokenizer
self.label_to_ids = label_to_ids
Expand Down

0 comments on commit fa8a8db

Please sign in to comment.