final changes

Padraig20 · Jan 16, 2024 · fa8a8db
1 parent cd6bf44
commit fa8a8db
Show file tree

Hide file tree

Showing 6 changed files with 49 additions and 47 deletions.
diff --git a/frontend/src/app/components/home/home.component.html b/frontend/src/app/components/home/home.component.html
@@ -1,4 +1,6 @@
-<h1 class="w-fit mx-auto my-4 sm:my-8 md:my-10 text-2xl sm:text-3xl md:text-5xl md:h-14 text-center font-bold bg-clip-text text-transparent bg-gradient-to-r from-sky-500 dark:from-sky-300 to-sky-800 dark:to-sky-600 tracking-wider">Admission Note Analyzer</h1>
+<h1 class="w-fit mx-auto my-4 sm:my-8 md:my-10 text-2xl sm:text-3xl md:text-5xl md:h-14 text-center font-bold bg-clip-text text-transparent bg-gradient-to-r from-sky-500 dark:from-sky-300 to-sky-800 dark:to-sky-600 tracking-wider">
+  Medical Condition Extractor
+</h1>
 
 <main class="max-w-5xl mx-auto my-4 sm:my-8 md:my-10 flex flex-col">
   <textarea [(ngModel)]="text" rows="5" placeholder="Enter your admission note here" autofocus

diff --git a/frontend/src/app/components/home/home.component.ts b/frontend/src/app/components/home/home.component.ts
@@ -39,11 +39,7 @@ export class HomeComponent {
       'c pain. The tumor is located in the T-L spine, unresectable anaplastic astrocytoma s/p radiation. Complicated by ' +
       'progressive lower extremity weakness and urinary retention. Patient initially presented with RLE weakness where h' +
       'is right knee gave out with difficulty walking and right anterior thigh numbness. MRI showed a spinal cord conus ' +
-      'mass which was biopsied and found to be anaplastic astrocytoma. Therapy included field radiation t10-l1 followed ' +
-      'by 11 cycles of temozolomide 7 days on and 7 days off. This was followed by CPT-11 Weekly x4 with Avastin Q2 week' +
-      's/ 2 weeks rest and repeat cycle. On ROS, pt denies pain, lightheadedness, headache, neck pain, sore throat, recent ' +
-      'illness or sick contacts, cough, shortness of breath, chest discomfort, heartburn, abd pain, n/v, diarrhea, ' +
-      'constipation, dysuria. '
+      'mass which was biopsied and found to be anaplastic astrocytoma.'
   }
 
   analyzeNote() {

diff --git a/src/api.py b/src/api.py
@@ -1,7 +1,7 @@
 import argparse
 parser = argparse.ArgumentParser(description='The backend of the specified frontend. Service obtains sentences and predicts entities.')
 
-parser.add_argument('-l', '--length', type=bool, default=128,
+parser.add_argument('-l', '--length', type=int, default=128,
                     help='Choose the maximum length of the model\'s input layer.')
 parser.add_argument('-m', '--model', type=str, default='../models/medcondbert.pth',
                     help='Choose the directory of the model to be used for prediction.')

diff --git a/src/main.py b/src/main.py
@@ -1,21 +1,3 @@
-from utils.dataloader import Dataloader
-from utils.BertArchitecture import BertNER
-from utils.BertArchitecture import BioBertNER
-from utils.metric_tracking import MetricsTracking
-from utils.training import train_loop
-
-import torch
-from torch.optim import SGD
-from torch.optim import Adam
-from torch.utils.data import DataLoader
-
-import numpy as np
-import pandas as pd
-
-from tqdm import tqdm
-
-#-------MAIN-------#
-
 import argparse
 
 parser = argparse.ArgumentParser(
@@ -35,9 +17,29 @@
                     help='Choose whether the BioBERT model should be used as baseline or not.')
 parser.add_argument('-v', '--verbose', type=bool, default=False,
                     help='Choose whether the model should be evaluated after each epoch or only after the training.')
+parser.add_argument('-l', '--input_length', type=int, default=128,
+                    help='Choose the maximum length of the model\'s input layer.')
 
 args = parser.parse_args()
 
+from utils.dataloader import Dataloader
+from utils.BertArchitecture import BertNER
+from utils.BertArchitecture import BioBertNER
+from utils.metric_tracking import MetricsTracking
+from utils.training import train_loop
+
+import torch
+from torch.optim import SGD
+from torch.optim import Adam
+from torch.utils.data import DataLoader
+
+import numpy as np
+import pandas as pd
+
+from tqdm import tqdm
+
+#-------MAIN-------#
+
 if not args.transfer_learning:
     print("Training base BERT model...")
     model = BertNER(3) #O, B-MEDCOND, I-MEDCOND -> 3 entities
@@ -69,10 +71,12 @@
         2:'O'
         }
 
-dataloader = Dataloader(label_to_ids, ids_to_label, args.transfer_learning)
+dataloader = Dataloader(label_to_ids, ids_to_label, args.transfer_learning, args.input_length)
 
 train, test = dataloader.load_dataset()
 
+print(len(train.__getitem__(0)['input_ids']))
+
 if args.optimizer == 'SGD':
     print("Using SGD optimizer...")
     optimizer = SGD(model.parameters(), lr=args.learning_rate, momentum = 0.9)

diff --git a/src/predict_single_sentence.py b/src/predict_single_sentence.py
@@ -1,21 +1,7 @@
-from utils.dataloader import Dataloader
-from utils.BertArchitecture import BertNER, BioBertNER
-from utils.metric_tracking import MetricsTracking
-
-import torch
-from torch.optim import SGD
-from torch.utils.data import DataLoader
-
-import numpy as np
-import pandas as pd
-
-from tqdm import tqdm
-from transformers import BertTokenizer,BertForTokenClassification
-
 import argparse
 parser = argparse.ArgumentParser(description='Enter a sentence for the model to work on.')
 
-parser.add_argument('-l', '--length', type=bool, default=128,
+parser.add_argument('-l', '--length', type=int, default=128,
                     help='Choose the maximum length of the model\'s input layer.')
 parser.add_argument('-m', '--model', type=str, default='../models/medcondbert.pth',
                     help='Choose the directory of the model to be used for prediction.')
@@ -29,6 +15,20 @@
 model_path = args.model
 sentence = args.sentence
 
+from utils.dataloader import Dataloader
+from utils.BertArchitecture import BertNER, BioBertNER
+from utils.metric_tracking import MetricsTracking
+
+import torch
+from torch.optim import SGD
+from torch.utils.data import DataLoader
+
+import numpy as np
+import pandas as pd
+
+from tqdm import tqdm
+from transformers import BertTokenizer,BertForTokenClassification
+
 if not args.transfer_learning:
     model = BertNER(3)
     tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

diff --git a/src/utils/dataloader.py b/src/utils/dataloader.py
@@ -13,7 +13,7 @@ class Dataloader():
     tokenization of the data.
     """
 
-    def __init__(self, label_to_ids, ids_to_label, transfer_learning=False, max_tokens=128):
+    def __init__(self, label_to_ids, ids_to_label, transfer_learning, max_tokens):
         self.label_to_ids = label_to_ids
         self.ids_to_label = ids_to_label
         self.max_tokens = max_tokens
@@ -49,12 +49,12 @@ def load_dataset(self, full = False):
             train_data = data.sample((int) (len(data)*0.8), random_state=7).reset_index(drop=True)
             test_data = data.drop(train_data.index).reset_index(drop=True)
 
-            train_dataset = Custom_Dataset(train_data, tokenizer, self.label_to_ids, self.ids_to_label)
-            test_dataset = Custom_Dataset(test_data, tokenizer, self.label_to_ids, self.ids_to_label)
+            train_dataset = Custom_Dataset(train_data, tokenizer, self.label_to_ids, self.ids_to_label, self.max_tokens)
+            test_dataset = Custom_Dataset(test_data, tokenizer, self.label_to_ids, self.ids_to_label, self.max_tokens)
 
             return train_dataset, test_dataset
         else:
-            dataset = Custom_Dataset(data, tokenizer, self.label_to_ids, self.ids_to_label)
+            dataset = Custom_Dataset(data, tokenizer, self.label_to_ids, self.ids_to_label, self.max_tokens)
             return dataset
 
     def load_custom(self, data):
@@ -75,7 +75,7 @@ def load_custom(self, data):
         else:
             tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
             tokenizer.add_tokens(['B-MEDCOND', 'I-MEDCOND'])
-        dataset = Custom_Dataset(data, tokenizer, self.label_to_ids, self.ids_to_label)
+        dataset = Custom_Dataset(data, tokenizer, self.label_to_ids, self.ids_to_label, self.max_tokens)
         return dataset
 
     def convert_id_to_label(self, ids):
@@ -124,7 +124,7 @@ class Custom_Dataset(Dataset):
     Dataset used for loading and tokenizing sentences on-the-fly.
     """
 
-    def __init__(self, data, tokenizer, label_to_ids, ids_to_label, max_tokens=128):
+    def __init__(self, data, tokenizer, label_to_ids, ids_to_label, max_tokens):
         self.data = data
         self.tokenizer = tokenizer
         self.label_to_ids = label_to_ids