add some notes

metaphysicser · metaphysicser · commit 55162ecbb31a · 2022-05-27T19:52:44.000+08:00
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ We begin our work on the basis of [MISA](https://github.com/declare-lab/MISA) in
 - Python 3.8
 - Pytorch 1.11.0
 
-you could run the code to build the environment.
+You can run the following command to build the environment.
 
 ```shell
 pip install requirements.txt
diff --git a/config.py b/config.py
@@ -1,4 +1,3 @@
-
 import argparse
 from datetime import datetime
 from pathlib import Path
@@ -10,7 +9,7 @@
 word_emb_path = ''
 assert(word_emb_path is not None)
 
-project_dir = Path(__file__).resolve().parent.parent
+project_dir = Path(__file__).resolve().parent
 sdk_dir = project_dir.joinpath('CMU-MultimodalSDK')
 data_dir = project_dir.joinpath('datasets')
 data_dict = {'mosi': data_dir.joinpath('MOSI'), 'mosei': data_dir.joinpath(
@@ -69,11 +68,9 @@ def get_config(parse=True, **optional_kwargs):
     # Mode
     parser.add_argument('--mode', type=str, default='train')
 
-    # parser.add_argument('--use_bert', type=str2bool, default=True)
-
     # Train
     time_now = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
-    parser.add_argument('--name', type=str, default=f"{time_now}")
+    parser.add_argument('--name', type=str, default=f"{time_now}")  # saved-model's name
     parser.add_argument('--batch_size', type=int, default=128)
     parser.add_argument('--n_epoch', type=int, default=100)
     parser.add_argument('--patience', type=int, default=6)
@@ -82,16 +79,17 @@ def get_config(parse=True, **optional_kwargs):
     parser.add_argument('--learning_rate', type=float, default=1e-4)
     parser.add_argument('--optimizer', type=str, default='Adam')
 
-    parser.add_argument('--rnncell', type=str, default='lstm')
-    parser.add_argument('--embedding_size', type=int, default=300)
-    parser.add_argument('--hidden_size', type=int, default=128)
-    parser.add_argument('--mlp_hidden_size', type=int, default=64)
+    parser.add_argument('--rnncell', type=str, default='lstm')  # lstm or GRU
+    parser.add_argument('--embedding_size', type=int, default=300)  # embedding size in bert
+    parser.add_argument('--hidden_size', type=int, default=128)  # modality embedding size
+    parser.add_argument('--mlp_hidden_size', type=int, default=64)  # mlp-communicator hidden size
     parser.add_argument('--dropout', type=float, default=0.5)
-    parser.add_argument('--depth', type=int, default=1)
+    parser.add_argument('--depth', type=int, default=1)  # mlp-communicator depth number
 
     # Selectin activation from 'elu', "hardshrink", "hardtanh", "leakyrelu", "prelu", "relu", "rrelu", "tanh"
     parser.add_argument('--activation', type=str, default='relu')
 
+    # three loss weights
     parser.add_argument('--cls_weight', type=float, default=1)
     parser.add_argument('--polar_weight', type=float, default=0.1)
     parser.add_argument('--scale_weight', type=float, default=0.1)
@@ -102,7 +100,7 @@ def get_config(parse=True, **optional_kwargs):
     parser.add_argument('--test_duration', type=int, default=1)
 
     # Data
-    parser.add_argument('--data', type=str, default='mosi')
+    parser.add_argument('--data', type=str, default='mosi')  # mosi or mosei
 
     # Parse arguments
     if parse:
diff --git a/create_dataset.py b/create_dataset.py
@@ -5,11 +5,23 @@
 import numpy as np
 from tqdm import tqdm_notebook
 from collections import defaultdict
-from mmsdk import mmdatasdk as md
+
+try:
+    from mmsdk import mmdatasdk as md
+except:
+    print("please install mmsdk first(https://github.com/A2Zadeh/CMU-MultimodalSDK)")
 from subprocess import check_call
 import torch
 
-
+"""
+The data-processing method used here is taken from MISA; thanks to:
+@article{hazarika2020misa,
+  title={MISA: Modality-Invariant and-Specific Representations for Multimodal Sentiment Analysis},
+  author={Hazarika, Devamanyu and Zimmermann, Roger and Poria, Soujanya},
+  journal={arXiv preprint arXiv:2005.03545},
+  year={2020}
+}
+"""
 def to_pickle(obj, path):
     with open(path, 'wb') as f:
         pickle.dump(obj, f)
@@ -31,6 +43,7 @@ def return_unk():
     return UNK
 
 
+# build the GloVe embedding matrix for the words in w2i
 def load_emb(w2i, path_to_embedding, embedding_size=300, embedding_vocab=2196017, init_emb=None):
     if init_emb is None:
         emb_mat = np.random.randn(len(w2i), embedding_size)
@@ -52,9 +65,8 @@ def load_emb(w2i, path_to_embedding, embedding_size=300, embedding_vocab=2196017
 
 class MOSI:
     def __init__(self, config):
-
         if config.sdk_dir is None:
-            print("SDK path is not specified! Please specify first in constants/paths.py")
+            print("SDK path is not specified! Please specify first in config.py")
             exit(0)
         else:
             sys.path.append(str(config.sdk_dir))
@@ -104,7 +116,6 @@ def __init__(self, config):
             ]
 
             recipe = {feat: os.path.join(DATA_PATH, feat) + '.csd' for feat in features}
-            print(recipe)
             dataset = md.mmdataset(recipe)
 
             # we define a simple averaging function that does not depend on intervals
@@ -229,7 +240,7 @@ class MOSEI:
     def __init__(self, config):
 
         if config.sdk_dir is None:
-            print("SDK path is not specified! Please specify first in constants/paths.py")
+            print("SDK path is not specified! Please specify first in config.py")
             exit(0)
         else:
             sys.path.append(str(config.sdk_dir))
@@ -243,9 +254,7 @@ def __init__(self, config):
             self.dev = load_pickle(DATA_PATH + '/dev.pkl')
             self.test = load_pickle(DATA_PATH + '/test.pkl')
             self.pretrained_emb, self.word2id = torch.load(CACHE_PATH)
-
         except:
-
             # create folders for storing the data
             if not os.path.exists(DATA_PATH):
                 check_call(' '.join(['mkdir', '-p', DATA_PATH]), shell=True)
@@ -403,4 +412,3 @@ def get_data(self, mode):
         else:
             print("Mode is not set properly (train/dev/test)")
             exit()
-
diff --git a/data_loader.py b/data_loader.py
@@ -7,11 +7,9 @@
 
 bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
 
-
 class MSADataset(Dataset):
     def __init__(self, config):
-
-        ## Fetch dataset
+        # Fetch dataset
         if "mosi" in str(config.data_dir).lower():
             dataset = MOSI(config)
         elif "mosei" in str(config.data_dir).lower():
@@ -22,7 +20,6 @@ def __init__(self, config):
 
         self.data, self.word2id, self.pretrained_emb = dataset.get_data(config.mode)
         self.len = len(self.data)
-
         self.label = np.abs(np.array(self.data)[:, 1])
 
         config.visual_size = self.data[0][0][1].shape[1]
@@ -40,9 +37,7 @@ def __len__(self):
 
 def get_loader(config, shuffle=True):
     """Load DataLoader of given DialogDataset"""
-
     dataset = MSADataset(config)
-
     print(config.mode)
     config.data_len = len(dataset)
 
@@ -51,9 +46,7 @@ def collate_fn(batch):
         Collate functions assume batch = [Dataset[i] for i in index_set]
         '''
         # for later use we sort the batch in descending order of length
-
         batch = sorted(batch, key=lambda x: x[0][0].shape[0], reverse=True)
-
         # get the data out of the batch - use pad sequence util functions from PyTorch to pad things
 
         labels = torch.cat([torch.from_numpy(sample[1]) for sample in batch], dim=0)
diff --git a/new_models.py b/new_models.py
@@ -4,7 +4,6 @@
 from transformers import BertModel, BertConfig
 from einops.layers.torch import Rearrange
 
-
 # let's define a simple model that can deal with multimodal variable length sequence
 class MISA(nn.Module):
     def __init__(self, config):
@@ -13,7 +12,6 @@ def __init__(self, config):
         self.text_size = config.embedding_size
         self.visual_size = config.visual_size
         self.acoustic_size = config.acoustic_size
-
         self.input_sizes = input_sizes = [self.text_size, self.visual_size, self.acoustic_size]
         self.hidden_sizes = hidden_sizes = [int(self.text_size), int(self.visual_size), int(self.acoustic_size)]
         self.output_size = output_size = config.num_classes
@@ -65,8 +63,8 @@ def __init__(self, config):
         self.shared1.add_module('shared_activation_1', nn.Sigmoid())
 
         self.shared2 = nn.Sequential()
-        self.shared2.add_module('shared_1', nn.Linear(in_features=config.hidden_size, out_features=config.hidden_size))
-        self.shared2.add_module('shared_activation_1', nn.Sigmoid())
+        self.shared2.add_module('shared_2', nn.Linear(in_features=config.hidden_size, out_features=config.hidden_size))
+        self.shared2.add_module('shared_activation_2', nn.Sigmoid())
 
         self.fusion = nn.Sequential()
         self.fusion.add_module('fusion_layer_1', nn.Linear(in_features=self.config.hidden_size * 2,
diff --git a/solver.py b/solver.py
@@ -11,11 +11,9 @@
 torch.manual_seed(123)
 torch.cuda.manual_seed_all(123)
 
-
 class Solver(object):
     def __init__(self, train_config, train_data_loader, dev_data_loader, test_data_loader,
                  is_train=True, model=None):
-
         self.scale_criterion = corr_loss()
         self.polar_criterion = cos_loss()
         self.criterion = nn.MSELoss(reduction="mean")
@@ -28,7 +26,7 @@ def __init__(self, train_config, train_data_loader, dev_data_loader, test_data_l
 
     def build(self, cuda=True):
         if self.model is None:
-            self.model = getattr(new_models, self.train_config.model)(self.train_config)
+            self.model = getattr(new_models, self.train_config.model)(self.train_config)  # init the model
 
         # Final list
         for name, param in self.model.named_parameters():
@@ -41,11 +39,9 @@ def build(self, cuda=True):
             elif self.train_config.data == "ur_funny":
                 if "bert" in name:
                     param.requires_grad = False
-
             if 'weight_hh' in name:
                 nn.init.orthogonal_(param)
             # print('\t' + name, param.requires_grad)
-
         if torch.cuda.is_available() and cuda:
             self.model.cuda()
 
@@ -55,11 +51,16 @@ def build(self, cuda=True):
                 lr=self.train_config.learning_rate)
 
     def model_input2output(self, batch):
+        """
+        Compute the model output for one input batch.
+        :param batch: tuple (v, a, y, l, bert_sent, bert_sent_type, bert_sent_mask)
+        :return: y_tilde: the model's predicted output
+                 y: the true labels
+        """
         self.model.zero_grad()
 
         v, a, y, l, bert_sent, bert_sent_type, bert_sent_mask = batch
 
-
         v = to_gpu(v)
         a = to_gpu(a)
         y = to_gpu(y)
@@ -73,6 +74,9 @@ def model_input2output(self, batch):
         return y_tilde, y
 
     def loss_function(self, y_tilde, y):
+        """
+        total_loss = w_1 * cls_loss + w_2 * polar_loss + w_3 * scale_loss
+        """
         polar_loss = self.polar_criterion(self.model.polar_vector, y, y_tilde)
         scale_loss = self.scale_criterion(self.model.scale, y)
         cls_loss = self.criterion(y_tilde, y)
@@ -89,16 +93,14 @@ def train(self):
         lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.5)
 
         train_losses = []
-
         for e in range(self.train_config.n_epoch):
             self.model.train()
             train_loss = []
             for batch in self.train_data_loader:
                 y_tilde, y = self.model_input2output(batch)
-
                 loss = self.loss_function(y_tilde, y)
-                loss.backward()
 
+                loss.backward()
                 self.optimizer.step()
                 train_loss.append(loss.item())
             train_losses.append(train_loss)
@@ -136,18 +138,7 @@ def train(self):
             if num_trials <= 0:
                 print("Running out of patience, early stopping.")
                 break
-
         self.eval(mode="test", to_print=True)
-        # print(f"best test accracy: {best_test_acc}")
-        #
-        # # data = {"train_loss": self.train_loss, "train_task": self.task_loss, "train_polar":self.polar_loss, "train_scale":self.scale_loss,
-        # #         "val_loss": self.val_loss, "val_task": self.task_loss_val, "val_polar":self.polar_loss_val, "val_scale":self.scale_loss_val}
-        # #
-        # data = {"polar":np.concatenate(self.polar_vector, axis=0).squeeze(),"scale":np.concatenate(self.scale_vector, axis=0).squeeze(),"label":np.concatenate(self.label, axis=0).squeeze(),"label_pred":np.concatenate(self.label_pred, axis=0).squeeze()}
-        # # def to_pickle(obj, path):
-        # #     with open(path, 'wb') as f:
-        # #         pickle.dump(obj, f)
-        # # to_pickle(data,"cluster_7.pkl")
 
     def eval(self, mode=None, to_print=False):
         assert (mode is not None)
@@ -160,16 +151,13 @@ def eval(self, mode=None, to_print=False):
             dataloader = self.dev_data_loader
         elif mode == "test":
             dataloader = self.test_data_loader
-
             if to_print:
                 self.model.load_state_dict(torch.load(
                     f'checkpoints/model_{self.train_config.name}.std'))
 
         with torch.no_grad():
-
             for batch in dataloader:
                 y_tilde, y = self.model_input2output(batch)
-
                 loss = self.loss_function(y_tilde, y)
 
                 eval_loss.append(loss.item())
@@ -193,8 +181,6 @@ def multiclass_acc(self, preds, truths):
         :param truths: Float/int array representing the groundtruth classes, dimension (N,)
         :return: Classification accuracy
         """
-        # print(Counter(np.round(preds)))
-        # print(Counter(np.round(truths)))
         return np.sum(np.round(preds) == np.round(truths)) / float(len(truths))
 
     def calc_metrics(self, y_true, y_pred, mode=None, to_print=False):
@@ -229,7 +215,6 @@ def calc_metrics(self, y_true, y_pred, mode=None, to_print=False):
             print("mae: ", mae)
             print("corr: ", corr)
             print("mult_acc5: ", mult_a5)
-
             print("Classification Report (pos/neg) :")
             print(classification_report(binary_truth, binary_preds, digits=5))
             print("Accuracy (pos/neg) ", accuracy_score(binary_truth, binary_preds))
@@ -248,5 +233,4 @@ def calc_metrics(self, y_true, y_pred, mode=None, to_print=False):
             print("F1:", f_score2)
 
         print("Accuracy (non-neg/neg) ", accuracy_score(binary_truth, binary_preds))
-
         return accuracy_score(binary_truth, binary_preds)
diff --git a/train.py b/train.py
@@ -6,11 +6,9 @@
 import torch
 import warnings
 
-
 warnings.filterwarnings("ignore")  # ignore the warning
 
 if __name__ == '__main__':
-    
     # Setting random seed
     random_name = str(random())
     random_seed = 5546
@@ -32,7 +30,6 @@
     dev_data_loader = get_loader(dev_config, shuffle = False)
     test_data_loader = get_loader(test_config, shuffle = False)
 
-
     # Solver is a wrapper for model traiing and testing
     # solver = Solver
     solver = Solver(train_config,  train_data_loader, dev_data_loader, test_data_loader, is_train=True)