From c75522b518296b5045a8e8682d29b81534948c00 Mon Sep 17 00:00:00 2001
From: WeihengZ
Date: Sun, 16 Oct 2022 21:11:27 -0500
Subject: [PATCH] 'update Autoformer'

---
 .../scalbo/benchmark/autoformer/model.py      |  6 +--
 .../scalbo/benchmark/autoformer/utils/data.py | 48 ++++++++++---------
 .../benchmark/autoformer/utils/train_test.py  | 27 ++++++++---
 3 files changed, 47 insertions(+), 34 deletions(-)

diff --git a/src/scalbo/scalbo/benchmark/autoformer/model.py b/src/scalbo/scalbo/benchmark/autoformer/model.py
index d0465a1..4a97848 100755
--- a/src/scalbo/scalbo/benchmark/autoformer/model.py
+++ b/src/scalbo/scalbo/benchmark/autoformer/model.py
@@ -504,7 +504,7 @@ def __init__(self, adj, configs, DEVICE):
         # define encoder
         self.encoder_blocks = nn.Sequential(
             *(encoder_depth * [Block(encoder_width)]))
-        self.encoder = nn.Sequential(nn.Linear(1, encoder_width), nn.Tanh(), self.encoder_blocks,\
+        self.encoder = nn.Sequential(nn.Linear(3, encoder_width), nn.Tanh(), self.encoder_blocks,\
             nn.Linear(encoder_width, configs.d_model))
 
         # define the Autoformer
@@ -524,10 +524,6 @@ def forward(self, x_enc, t):
         return: (B, T = num_pred_len * pred_len, N)
         '''
 
-        # always use segment of length of pred_len to initialize the future
-        # return (B,pred_len,F)
-        x_enc = x_enc.unsqueeze(-1) # (B,T,N,1)
-
         # apply encoder: (B,T,N,F)
         x_enc = self.encoder(x_enc)
         # permute the shape, return (B,N,T,F)
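The two model.py hunks go together: the dataset now supplies three features per sensor and time step (weekly, daily, and recent traffic segments), so the encoder's input width grows from 1 to 3 and forward no longer needs to add a feature axis with unsqueeze. A minimal shape-contract sketch, with illustrative sizes not taken from the patch (the Tanh/Block stack of the real encoder is elided):

import torch
import torch.nn as nn

B, T, N, F = 8, 12, 207, 3         # batch, pred_len, sensors, input features
encoder_width, d_model = 64, 32    # illustrative sizes, not from configs

# nn.Linear acts on the last axis, so the stack accepts (B,T,N,F) directly
encoder = nn.Sequential(nn.Linear(F, encoder_width), nn.Tanh(),
                        nn.Linear(encoder_width, d_model))

x_enc = torch.randn(B, T, N, F)    # feature axis already present, no unsqueeze
print(encoder(x_enc).shape)        # torch.Size([8, 12, 207, 32])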
diff --git a/src/scalbo/scalbo/benchmark/autoformer/utils/data.py b/src/scalbo/scalbo/benchmark/autoformer/utils/data.py
index 12ed9c6..438f479 100644
--- a/src/scalbo/scalbo/benchmark/autoformer/utils/data.py
+++ b/src/scalbo/scalbo/benchmark/autoformer/utils/data.py
@@ -1,11 +1,13 @@
-import pickle
+__all__ = ['trainingset_construct', 'load_graph_data', 'sparse_adj', 'scaled_Laplacian_list']
 
+import torch
+from torch.utils.data import Dataset, DataLoader
+import torch.nn as nn
+import pickle
 import numpy as np
 import pandas as pd
 import scipy.sparse as sp
-import torch
 from scipy.sparse.linalg import eigs
-from torch.utils.data import DataLoader, Dataset
 
 
 def training_loader_construct(dataset, batch_num, Shuffle):
@@ -34,16 +36,17 @@ def __init__(self, traffic_data, args, num_data_limit, mean, std, transform=None
         args:
         '''
         # extract information
+        assert args.pred_len % 12 == 0, 'this script is only for short-term traffic prediction'
         pred_len = args.pred_len
-        input_length = args.time_steps
+        input_length = args.pred_len
 
         PEMS = traffic_data    # return (N,T)
         print('traffic data shape:', PEMS.shape)
 
         timestep_a_week = 7*24*12
         timestep_a_day = 24*12
-        time_stamp_week = np.arange(timestep_a_week).repeat(15)
-        time_stamp_day = np.arange(timestep_a_day).repeat(15*7)
+        time_stamp_week = np.arange(timestep_a_week).repeat(30)
+        time_stamp_day = np.arange(timestep_a_day).repeat(30*7)
         t = np.sin(time_stamp_week/timestep_a_week * 2*np.pi) + np.sin(time_stamp_day/timestep_a_day * 2*np.pi)
 
         self.x = []
@@ -52,17 +55,25 @@ def __init__(self, traffic_data, args, num_data_limit, mean, std, transform=None
         self.ty = []
 
         sample_steps = 1
-        num_datapoints = int(np.floor((PEMS.shape[1] - input_length) / sample_steps))
+        num_datapoints = int(np.floor((PEMS.shape[1] - timestep_a_week) / sample_steps))
        print('total number of datapoints:', num_datapoints)
 
-        starting_point = input_length
+        starting_point = timestep_a_week
         endding_point = PEMS.shape[1] - pred_len
         num_data = 0
         for k in range(starting_point, endding_point, sample_steps):
             if num_data < num_data_limit:
-                self.x.append((PEMS[:,k-input_length:k] - mean) / std)
+
+                input_data = []
+                # stack weekly, daily, and recent segments along the feature axis
+                input_data.append(np.expand_dims(PEMS[:,k-timestep_a_week: k-timestep_a_week+pred_len], axis=-1))
+                input_data.append(np.expand_dims(PEMS[:,k-timestep_a_day: k-timestep_a_day+pred_len], axis=-1))
+                input_data.append(np.expand_dims(PEMS[:,k-pred_len: k], axis=-1))
+                input_data = np.concatenate(tuple(input_data), axis=-1)    # return (N,T,F)
+
+                self.x.append((input_data - mean) / std)
                 self.y.append(np.array(PEMS[:, k : k + pred_len]))
-                self.tx.append(t[k-input_length:k])
+                self.tx.append(t[k-pred_len: k])
                 self.ty.append(t[k:k+pred_len])
                 num_data += 1
 
@@ -154,18 +165,15 @@ def sparse_adj():
 
     return W
 
-def scaled_Laplacian_list(W):
+def scaled_Laplacian_list(W, device):
     '''
     compute \tilde{L}
-
     Parameters
     ----------
     W: np.ndarray, shape is (N, N), N is the num of vertices
-
     Returns
     ----------
     scaled_Laplacian: np.ndarray, shape (N, N)
-
     '''
 
     assert W.shape[0] == W.shape[1]
@@ -180,16 +188,10 @@ def scaled_Laplacian_list(W):
 
     N = L_tilde.shape[0]
 
-    cheb_polynomials = [np.identity(N), L_tilde.copy()]
+    cheb_polynomials = [torch.eye(N).to(device), torch.from_numpy(L_tilde.copy()).to(device)]
+    L_tilde = torch.from_numpy(L_tilde).to(device)
 
     for i in range(2, 3):
         cheb_polynomials.append(2 * L_tilde * cheb_polynomials[i - 1] - cheb_polynomials[i - 2])
 
-    return cheb_polynomials
-
-
-
-
-
-
-
+    return cheb_polynomials
\ No newline at end of file
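In data.py, each training sample anchored at index k is now built from three aligned windows of length pred_len: the same slot one week earlier, the same slot one day earlier, and the immediately preceding window. A toy sketch of that indexing on random data; np.stack is equivalent to the expand_dims/concatenate pair used in the patch:

import numpy as np

pred_len = 12                                   # one hour at 5-minute resolution
steps_day = 24 * 12
steps_week = 7 * steps_day
PEMS = np.random.rand(207, steps_week + 100)    # toy (N, T) traffic matrix
k = steps_week + 50                             # any anchor >= timestep_a_week

weekly = PEMS[:, k - steps_week : k - steps_week + pred_len]   # same slot last week
daily = PEMS[:, k - steps_day : k - steps_day + pred_len]      # same slot yesterday
recent = PEMS[:, k - pred_len : k]                             # immediately preceding hour
x = np.stack([weekly, daily, recent], axis=-1)                 # (N, pred_len, 3)
print(x.shape)                                                 # (207, 12, 3)

This is also why the encoder in model.py now expects three input features, and why samples can only start once a full week of history is available (starting_point = timestep_a_week).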
diff --git a/src/scalbo/scalbo/benchmark/autoformer/utils/train_test.py b/src/scalbo/scalbo/benchmark/autoformer/utils/train_test.py
index dee07de..9b23d6c 100644
--- a/src/scalbo/scalbo/benchmark/autoformer/utils/train_test.py
+++ b/src/scalbo/scalbo/benchmark/autoformer/utils/train_test.py
@@ -4,7 +4,6 @@
 import torch
 from tqdm import tqdm
 
-
 # define training function
 def train(loader, model, optimizer, criterion, device, mean, std, update_step):
 
@@ -21,15 +20,23 @@ def train(loader, model, optimizer, criterion, device, mean, std, update_step):
 
         update_step = update_step
 
-        inputs = inputs.permute(0,2,1).to(device)    # (B,T,N)
+        if inputs.dim() == 3:
+            inputs = inputs.permute(0,2,1).to(device)    # (B,T,N)
+        if inputs.dim() == 4:
+            inputs = inputs.permute(0,2,1,3).to(device)    # (B,T,N,F)
+
         targets = targets.permute(0,2,1).to(device)    # (B,T,N)
+
+        # create a mask for missing data
+        mask = torch.where(targets<1, torch.zeros_like(targets), torch.ones_like(targets))
+
         tx = tx.to(device)    # (B,T)
         ty = ty.to(device)    # (B,T)
         t_stamp = torch.cat((tx,ty),1)
 
         outputs = model.forward(inputs, t_stamp)[0]    # (B,T,N)
         outputs = outputs * std + mean
-        loss = criterion(outputs, targets) / update_step
+        loss = criterion(mask * outputs, mask * targets) / update_step
         loss.backward()
         if idx % update_step == 0:
             optimizer.step()
@@ -49,8 +56,16 @@ def eval(loader, model, device, args, mean, std):
     for idx, (inputs, targets, tx, ty) in enumerate(tqdm(loader)):
 
         model.eval()
-        inputs = (inputs).permute(0,2,1).to(device)    # (B,T,N)
+        if inputs.dim() == 3:
+            inputs = inputs.permute(0,2,1).to(device)    # (B,T,N)
+        if inputs.dim() == 4:
+            inputs = inputs.permute(0,2,1,3).to(device)    # (B,T,N,F)
+
         targets = targets.permute(0,2,1).to(device)    # (B,T,N)
+
+        # create a mask for missing data
+        mask = torch.where(targets<1, torch.zeros_like(targets), torch.ones_like(targets))
+
         tx = tx.to(device)    # (B,T)
         ty = ty.to(device)    # (B,T)
         t_stamp = torch.cat((tx,ty),1)
@@ -58,8 +73,8 @@ def eval(loader, model, device, args, mean, std):
 
         outputs = outputs * std + mean
 
-        out_unnorm = outputs.detach().cpu().numpy()
-        target_unnorm = (targets).detach().cpu().numpy()
+        out_unnorm = (mask * outputs).detach().cpu().numpy()
+        target_unnorm = (mask * targets).detach().cpu().numpy()
 
         mae_loss = np.zeros(args.pred_len)
         for k in range(out_unnorm.shape[1]):
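The train_test.py hunks add a missing-data mask: any target reading below 1 is treated as absent (e.g., a sensor reporting 0 during an outage) and zeroed on both sides of the loss, so such entries stop pulling predictions toward 0. A small sketch of the effect, assuming an L1-style criterion purely for illustration:

import torch
import torch.nn as nn

targets = torch.tensor([[55.0, 0.0, 62.0]])    # 0.0 marks a missing reading
outputs = torch.tensor([[50.0, 30.0, 60.0]])

mask = torch.where(targets < 1, torch.zeros_like(targets), torch.ones_like(targets))

criterion = nn.L1Loss()
print(criterion(outputs, targets))                # tensor(12.3333), missing entry dominates
print(criterion(mask * outputs, mask * targets))  # tensor(2.3333), its residual is zeroed

Note that with a mean-reducing criterion the masked entries still count toward the denominator; dividing a summed error by mask.sum() instead would average over valid entries only.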