Skip to content

Commit

Permalink
Update general logic.
Browse files Browse the repository at this point in the history
  • Loading branch information
Neroxn committed Mar 6, 2023
1 parent 9646657 commit da87464
Show file tree
Hide file tree
Showing 16 changed files with 805 additions and 212 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ wandb
*.ipynb_checkpoints
*.ipynb
*.DS_Store
*.pth
*.pth
regression_datasets/*
39 changes: 25 additions & 14 deletions configs/toy_dataset.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
network: # basic MLP layer configuration
num_networks: 5
layer_sizes: [16,32,64]
estimator: # estimator configuration (class, model and optimizer)
class: 'ensemble'
model:
num_networks : 5
layer_sizes : [50]
optimizer:
class : 'Adam'
lr : 0.01
dataset: # for now, just use toy-dataset
class: 'toy'
func: 'toy_function_complex'
bounds : [0, 4]
bounds: [-6, -4, 1, 4]
sigmas: [0, 0, 0]
imbalance_ratios: [0.1, 0.9, 0.7]
batch_size : 128
test_ratio : 0.1
transforms:
x :
- class : standardize
y :
- class : standardize
train:
num_iter : 5000
print_every : 250
weighted : False
test:
batch_size : 32
logger:
type: 'wandb'
project: 'uncertainty-estimation'
entity: 'kbora'
name: 'Toy Dataset Complex Weighted'
train_type : iter
num_iter : 100
# logger:
# type: 'wandb'
# project: 'uncertainty-estimation'
# entity: 'kbora'
# name: 'Toy Dataset Complex Weighted'

45 changes: 28 additions & 17 deletions configs/xls_dataset.yaml
Original file line number Diff line number Diff line change
@@ -1,30 +1,41 @@
# Estimator configurations
estimator: # estimator class, network layout and optimizer
class: 'ensemble'
model:
num_networks : 5
layer_sizes : [100]
num_networks : 5
network:
estimator_network:
- fc1 : {class : Linear, in_features : 13, out_features : 32}
- fc2 : {class : Linear, in_features : 32, out_features : 64}
- projection : {class : LinearVarianceNetworkHead, in_features : 64, out_features : 1}
predictor_network:
- fc1 : {class : Linear, in_features : 13, out_features : 32}
- fc2 : {class : Linear, in_features : 32, out_features : 64}
- projection : {class : Linear, in_features : 64, out_features : 1}
optimizer:
class : 'Adam'
lr : 0.01
dataset: # for now, just use toy-dataset

# Dataset configurations
dataset:
class: 'xls'
path: "regression_datasets/year_prediction.csv"
batch_size : 128
cv_split_num: 1
path: "regression_datasets/boston.csv"
batch_size : 512
cv_split_num: 10
test_ratio: 0.10
y_col : [0]
transforms:
x :
- class : standardize
- {class : standardize}
y :
- class : standardize
- {class : standardize}

# Training configurations
train:
train_type : epoch
num_iter : 40
print_every : 5
val_every: 10
logger:
type: 'wandb'
project: 'uncertainty-estimation'
entity: 'kbora'
name: 'Toy Dataset Complex Weighted'

# Logger configurations
# logger:
# type: 'wandb'
# project: 'uncertainty-estimation'
# entity: 'kbora'
# name: 'Toy Dataset Complex Weighted'
90 changes: 58 additions & 32 deletions datasets/toy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def complex(x):
class ToyDataset(Dataset):
def __init__(self, x, y):
self.x = np.array(x)
self.y = np.array(y)
self.y = None
if y is not None:
self.y = np.array(y)

def __len__(self):
"""
Expand All @@ -57,44 +59,46 @@ def __getitem__(self, idx):
"""
Transform the data to torch.Tensor
"""
return self.x[idx], self.y[idx]
if self.y is not None:
return self.x[idx], self.y[idx]
else:
return self.x[idx]

def make_toy_dataset(
func : Callable, **kwargs) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:

data_range = kwargs.get("data_range", 7)
data_step = kwargs.get("data_step", 0.001)
bounds = kwargs.get("bounds", (-2, 2))
sigmas = kwargs.get("sigmas", (0.1, 0.5))
test_ratio = kwargs.get("test_ratio", 0.2)
bounds = kwargs.get("bounds")
sigmas = kwargs.get("sigmas")
imbalance_ratios = kwargs.get("imbalance_ratios")

bound1, bound2 = bounds
data_sigma1, data_sigma2 = sigmas
assert len(bounds) - 1 == len(sigmas)
assert len(bounds) - 1 == len(imbalance_ratios)

data_x1 = np.arange(-data_range, bound1 + data_step, data_step)
data_x2 = np.arange(bound2, data_range + data_step, data_step)

data_x = np.concatenate((data_x1, data_x2))
data_x = np.reshape(data_x, [data_x.shape[0], 1])
x,y = [], []
for i in range(len(sigmas)):
bound = bounds[i]
next_bound = bounds[i+1]
sigma = sigmas[i]

data_y = np.zeros([data_x.shape[0], 1])
data_x = np.arange(bound, next_bound + data_step, data_step)
data_y = func(data_x) + np.random.normal(0, sigma, data_x.shape)

for i in range(data_x.shape[0]):
if (data_x[i,0] < bound1):
data_y[i, 0] = func(data_x[i,0]) + np.random.normal(0, data_sigma1)
else:
data_y[i, 0] = func(data_x[i,0]) + np.random.normal(0, data_sigma2)
imbalance_ratio = imbalance_ratios[i]
data_x, data_y = shuffle(data_x, data_y)

num_to_drop = int(data_x.shape[0] * imbalance_ratio)

data_x, data_y = shuffle(data_x, data_y)

num_train_data = int(data_x.shape[0] * (1 - test_ratio))
data_x = data_x[num_to_drop:] # we randomly dropped points
data_y = data_y[num_to_drop:]

train_x = data_x[:num_train_data, :]
train_y = data_y[:num_train_data, :]
test_x = data_x[num_train_data:, :]
test_y = data_y[num_train_data:, :]
x.extend(data_x)
y.extend(data_y)

return train_x, train_y, test_x, test_y
x = np.array(x, dtype=np.float32).reshape(-1,1)
y = np.array(y, dtype=np.float32).reshape(-1,1)
return x, y

def create_toy_dataloader(**kwargs):
"""
Expand All @@ -112,13 +116,35 @@ def create_toy_dataloader(**kwargs):
raise ValueError("Please provide a function to sample from.")
func = TOY_FUNC_REGISTRY[func]()

cv_split_num = kwargs.pop("cv_split_num", 1)
batch_size = kwargs.pop("batch_size", 32)
train_x, train_y, test_x, test_y = make_toy_dataset(func = func, **kwargs)
train_dataset = ToyDataset(train_x, train_y)
test_dataset = ToyDataset(test_x, test_y)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
return train_loader, test_loader, train_dataset, test_dataset
test_ratio = kwargs.pop("test_ratio", 0.2)
x,y = make_toy_dataset(func = func, **kwargs)

if batch_size == -1:
batch_size = x.shape[0]

num_train_data = int(x.shape[0] * (1 - test_ratio))

for i in range(cv_split_num):
data_x, data_y = shuffle(x, y)
train_x = data_x[:num_train_data, :]
train_y = data_y[:num_train_data, :]
test_x = data_x[num_train_data:, :]
test_y = data_y[num_train_data:, :]

plt.figure()
plt.scatter(train_x, train_y, s=1, c="b")
plt.scatter(test_x, test_y, s=1, c="r")
plt.title("Toy Data")
plt.savefig("figures/toy_data.png")

train_dataset = ToyDataset(train_x, train_y)
val_dataset = ToyDataset(test_x, test_y)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
yield train_loader, val_loader, train_dataset, val_dataset


def sample_toy_data(func : Callable, start : float, end : float, step : float):
Expand Down
3 changes: 3 additions & 0 deletions datasets/xls_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ def create_xls_dataloader(**kwargs):
x, y = parser.parse(x_col, y_col)
num_test = int(x.shape[0] * test_ratio)

if batch_size == -1:
batch_size = x.shape[0]

# create train and test data for cross validation
for i in range(cv_split_num):
x, y = shuffle(x, y)
Expand Down
2 changes: 1 addition & 1 deletion estimations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def create_estimator(cfg, **kwargs):
if estimator_name not in ESTIMATOR_REGISTRY:
raise ValueError(f"Unknown estimator : {estimator_name}")
estimator = ESTIMATOR_REGISTRY[estimator_name](
network_config=cfg.get("model"),
network_config=cfg.get("network"),
optimizer_config = cfg.get("optimizer"),
**kwargs)
return estimator
15 changes: 13 additions & 2 deletions estimations/base.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,29 @@
import typing
import torch
import abc


from models import create_model

class UncertaintyEstimator(object):
    """Base interface shared by the uncertainty estimators.

    Every ``init_*``, ``train_*`` and ``test_*`` hook raises
    ``NotImplementedError`` in this class; subclasses presumably override
    the hooks they support. Only ``_build_network`` and ``__repr__`` carry
    an actual implementation here.
    """

    def init_estimator(self, **kwargs):
        """Initialise the estimator; not implemented in the base class."""
        raise NotImplementedError

    def init_predictor(self, **kwargs):
        """Initialise the predictor; not implemented in the base class."""
        raise NotImplementedError

    def train_estimator(self, **kwargs):
        """Train the estimator; not implemented in the base class."""
        raise NotImplementedError

    def test_estimator(self, **kwargs):
        """Evaluate the estimator; not implemented in the base class."""
        raise NotImplementedError

    def train_predictor(self, **kwargs):
        """Train the predictor; not implemented in the base class."""
        raise NotImplementedError

    def test_predictor(self, **kwargs):
        """Evaluate the predictor; not implemented in the base class."""
        raise NotImplementedError

    def _build_network(self, network_build_config, network_name):
        """Build the network registered under ``network_name``.

        Looks ``network_name`` up in ``network_build_config`` (yielding
        ``None`` when the key is absent) and passes the result to
        ``create_model``, whose return value is handed back unchanged.
        """
        network_spec = network_build_config.get(network_name)
        return create_model(network_spec)

    def __repr__(self) -> str:
        """Return the concrete class name followed by ``()``."""
        return f"{self.__class__.__name__}()"
Loading

0 comments on commit da87464

Please sign in to comment.