diff --git a/README.md b/README.md index f5df02f..11a6009 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,4 @@ #### For the benchmarking framework, please visit the public folder. The revamped version is visible in the src folder -## This is an open source effort for benchmarking split inference. If you would like to be a contributor in this benchmarking effort then please reach out to abhi24@mit.edu - -### Contributors to the codebase so far - -#### 1. Abhishek Singh -#### 2. Justin Yu -#### 3. John Mose -#### 4. Rohan Sukumaran -#### 5. Emily Zhang -#### 6. Ethan Garza +## This is an open source effort for benchmarking split inference. diff --git a/src/algos/input_model_optimization.py b/src/algos/input_model_optimization.py index 2028211..d1d8baf 100644 --- a/src/algos/input_model_optimization.py +++ b/src/algos/input_model_optimization.py @@ -1,5 +1,5 @@ import json -from utils.config_utils import process_config +from utils.config_utils import combine_configs, process_config from algos.uniform_noise import UniformNoise from algos.nopeek import NoPeek from algos.simba_algo import SimbaAttack @@ -31,21 +31,24 @@ def initialize(self, config): self.attribute = config["attribute"] self.obf_model_name = config["target_model"] + self.img_size = config["img_size"] # load obfuscator model target_exp_config = json.load(open(config["target_model_config"])) #config_loader(config["model_config"]) system_config = json.load(open("./configs/system_config.json")) #config_loader(config["model_config"]) - target_config = process_config(system_config, target_exp_config) + target_exp_config["client"]["challenge"] = True + target_config = process_config(combine_configs(system_config, target_exp_config)) self.target_config = target_config from interface import load_algo self.obf_model = load_algo(target_config, self.utils) - wts_path = self.target_config["model_path"] + "/client_model.pt" - wts = torch.load(wts_path) - if isinstance(self.obf_model.client_model, torch.nn.DataParallel): # type: ignore - self.obf_model.client_model.module.load_state_dict(wts) - else: - self.obf_model.client_model.load_state_dict(wts) + if not config["target_model"] == "gaussian_blur": + wts_path = config["target_model_path"] + wts = torch.load(wts_path) + if isinstance(self.obf_model.client_model, torch.nn.DataParallel): # type: ignore + self.obf_model.client_model.module.load_state_dict(wts) + else: + self.obf_model.client_model.load_state_dict(wts) self.obf_model.enable_logs(False) self.obf_model.set_detached(False) @@ -131,7 +134,9 @@ def forward(self, items): rand_inp = rand_inp.to(self.utils.device) optim.zero_grad() - ys = gen_model(rand_inp)[:,:,:128, :128] + ys = gen_model(rand_inp) + # resize the width and height to self.image_size + ys = torch.nn.functional.interpolate(ys, size=(self.img_size, self.img_size), mode='bilinear', align_corners=True) out = self.obf_model({"x": ys}) loss = self.loss_fn(out, z) diff --git a/src/algos/input_optimization.py b/src/algos/input_optimization.py index 0225a22..f33e89e 100644 --- a/src/algos/input_optimization.py +++ b/src/algos/input_optimization.py @@ -1,5 +1,5 @@ import json -from utils.config_utils import process_config +from utils.config_utils import combine_configs, process_config from algos.uniform_noise import UniformNoise from algos.nopeek import NoPeek from algos.simba_algo import SimbaAttack @@ -16,21 +16,24 @@ def initialize(self, config): self.attribute = config["attribute"] self.model_name = config["target_model"] + self.img_size = config["img_size"] # load obfuscator model target_exp_config = json.load(open(config["target_model_config"])) #config_loader(config["model_config"]) system_config = json.load(open("./configs/system_config.json")) #config_loader(config["model_config"]) - target_config = process_config(system_config, target_exp_config) + target_exp_config["client"]["challenge"] = True + target_config = process_config(combine_configs(system_config, target_exp_config)) self.target_config = target_config from interface import load_algo self.model = load_algo(target_config, self.utils) - - wts_path = self.target_config["model_path"] + "/client_model.pt" - wts = torch.load(wts_path) - if isinstance(self.model.client_model, torch.nn.DataParallel): # type: ignore - self.model.client_model.module.load_state_dict(wts) - else: - self.model.client_model.load_state_dict(wts) + + if not config["target_model"] == "gaussian_blur": + wts_path = config["target_model_path"] + wts = torch.load(wts_path) + if isinstance(self.model.client_model, torch.nn.DataParallel): # type: ignore + self.model.client_model.module.load_state_dict(wts) + else: + self.model.client_model.load_state_dict(wts) self.metric = MetricLoader(data_range=1) @@ -89,7 +92,8 @@ def forward(self, items): for _ in range(self.iters): optim.zero_grad() - out = self.model({"x": ys}) + ys = torch.nn.functional.interpolate(ys, size=(self.img_size, self.img_size), mode='bilinear', align_corners=True) + out = self.model({"x": ys}) loss = self.loss_fn(out, z) ssim = self.metric.ssim(img, ys) diff --git a/src/algos/siamese_embedding.py b/src/algos/siamese_embedding.py index a27c2b0..13a9f7c 100644 --- a/src/algos/siamese_embedding.py +++ b/src/algos/siamese_embedding.py @@ -34,6 +34,10 @@ def __init__(self, config, utils) -> None: self.initialize(config) def initialize(self, config): + if config.get("challenge", False): + self.is_attacked = True + else: + self.is_attacked = False self.client_model = self.init_client_model(config) self.put_on_gpus() self.utils.register_model("client_model", self.client_model) @@ -47,11 +51,12 @@ def initialize(self, config): def forward(self, items): x = items["x"] - pred_lbls = items["pred_lbls"] self.z = self.client_model(x) - self.contrastive_loss = self.loss(self.z, pred_lbls) - self.utils.logger.add_entry(self.mode + "/" + self.ct_loss_tag, - self.contrastive_loss.item()) + if not self.is_attacked: + pred_lbls = items["pred_lbls"] + self.contrastive_loss = self.loss(self.z, pred_lbls) + self.utils.logger.add_entry(self.mode + "/" + self.ct_loss_tag, + self.contrastive_loss.item()) # z will be detached to prevent any grad flow from the client z = self.z.detach() z.requires_grad = True diff --git a/src/algos/supervised_decoder.py b/src/algos/supervised_decoder.py index 87a05b2..b87b847 100644 --- a/src/algos/supervised_decoder.py +++ b/src/algos/supervised_decoder.py @@ -1,3 +1,4 @@ +import torch from algos.simba_algo import SimbaAttack from models.image_decoder import Decoder from utils.metrics import MetricLoader @@ -11,6 +12,7 @@ def __init__(self, config, utils): def initialize(self, config): self.attribute = config["attribute"] self.metric = MetricLoader() + self.img_size = config["img_size"] if self.attribute == "data": self.loss_tag = "recons_loss" @@ -48,6 +50,7 @@ def initialize(self, config): def forward(self, items): z = items["z"] self.reconstruction = self.model(z) + ys = torch.nn.functional.interpolate(ys, size=(self.img_size, self.img_size), mode='bilinear', align_corners=True) self.orig = items["x"] self.loss = self.loss_fn(self.reconstruction, self.orig) diff --git a/src/configs/split_inference.json b/src/configs/split_inference.json index 9858a75..5c54420 100644 --- a/src/configs/split_inference.json +++ b/src/configs/split_inference.json @@ -1,6 +1,7 @@ { + "experiment_type": "time_profiling", "method": "split_inference", - "client": {"model_name": "resnet18", "split_layer": 6, + "client": {"model_name": "resnet18", "split_layer": 1, "pretrained": false, "optimizer": "adam", "lr": 3e-4}, "server": {"model_name": "resnet18", "split_layer": 6, "logits": 2, "pretrained": false, "lr": 3e-4, "optimizer": "adam"}, diff --git a/src/configs/system_config.json b/src/configs/system_config.json index 0c127a2..6d196d2 100644 --- a/src/configs/system_config.json +++ b/src/configs/system_config.json @@ -1,5 +1,5 @@ { - "dataset_path": "/home/justinyu/fairface/", - "experiments_folder": "/home/justinyu/experiments/", + "dataset_path": "/u/abhi24/Datasets/Faces/fairface/", + "experiments_folder": "/u/abhi24/Workspace/simba/experiments/", "gpu_devices": [1, 3] } diff --git a/src/data/dataset_utils.py b/src/data/dataset_utils.py index 5198178..ddaec54 100644 --- a/src/data/dataset_utils.py +++ b/src/data/dataset_utils.py @@ -91,17 +91,15 @@ def load_label(self): def __getitem__(self, index): filepath = self.indicies[index] - # Added all cases - Train, valid, challenge and when no arg is specified - if self.config["train"] is True: - filename = "train/"+str(filepath)+".jpg" - elif self.config["challenge"] is True: # check if this is actually present in the config file. If not, lets add it - (Rohan) - filename = "challenge/"+str(filepath)+".jpg" - elif self.config["val"] is True: - filename = "val/"+str(filepath)+".jpg" - else: - filename = filepath.split('/')[-1].split('.')[0] - + # if self.config["train"] is True: + # filename = "train/"+str(filepath)+".jpg" + # elif self.config.get("challenge", False): # check if this is actually present in the config file. If not, lets add it - (Rohan) + # filename = "challenge/"+str(filepath)+".jpg" + # else: + # filename = "val/"+str(filepath)+".jpg" + filename = str(filepath) + img = self.load_image(filepath) img = self.transforms(img) pred_label = self.load_label(filepath, "pred") @@ -111,9 +109,11 @@ def __getitem__(self, index): else: privacy_label = self.load_label(filepath, "privacy") privacy_label = self.to_tensor(privacy_label) + # print(img.shape, pred_label.shape, privacy_label.shape, filepath, filename) sample = {'img': img, 'prediction_label': pred_label, 'private_label': privacy_label, 'filepath': filepath, 'filename': filename} + return sample def __len__(self): return len(self.indicies) @@ -211,8 +211,10 @@ def __init__(self, config): self.data_to_run_on = None if config["train"] is True: + config["path"] += "/train" self.data_to_run_on = self.train_dict else: + config["path"] += "/val" self.data_to_run_on = self.val_dict super(Cifar10, self).__init__(config) @@ -244,10 +246,7 @@ def load_label(self, filepath, label_type): except: return 1, 1 - - - -class CelebA(datasets.CelebA): +class CelebA(datasets.CelebA, BaseDataset): def __init__(self, config): config = deepcopy(config) data_split = "train" if config["train"] else "valid" @@ -262,10 +261,10 @@ def __init__(self, config): 'wavy_hair': 33, 'big_nose': 7, 'mouth_open': 21} - if self.prediction_attribute in self.attr_indices.keys(): + if self.prediction_attribute in self.attr_indices.keys() or self.prediction_attribute == 'data': target_pred = 'attr' - else: - raise ValueError("Prediction Attribute {} is not supported.".format(self.prediction_attribute)) + # else: + # raise ValueError("Prediction Attribute {} is not supported.".format(self.prediction_attribute)) if self.protected_attribute in self.attr_indices.keys(): target_protect = 'attr' target_type = [target_pred, target_protect] @@ -313,10 +312,16 @@ def __init__(self, config): self.format = "pt" # hardcoded for now self.set_filepaths(config["challenge_dir"]) self.protected_attribute = config["protected_attribute"] - + self.config = config self.transforms = config["transforms"] if config["dataset"] == "fairface": self.dataset_obj = FairFace(config) + elif config["dataset"] == "celeba": + self.dataset_obj = CelebA(config) + self.dataset_obj.format = "jpg" + elif config["dataset"] == "cifar10": + self.dataset_obj = Cifar10(config) + self.dataset_obj.format = "jpg" else: print("not implemented yet") exit() @@ -334,11 +339,22 @@ def get_imgpath(self, filename): """ The challenge folder only consists of filename but the corresponding file in the dataset is obtained here """ - filename = "/" + filename + "." + self.dataset_obj.format - l = list(filter(lambda x: x.endswith(filename), - self.dataset_obj.filepaths)) - assert len(l) == 1 - return l[0] + if self.config["dataset"] == "celeba": + filename = os.path.join(self.dataset_obj.root, self.dataset_obj.base_folder, "img_align_celeba", filename + ".jpg") + return filename + elif self.config["dataset"] == "fairface": + filename = "/" + filename + "." + self.dataset_obj.format + l = list(filter(lambda x: x.endswith(filename), + self.dataset_obj.filepaths)) + assert len(l) == 1 + return l[0] + elif self.config["dataset"] == "cifar10": + filename = self.dataset_obj.indicies[int(filename)] + return filename + else: + print("not implemented yet", self.config["dataset"]) + exit() + def __getitem__(self, index): filepath = self.filepaths[index] @@ -347,7 +363,7 @@ def __getitem__(self, index): imgpath = self.get_imgpath(filename) img = self.load_image(imgpath) if self.protected_attribute == "data": - privacy_label = self.dataset_obj.transforms(img) + privacy_label = self.transforms(img) else: privacy_label = self.load_label(imgpath, "privacy") privacy_label = self.to_tensor(privacy_label) diff --git a/src/data/download_pytorch_dataset.py b/src/data/download_pytorch_dataset.py index 97c9d1b..cf30060 100644 --- a/src/data/download_pytorch_dataset.py +++ b/src/data/download_pytorch_dataset.py @@ -21,8 +21,12 @@ def translate_into_images(dataset, output_dir, prefix=None): new_dir = os.path.join(output_dir, prefix) if not os.path.exists(new_dir): os.mkdir(new_dir) + skip=False if not os.path.exists(output_dir): os.mkdir(output_dir) + else: + if os.listdir(output_dir): + skip=True num_of_images = dataset.data.shape[0] filenames = [] filepaths = [] @@ -31,8 +35,10 @@ def translate_into_images(dataset, output_dir, prefix=None): if prefix: filename = os.path.join(prefix, filename) filepath = os.path.join(output_dir, filename) - img = Image.fromarray(dataset.data[i]) - img.save(filepath) + if not skip: + print(filepath) + img = Image.fromarray(dataset.data[i]) + img.save(filepath) filenames.append(filename) filepaths.append(filepath) return filenames, filepaths @@ -92,20 +98,20 @@ def load_cifar_as_dict(output_dir): trainset, valset = load_CIFAR10_dataset(output_dir) - train_files, train_filepaths = translate_into_images(trainset, output_dir, "train") - val_files, val_filepaths = translate_into_images(valset, output_dir, "val") + # train_files, train_filepaths = translate_into_images(trainset, output_dir, "train") + # val_files, val_filepaths = translate_into_images(valset, output_dir, "val") train_labels = load_labels(trainset) val_labels = load_labels(valset) train_dict = dict() train_dict["set"] = trainset - train_dict["file"] = train_files + # train_dict["file"] = train_files train_dict["animated"] = map_class_to_animated(trainset) train_dict["class"] = train_labels val_dict = dict() val_dict["set"] = valset - val_dict["file"] = val_files + # val_dict["file"] = val_files val_dict["animated"] = map_class_to_animated(valset) val_dict["class"] = val_labels @@ -113,6 +119,6 @@ def load_cifar_as_dict(output_dir): if __name__ == '__main__': - base_dir = "/home/mit6_91621/cybop/" + base_dir = "./datasets/" output_dir = base_dir + "cifar10" save_and_organize_cifar10_dataset(output_dir) diff --git a/src/scheduler.py b/src/scheduler.py index 0f9cc28..6390217 100644 --- a/src/scheduler.py +++ b/src/scheduler.py @@ -1,5 +1,6 @@ +import time from interface import (load_config, load_algo, load_data, load_model, load_utils) -import torch, random, numpy +import torch, random, numpy as np class Scheduler(): def __init__(self) -> None: @@ -17,7 +18,7 @@ def initialize(self) -> None: # set seeds seed = self.config["seed"] - torch.manual_seed(seed); random.seed(seed); numpy.random.seed(seed) + torch.manual_seed(seed); random.seed(seed); np.random.seed(seed) if not self.config["experiment_type"] == "challenge": self.utils.copy_source_code(self.config["results_path"]) @@ -41,6 +42,8 @@ def run_job(self): self.run_defense_job() elif exp_type == "attack": self.run_attack_job() + elif exp_type == "time_profiling": + self.algorithm_time_profiling() else: print("unknown experiment type") @@ -99,6 +102,28 @@ def attack_test(self): z = self.algo.forward(items) self.utils.save_images(z,items["filename"]) + def algorithm_time_profiling(self): + self.algo.eval() + total_iter, warmup_iter, counter = 20, 20, [] + self.utils.logger.register_tag("runtime_mean") + self.utils.logger.register_tag("runtime_std") + + for it, sample in enumerate(self.dataloader.train): + items = self.utils.get_data(sample) + if it > warmup_iter + total_iter: + break + if it > warmup_iter: + start = time.perf_counter() + _ = self.algo.forward(items) + end = time.perf_counter() + counter.append(end - start) + else: + _ = self.algo.forward(items) + counter = np.array(counter) + self.utils.logger.log_data("runtime_mean", counter.mean(), 1) + self.utils.logger.log_data("runtime_std", counter.std(), 1) + self.utils.logger.flush_epoch() + def epoch_summary(self): self.utils.logger.flush_epoch() self.utils.save_models() diff --git a/src/utils/config_utils.py b/src/utils/config_utils.py index ed45fa4..da553a0 100644 --- a/src/utils/config_utils.py +++ b/src/utils/config_utils.py @@ -33,14 +33,15 @@ def process_config(json_dict): json_dict['num_gpus'] = len(json_dict.get('gpu_devices')) json_dict['train_batch_size'] = json_dict.get('training_batch_size', 64) * json_dict['num_gpus'] json_dict['experiment_type'] = json_dict.get('experiment_type') or "defense" - json_dict['seed'] = json_dict.get('seed') or 1 + if json_dict.get('seed') or not json_dict.get('client') or not json_dict['client'].get('challenge'): + json_dict['seed'] = json_dict.get('seed') or 1 if 'manual_expt_name' in json_dict.keys(): '''serves two use cases - generating challenge for past experiments which followed different naming convention. The other case is when we want to transfer a pretrained obfuscation network to a different client model.''' experiment_name = json_dict['manual_expt_name'] - elif json_dict["experiment_type"] in ["defense", "challenge"]: + elif json_dict["experiment_type"] in ["defense", "challenge", "time_profiling"]: experiment_name = "{}_{}_{}_{}_split{}_{}".format( json_dict['method'], json_dict['dataset'], @@ -71,7 +72,10 @@ def process_config(json_dict): "/challenge/" experiments_folder = json_dict["experiments_folder"] - results_path = experiments_folder + experiment_name + f"_seed{json_dict['seed']}" + if json_dict.get('seed') or not json_dict.get('client') or not json_dict['client'].get('challenge'): + results_path = experiments_folder + experiment_name + f"_seed{json_dict['seed']}" + else: + results_path = experiments_folder + experiment_name log_path = results_path + "/logs/" images_path = results_path + "/images/"