From 9dfa1e64ca9b2edaeaa5640d58740d748da292d0 Mon Sep 17 00:00:00 2001 From: Doguk Kim <39759898+logue311@users.noreply.github.com> Date: Wed, 5 Dec 2018 14:39:02 +0900 Subject: [PATCH 1/3] Monte Carlo method - Randomly train the hyper-network - Randomly select architectures and compute validation acc - Select nodes that have largest avg val acc --- cnn/genotypes.py | 2 +- cnn/model_search.py | 85 ++++++++++------- cnn/train_search.py | 221 ++++++++++++++++++++++++++++++-------------- 3 files changed, 207 insertions(+), 101 deletions(-) diff --git a/cnn/genotypes.py b/cnn/genotypes.py index 7849f6a..2e4e172 100644 --- a/cnn/genotypes.py +++ b/cnn/genotypes.py @@ -3,7 +3,7 @@ Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat') PRIMITIVES = [ - 'none', + #'none', 'max_pool_3x3', 'avg_pool_3x3', 'skip_connect', diff --git a/cnn/model_search.py b/cnn/model_search.py index d923aae..56a2482 100644 --- a/cnn/model_search.py +++ b/cnn/model_search.py @@ -5,6 +5,7 @@ from torch.autograd import Variable from genotypes import PRIMITIVES from genotypes import Genotype +import random class MixedOp(nn.Module): @@ -18,7 +19,11 @@ def __init__(self, C, stride): self._ops.append(op) def forward(self, x, weights): - return sum(w * op(x) for w, op in zip(weights, self._ops)) + for w, op in zip(weights, self._ops): + if w > 0: + return w * op(x) + + return w * op(x) class Cell(nn.Module): @@ -52,15 +57,8 @@ def forward(self, s0, s1, weights, fixed_weights): states = [s0, s1] offset = 0 - for i in range(self._steps): - if i < self._steps-1: - s = sum(self._ops[offset+j](h, fixed_weights[offset+j]) for j, h in enumerate(states)) - else: - #if len(states) > 4: - #for j, h in enumerate(states): - #st = self._ops[offset+j](h, weights[offset+j]) - #print (st.size()) - s = sum(self._ops[offset+j](h, weights[offset+j]) for j, h in enumerate(states)) + for i in range(self._steps): + s = sum(self._ops[offset+j](h, weights[offset+j]) for j, h in enumerate(states)) offset += len(states) states.append(s) @@ -69,7 +67,7 @@ def forward(self, s0, s1, weights, fixed_weights): class Network(nn.Module): - def __init__(self, C, num_classes, layers, criterion, steps=1, multiplier=1, stem_multiplier=3): + def __init__(self, C, num_classes, layers, criterion, steps=4, multiplier=4, stem_multiplier=3): super(Network, self).__init__() self._C = C self._num_classes = num_classes @@ -122,27 +120,30 @@ def forward(self, input): #normal_weights = Variable(normal_weights, requires_grad=True) #reduce_weights = torch.from_numpy(reduce_weights).cuda() #reduce_weights = Variable(reduce_weights, requires_grad=True) - if self._steps > 1: - fixed_normal_edges, fixed_reduce_edges = self.fixed_edges - k = sum(1 for i in range(self._steps-1) for n in range(2+i)) - fixed_normal_weights = torch.zeros((k,len(PRIMITIVES))) - fixed_reduce_weights = torch.zeros((k,len(PRIMITIVES))) - for i, edge in enumerate(fixed_reduce_edges): - fixed_reduce_weights[edge[1]][edge[0]] = 1.0 - for i, edge in enumerate(fixed_normal_edges): - fixed_normal_weights[edge[1]][edge[0]] = 1.0 - fixed_reduce_weights = Variable(fixed_reduce_weights, requires_grad=False).cuda() - fixed_normal_weights = Variable(fixed_normal_weights, requires_grad=False).cuda() - else: - fixed_reduce_weights = None - fixed_normal_weights = None + + #if self._steps > 1: + #fixed_normal_edges, fixed_reduce_edges = self.fixed_edges + #k = sum(1 for i in range(self._steps-1) for n in range(2+i)) + #fixed_normal_weights = torch.zeros((k,len(PRIMITIVES))) + #fixed_reduce_weights 
= torch.zeros((k,len(PRIMITIVES))) + #for i, edge in enumerate(fixed_reduce_edges): + #fixed_reduce_weights[edge[1]][edge[0]] = 1.0 + #for i, edge in enumerate(fixed_normal_edges): + #fixed_normal_weights[edge[1]][edge[0]] = 1.0 + #fixed_reduce_weights = Variable(fixed_reduce_weights, requires_grad=False).cuda() + #fixed_normal_weights = Variable(fixed_normal_weights, requires_grad=False).cuda() + #else: + fixed_reduce_weights = None + fixed_normal_weights = None for i, cell in enumerate(self.cells): if cell.reduction: - weights = F.softmax(self.alphas_reduce, dim=-1) + #weights = F.softmax(self.alphas_reduce, dim=-1) + weights = self.alphas_reduce fixed_weights = fixed_reduce_weights #weights = reduce_weights else: - weights = F.softmax(self.alphas_normal, dim=-1) + #weights = F.softmax(self.alphas_normal, dim=-1) + weights = self.alphas_normal fixed_weights = fixed_normal_weights #weights = normal_weights s0, s1 = s1, cell(s0, s1, weights, fixed_weights) @@ -158,10 +159,10 @@ def _initialize_alphas(self): k = sum(1 for i in range(self._steps) for n in range(2+i)) num_ops = len(PRIMITIVES) - self.alphas_normal = Variable(1e-3*torch.randn(k, num_ops).cuda(), requires_grad=True) - self.alphas_reduce = Variable(1e-3*torch.randn(k, num_ops).cuda(), requires_grad=True) - #self.alphas_normal = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=True) - #self.alphas_reduce = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=True) + #self.alphas_normal = Variable(1e-3*torch.randn(k, num_ops).cuda(), requires_grad=True) + #self.alphas_reduce = Variable(1e-3*torch.randn(k, num_ops).cuda(), requires_grad=True) + self.alphas_normal = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=False) + self.alphas_reduce = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=False) self._arch_parameters = [ self.alphas_normal, self.alphas_reduce, @@ -274,4 +275,24 @@ def _compute_weight_from_alphas(self): for i, edge in enumerate(fixed_reduce_edges): reduce_weights[edge[1]][edge[0]] = 1.0 - return (normal_weights, reduce_weights) \ No newline at end of file + return (normal_weights, reduce_weights) + + def generate_random_alphas(self): + self._initialize_alphas() + n = 2 + start = 0 + for i in range(self._steps): + end = start + n + indices = list(range(start, end)) + random.shuffle(indices) + for e in indices[:2]: + selected_op = random.randint(0, len(PRIMITIVES)-1) + self.alphas_normal[e][selected_op] = 1.0 + + random.shuffle(indices) + for e in indices[:2]: + selected_op = random.randint(0, len(PRIMITIVES)-1) + self.alphas_reduce[e][selected_op] = 1.0 + + start = end + n += 1 \ No newline at end of file diff --git a/cnn/train_search.py b/cnn/train_search.py index 8ed9fb3..1a7d5e8 100644 --- a/cnn/train_search.py +++ b/cnn/train_search.py @@ -12,22 +12,25 @@ import torch.nn.functional as F import torchvision.datasets as dset import torch.backends.cudnn as cudnn +import pickle from torch.autograd import Variable from model_search import Network from architect import Architect +from genotypes import PRIMITIVES +from genotypes import Genotype parser = argparse.ArgumentParser("cifar") parser.add_argument('--data', type=str, default='../data', help='location of the data corpus') parser.add_argument('--batch_size', type=int, default=256, help='batch size') -parser.add_argument('--learning_rate', type=float, default=0.025, help='init learning rate') +parser.add_argument('--learning_rate', type=float, default=0.05, help='init learning rate') 
parser.add_argument('--learning_rate_min', type=float, default=0.001, help='min learning rate') parser.add_argument('--momentum', type=float, default=0.9, help='momentum') parser.add_argument('--weight_decay', type=float, default=3e-4, help='weight decay') parser.add_argument('--report_freq', type=float, default=50, help='report frequency') parser.add_argument('--gpu', type=int, default=0, help='gpu device id') -parser.add_argument('--epochs', type=int, default=1, help='num of training epochs') +parser.add_argument('--epochs', type=int, default=300, help='num of training epochs') parser.add_argument('--init_channels', type=int, default=16, help='num of init channels') parser.add_argument('--layers', type=int, default=8, help='total number of layers') parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model') @@ -42,7 +45,7 @@ parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding') parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding') #Added -parser.add_argument('--max_hidden_node_num', type=int, default=5, help='max number of hidden nodes in a cell') +parser.add_argument('--max_hidden_node_num', type=int, default=1, help='max number of hidden nodes in a cell') args = parser.parse_args() args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S")) @@ -57,19 +60,39 @@ CIFAR_CLASSES = 10 - - -def main(): +model_path = 'search-EXP-20181203-165707/weights.pt' +node_num = 4 +search_epoch = 100 + +def w_parse(weights): + gene = [] + n = 2 + start = 0 + for i in range(node_num): + end = start + n + W = weights[start:end].copy() + edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x]))))[:2] + for j in edges: + k_best = None + for k in range(len(W[j])): + if k_best is None or W[j][k] > W[j][k_best]: + k_best = k + gene.append((PRIMITIVES[k_best], j)) + start = end + n += 1 + return gene + +def main(seed): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) - np.random.seed(args.seed) + np.random.seed(seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True - torch.manual_seed(args.seed) + torch.manual_seed(seed) cudnn.enabled=True - torch.cuda.manual_seed(args.seed) + torch.cuda.manual_seed(seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) @@ -83,7 +106,8 @@ def main(): model.parameters(), args.learning_rate, momentum=args.momentum, - weight_decay=args.weight_decay) + weight_decay=args.weight_decay, + nesterov=True) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) @@ -109,59 +133,71 @@ def main(): optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) - - for node_nums in range(args.max_hidden_node_num): - for epoch in range(args.epochs): - scheduler.step() - lr = scheduler.get_lr()[0] - logging.info('epoch %d lr %e', epoch, lr) - - genotype = model.genotype() - logging.info('genotype = %s', genotype) - - #print(F.softmax(model.alphas_normal, dim=-1)) - #print(F.softmax(model.alphas_reduce, dim=-1)) - normal_weights, reduce_weights = model._compute_weight_from_alphas() - #normal_weights = torch.from_numpy(normal_weights) - #reduce_weights = torch.from_numpy(reduce_weights) - print(normal_weights) - print(reduce_weights) - - # training - train_acc, train_obj = train(train_queue, 
valid_queue, model, architect, criterion, optimizer, lr) - logging.info('train_acc %f', train_acc) + #model.load_state_dict(torch.load(model_path)) - # validation - if (epoch+1) % 1 == 0: - valid_acc, valid_obj = infer(valid_queue, model, criterion) - logging.info('valid_acc %f', valid_acc) + for epoch in range(args.epochs): + scheduler.step() + lr = scheduler.get_lr()[0] + logging.info('epoch %d lr %e', epoch, lr) + + #genotype = model.genotype() + #logging.info('genotype = %s', genotype) + + #print(model.alphas_normal) + #print(model.alphas_reduce) + #normal_weights, reduce_weights = model._compute_weight_from_alphas() + #normal_weights = torch.from_numpy(normal_weights) + #reduce_weights = torch.from_numpy(reduce_weights) + #print(normal_weights) + #print(reduce_weights) + + # training + train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr) + logging.info('train_acc %f', train_acc) + + # validation + if (epoch+1) % 5 == 0: + valid_acc, valid_obj = infer(valid_queue, model, criterion) + logging.info('valid_acc %f', valid_acc) + + utils.save(model, os.path.join(args.save, 'weights.pt')) - utils.save(model, os.path.join(args.save, 'weights.pt')) + k = sum(1 for i in range(node_num) for n in range(2+i)) + num_ops = len(PRIMITIVES) + acc_mat_normal = np.zeros((k, num_ops)) + cnt_mat_normal = np.zeros((k, num_ops)) + acc_mat_reduce = np.zeros((k, num_ops)) + cnt_mat_reduce = np.zeros((k, num_ops)) + np.set_printoptions(precision=3) + for epoch in range(search_epoch): + print ('Search epoch: ', epoch) + model.generate_random_alphas() + acc_normal, cnt_normal, acc_reduce, cnt_reduce = infer_for_search(valid_queue, model) - model.add_node_to_cell() - model = model.cuda() + acc_mat_normal = acc_mat_normal + acc_normal + cnt_mat_normal = cnt_mat_normal + cnt_normal + acc_mat_reduce = acc_mat_reduce + acc_reduce + cnt_mat_reduce = cnt_mat_reduce + cnt_reduce - optimizer = torch.optim.SGD( - model.parameters(), - args.learning_rate, - momentum=args.momentum, - weight_decay=args.weight_decay) + avg_acc_normal = np.divide(acc_mat_normal, cnt_mat_normal) + avg_acc_normal[np.isnan(avg_acc_normal)] = 0 + avg_acc_normal[np.isinf(avg_acc_normal)] = 0 - if node_nums != 2: - batch_size = int(batch_size/2) - train_queue = torch.utils.data.DataLoader( - train_data, batch_size=batch_size, - sampler=train_sampler, - pin_memory=False, num_workers=0) - valid_queue = torch.utils.data.DataLoader( - train_data, batch_size=batch_size, - sampler=valid_sampler, - pin_memory=False, num_workers=0) + avg_acc_reduce = np.divide(acc_mat_reduce, cnt_mat_reduce) + avg_acc_reduce[np.isnan(avg_acc_reduce)] = 0 + avg_acc_reduce[np.isinf(avg_acc_reduce)] = 0 - scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( - optimizer, float(args.epochs), eta_min=args.learning_rate_min) - architect.reset_optimizer() - + print ('Avg accuracies') + print (avg_acc_normal) + print (avg_acc_reduce, '\n') + + gene_normal = w_parse(avg_acc_normal) + gene_reduce = w_parse(avg_acc_reduce) + print ('Normal genotype: ', gene_normal) + print ('Reduce genotype: ', gene_reduce, '\n') + + with open(os.path.join(args.save, 'search_results.pkl'), 'wb') as f: + pickle.dump((acc_mat_normal, cnt_mat_normal, acc_mat_reduce, cnt_mat_reduce), f) def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr): objs = utils.AvgrageMeter() @@ -176,13 +212,16 @@ def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr): target = Variable(target, requires_grad=False).cuda(async=True) 
# get a random minibatch from the search queue with replacement - input_search, target_search = next(iter(valid_queue)) - input_search = Variable(input_search, requires_grad=False).cuda() - target_search = Variable(target_search, requires_grad=False).cuda(async=True) + #input_search, target_search = next(iter(valid_queue)) + #input_search = Variable(input_search, requires_grad=False).cuda() + #target_search = Variable(target_search, requires_grad=False).cuda(async=True) - architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled) + #architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled) optimizer.zero_grad() + model.generate_random_alphas() + #print(model.alphas_normal) + #print(model.alphas_reduce) logits = model(input) loss = criterion(logits, target) @@ -197,23 +236,38 @@ def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr): if step % args.report_freq == 0: logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg) - if step >= 50: - break + #if step >= 50: + #break return top1.avg, objs.avg +def train_arch(train_queue, valid_queue, model, architect, criterion, optimizer, lr): + for step, (input, target) in enumerate(train_queue): + model.train() + n = input.size(0) + + input = Variable(input, requires_grad=False).cuda() + target = Variable(target, requires_grad=False).cuda(async=True) + + # get a random minibatch from the search queue with replacement + input_search, target_search = next(iter(valid_queue)) + input_search = Variable(input_search, requires_grad=False).cuda() + target_search = Variable(target_search, requires_grad=False).cuda(async=True) + + architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled) def infer(valid_queue, model, criterion): objs = utils.AvgrageMeter() top1 = utils.AvgrageMeter() top5 = utils.AvgrageMeter() - model.eval() + model.eval() with torch.no_grad(): for step, (input, target) in enumerate(valid_queue): input = Variable(input).cuda() target = Variable(target).cuda(async=True) - + + model.generate_random_alphas() logits = model(input) loss = criterion(logits, target) @@ -225,12 +279,43 @@ def infer(valid_queue, model, criterion): if step % args.report_freq == 0: logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg) - if step >= 50: - break + #if step >= 50: + #break return top1.avg, objs.avg +def infer_for_search(valid_queue, model): + model.eval() + + k = sum(1 for i in range(node_num) for n in range(2+i)) + num_ops = len(PRIMITIVES) + acc_mat_normal = np.zeros((k, num_ops)) + cnt_mat_normal = np.zeros((k, num_ops)) + acc_mat_reduce = np.zeros((k, num_ops)) + cnt_mat_reduce = np.zeros((k, num_ops)) + with torch.no_grad(): + for step, (input, target) in enumerate(valid_queue): + input = Variable(input).cuda() + target = Variable(target).cuda(async=True) + + model.generate_random_alphas() + logits = model(input) + prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5)) + valid_acc = prec1.data.cpu().numpy() + + normal_gene = model.alphas_normal.data.cpu().numpy() + cnt_mat_normal = cnt_mat_normal + normal_gene + acc_mat_normal = acc_mat_normal + normal_gene * valid_acc + + reduce_gene = model.alphas_reduce.data.cpu().numpy() + cnt_mat_reduce = cnt_mat_reduce + reduce_gene + acc_mat_reduce = acc_mat_reduce + reduce_gene * valid_acc + + return acc_mat_normal, cnt_mat_normal, acc_mat_reduce, cnt_mat_reduce + if __name__ == '__main__': - main() + main(1) + #main(4) + #main(5) 
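
For reference, the Monte Carlo procedure that patch 1 implements in train_search.py reduces to: train the shared-weight hyper-network while re-sampling a random one-hot alpha matrix each step, then run many random architectures on the validation split, crediting every sampled (edge, op) entry with that batch's accuracy, and finally parse a genotype from the per-entry average accuracy. Below is a minimal standalone sketch of that accumulation and parsing logic, assuming the shapes and PRIMITIVES list from genotypes.py (with 'none' removed) and using a hypothetical evaluate() stub in place of infer_for_search(); it is an illustration of the scoring scheme, not the patch's exact code.

import numpy as np
import random

PRIMITIVES = ['max_pool_3x3', 'avg_pool_3x3', 'skip_connect',
              'sep_conv_3x3', 'sep_conv_5x5', 'dil_conv_3x3', 'dil_conv_5x5']
STEPS = 4                                # hidden nodes per cell (node_num in the patch)
K = sum(2 + i for i in range(STEPS))     # total candidate edges across all nodes

def sample_alphas():
    # One-hot (K x num_ops) matrix: for each node, pick 2 random input edges
    # and one random op per chosen edge (mirrors generate_random_alphas()).
    alphas = np.zeros((K, len(PRIMITIVES)))
    start, n = 0, 2
    for _ in range(STEPS):
        for e in random.sample(range(start, start + n), 2):
            alphas[e, random.randrange(len(PRIMITIVES))] = 1.0
        start += n
        n += 1
    return alphas

def evaluate(alphas):
    # Placeholder for a validation pass through the shared-weight model;
    # returns a hypothetical top-1 accuracy.
    return random.uniform(60.0, 90.0)

def parse(avg_acc):
    # Same rule as w_parse(): per node, keep the 2 edges with the highest
    # best-entry average accuracy, and the best op on each kept edge.
    gene, start, n = [], 0, 2
    for _ in range(STEPS):
        W = avg_acc[start:start + n]
        for j in sorted(range(n), key=lambda x: -W[x].max())[:2]:
            gene.append((PRIMITIVES[int(W[j].argmax())], j))
        start += n
        n += 1
    return gene

acc_mat = np.zeros((K, len(PRIMITIVES)))
cnt_mat = np.zeros((K, len(PRIMITIVES)))
for _ in range(100):                     # search_epoch iterations of random sampling
    alphas = sample_alphas()
    acc = evaluate(alphas)
    acc_mat += alphas * acc              # credit each sampled (edge, op) entry
    cnt_mat += alphas                    # count how often each entry was sampled
avg = np.divide(acc_mat, cnt_mat, out=np.zeros_like(acc_mat), where=cnt_mat > 0)
print(parse(avg))

The per-entry average here plays the role of an estimated marginal contribution of each (edge, op) choice under weight sharing; the patch stores the same two accumulator matrices (acc_mat_*, cnt_mat_*) and pickles them to search_results.pkl before parsing.
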
From d2217c01e0b1a9b2af51ebd9f07feca8b31f1072 Mon Sep 17 00:00:00 2001 From: HeungChangLee <36104855+HeungChangLee@users.noreply.github.com> Date: Wed, 2 Jan 2019 14:43:30 +0900 Subject: [PATCH 2/3] max_picker --- cnn/model_search_max_picker.py | 398 +++++++++++++++++++++++++++++++++ cnn/train_search_max_picker.py | 373 ++++++++++++++++++++++++++++++ 2 files changed, 771 insertions(+) create mode 100644 cnn/model_search_max_picker.py create mode 100644 cnn/train_search_max_picker.py diff --git a/cnn/model_search_max_picker.py b/cnn/model_search_max_picker.py new file mode 100644 index 0000000..0299bdc --- /dev/null +++ b/cnn/model_search_max_picker.py @@ -0,0 +1,398 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from operations import * +from torch.autograd import Variable +from genotypes import PRIMITIVES +from genotypes import Genotype +import random + +class MixedOp(nn.Module): + + def __init__(self, C, stride): + super(MixedOp, self).__init__() + self._ops = nn.ModuleList() + for primitive in PRIMITIVES: + op = OPS[primitive](C, stride, False) + if 'pool' in primitive: + op = nn.Sequential(op, nn.BatchNorm2d(C, affine=False)) + self._ops.append(op) + + def forward(self, x, weights): + #return sum(w * op(x) for w, op in zip(weights, self._ops)) + for w, op in zip(weights, self._ops): + if w.data.cpu().numpy() > 0.0: + return w * op(x) + + return w * op(x) + + +class Cell(nn.Module): + + def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev): + super(Cell, self).__init__() + self.reduction = reduction + #Added + #self.C = C + self.reduction_prev = reduction_prev + + if reduction_prev: + self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False) + else: + self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False) + self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False) + self._steps = steps + self._multiplier = multiplier + + self._ops = nn.ModuleList() + self._bns = nn.ModuleList() + for i in range(self._steps): + for j in range(2+i): + stride = 2 if reduction and j < 2 else 1 + op = MixedOp(C, stride) + self._ops.append(op) + + def forward(self, s0, s1, weights): + s0 = self.preprocess0(s0) + s1 = self.preprocess1(s1) + + states = [s0, s1] + offset = 0 + for i in range(self._steps): + s = sum(self._ops[offset+j](h, weights[offset+j]) for j, h in enumerate(states)) + offset += len(states) + states.append(s) + + return torch.cat(states[-self._multiplier:], dim=1) + + +class Network(nn.Module): + + def __init__(self, C, num_classes, layers, criterion, steps=4, multiplier=4, stem_multiplier=3): + super(Network, self).__init__() + self._C = C + self._num_classes = num_classes + self._layers = layers + self._criterion = criterion + self._steps = steps + self._multiplier = multiplier + #Added + self.fixed_edges = None + self.init_C = stem_multiplier*C + + C_curr = stem_multiplier*C + self.stem = nn.Sequential( + nn.Conv2d(3, C_curr, 3, padding=1, bias=False), + nn.BatchNorm2d(C_curr) + ) + + C_prev_prev, C_prev, C_curr = C_curr, C_curr, C + self.cells = nn.ModuleList() + reduction_prev = False + for i in range(layers): + if i in [layers//3, 2*layers//3]: + C_curr *= 2 + reduction = True + else: + reduction = False + cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev) + reduction_prev = reduction + self.cells += [cell] + C_prev_prev, C_prev = C_prev, multiplier*C_curr + + self.global_pooling = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Linear(C_prev, num_classes) + + 
self._initialize_alphas() + self.edge_comb_dict = {} + for i in range(self._steps): + count = 0 + temp_dict = {} + for j in range(i+2): + for k in range(j+1): + temp_dict[count] = (k, j+1) + count += 1 + self.edge_comb_dict[i] = temp_dict + + def new(self): + model_new = Network(self._C, self._num_classes, self._layers, self._criterion).cuda() + for x, y in zip(model_new.arch_parameters(), self.arch_parameters()): + x.data.copy_(y.data) + return model_new + + def forward(self, input): + s0 = s1 = self.stem(input) + #Added + #normal_weights, reduce_weights = self._compute_weight_from_alphas() + #normal_weights = F.softmax(self.alphas_normal, dim=-1) + #reduce_weights = F.softmax(self.alphas_reduce, dim=-1) + #normal_weights = torch.from_numpy(normal_weights).cuda() + #normal_weights = Variable(normal_weights, requires_grad=True) + #reduce_weights = torch.from_numpy(reduce_weights).cuda() + #reduce_weights = Variable(reduce_weights, requires_grad=True) + + for i, cell in enumerate(self.cells): + if cell.reduction: + #weights = F.softmax(self.alphas_reduce, dim=-1) + weights = self.sampled_weight_normal + #weights = reduce_weights + else: + #weights = F.softmax(self.alphas_normal, dim=-1) + weights = self.sampled_weight_reduce + #weights = normal_weights + s0, s1 = s1, cell(s0, s1, weights) + out = self.global_pooling(s1) + logits = self.classifier(out.view(out.size(0),-1)) + return logits + + def _loss(self, input, target): + logits = self(input) + return self._criterion(logits, target) + + def _initialize_alphas(self): + k = sum(1 for i in range(self._steps) for n in range(2+i)) + num_ops = len(PRIMITIVES) + + self.alphas_normal = Variable(1e-3*torch.randn(k, num_ops).cuda(), requires_grad=True) + self.alphas_reduce = Variable(1e-3*torch.randn(k, num_ops).cuda(), requires_grad=True) + #self.alphas_normal = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=False) + #self.alphas_reduce = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=False) + self.edge_alphas_normal = [] + self.edge_alphas_reduce = [] + for i in range(self._steps-1): + self.edge_alphas_normal.append(Variable(1e-3*torch.randn(sum(j for j in range(i+3))).cuda(), requires_grad=True)) + self.edge_alphas_reduce.append(Variable(1e-3*torch.randn(sum(j for j in range(i+3))).cuda(), requires_grad=True)) + + self._arch_parameters = [ + self.alphas_normal, + self.alphas_reduce, + ] + self._arch_parameters.extend(self.edge_alphas_normal) + self._arch_parameters.extend(self.edge_alphas_reduce) + + def sample_new_architecture(self): + selected_normal_edges = [0, 1] + selected_reduce_edges = [0, 1] + criterion = nn.CrossEntropyLoss() + criterion = criterion.cuda() + log_prob_list = [] + for i in range(len(self.edge_alphas_normal)): + comb_dict = self.edge_comb_dict[i] + for j in range(2): + if j == 0: + edges = self.edge_alphas_normal[i] + else: + edges = self.edge_alphas_reduce[i] + edge_probs = F.softmax(edges) + #sampled = torch.multinomial(edge_probs, 1) + sampled = torch.max(edge_probs, -1)[1] + log_prob_list.append(criterion(edge_probs.view(1, edge_probs.size(0)), sampled)) + edge_comb = int(sampled) + edge1, edge2 = comb_dict[edge_comb] + + if j == 0: + selected_normal_edges.append(edge1) + selected_normal_edges.append(edge2) + else: + selected_reduce_edges.append(edge1) + selected_reduce_edges.append(edge2) + + + k = sum(1 for i in range(self._steps) for n in range(2+i)) + num_ops = len(PRIMITIVES) + self.sampled_weight_normal = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=False) + 
self.sampled_weight_reduce = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=False) + for j in range(2): + offset = 0 + for i in range(self._steps): + for e in range(2): + if j == 0: + edge_index = offset+selected_normal_edges[i*2+e] + op_weights = self.alphas_normal[edge_index] + else: + edge_index = offset+selected_reduce_edges[i*2+e] + op_weights = self.alphas_reduce[edge_index] + op_probs = F.softmax(op_weights) + #sampled = torch.multinomial(op_probs, 1) + sampled = torch.max(op_probs, -1)[1] + log_prob_list.append(criterion(op_probs.view(1, op_probs.size(0)), sampled)) + if j == 0: + self.sampled_weight_normal[edge_index, int(sampled)] = 1.0 + else: + self.sampled_weight_reduce[edge_index, int(sampled)] = 1.0 + offset += (i+2) + + self.sample_log_probs = torch.sum(torch.cat(log_prob_list)) + + def arch_parameters(self): + return self._arch_parameters + + def genotype(self): + + def _parse(weights): + gene = [] + n = 2 + start = 0 + for i in range(self._steps): + end = start + n + W = weights[start:end].copy() + edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2] + for j in edges: + k_best = None + for k in range(len(W[j])): + if k != PRIMITIVES.index('none'): + if k_best is None or W[j][k] > W[j][k_best]: + k_best = k + gene.append((PRIMITIVES[k_best], j)) + start = end + n += 1 + return gene + + #gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).data.cpu().numpy()) + #gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).data.cpu().numpy()) + + normal_weights, reduce_weights = self._compute_weight_from_alphas() + gene_normal = _parse(normal_weights.data.cpu().numpy()) + gene_reduce = _parse(reduce_weights.data.cpu().numpy()) + + concat = range(2+self._steps-self._multiplier, self._steps+2) + genotype = Genotype( + normal=gene_normal, normal_concat=concat, + reduce=gene_reduce, reduce_concat=concat + ) + return genotype + + def add_node_to_cell(self): + #Add new node to cells + self._multiplier += 1 + C_prev_prev, C_prev, C_curr = self.init_C, self.init_C, self._C + for i, cell in enumerate(self.cells): + if i in [self._layers//3, 2*self._layers//3]: + C_curr *= 2 + + if cell.reduction_prev: + cell.preprocess0 = FactorizedReduce(C_prev_prev, C_curr, affine=False) + else: + cell.preprocess0 = ReLUConvBN(C_prev_prev, C_curr, 1, 1, 0, affine=False) + cell.preprocess1 = ReLUConvBN(C_prev, C_curr, 1, 1, 0, affine=False) + + node_num = cell._steps + for j in range(2+node_num): + stride = 2 if cell.reduction and j < 2 else 1 + op = MixedOp(C_curr, stride) + cell._ops.append(op) + cell._steps += 1 + cell._multiplier += 1 + + C_prev_prev, C_prev = C_prev, self._multiplier*C_curr + + self.classifier = nn.Linear(C_prev, self._num_classes) + + def _alpha_idx_parse(weights): + gene = [] + n = 2 + start = 0 + for i in range(self._steps): + end = start + n + W = weights[start:end].copy() + edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2] + for j in edges: + k_best = None + for k in range(len(W[j])): + if k != PRIMITIVES.index('none'): + if k_best is None or W[j][k] > W[j][k_best]: + k_best = k + gene.append((k_best, start+j)) + start = end + n += 1 + return gene + + normal_weights, reduce_weights = self._compute_weight_from_alphas() + fixed_normal_edges = _alpha_idx_parse(normal_weights.data.cpu().numpy()) + fixed_reduce_edges = _alpha_idx_parse(reduce_weights.data.cpu().numpy()) + self.fixed_edges = (fixed_normal_edges, 
fixed_reduce_edges) + self._steps += 1 + self._initialize_alphas() + + def _compute_weight_from_alphas(self): + if self.fixed_edges == None: + return (F.softmax(self.alphas_normal, dim=-1), F.softmax(self.alphas_reduce, dim=-1)) + else: + fixed_normal_edges, fixed_reduce_edges = self.fixed_edges + normal_weights = F.softmax(self.alphas_normal, dim=-1) + k = sum(1 for i in range(self._steps-1) for n in range(2+i)) + temp = torch.zeros((k,len(PRIMITIVES))) + normal_weights[:k] = temp + for i, edge in enumerate(fixed_normal_edges): + normal_weights[edge[1]][edge[0]] = 1.0 + + reduce_weights = F.softmax(self.alphas_reduce, dim=-1) + reduce_weights[:k] = temp + for i, edge in enumerate(fixed_reduce_edges): + reduce_weights[edge[1]][edge[0]] = 1.0 + + return (normal_weights, reduce_weights) + + def generate_random_alphas(self): #, is_uniform, max_normal, max_reduce): + self._initialize_alphas() + n = 2 + start = 0 + for i in range(self._steps): + end = start + n + indices = list(range(start, end)) + random.shuffle(indices) + for e in indices[:2]: + selected_op = random.randint(0, len(PRIMITIVES)-1) + self.alphas_normal[e, selected_op] = 1.0 + + random.shuffle(indices) + for e in indices[:2]: + selected_op = random.randint(0, len(PRIMITIVES)-1) + self.alphas_reduce[e, selected_op] = 1.0 + + start = end + n += 1 + + def generate_droppath_alphas(self): #, is_uniform, max_normal, max_reduce): + self._initialize_alphas() + n = 2 + start = 0 + for i in range(self._steps): + end = start + n + indices = list(range(start, end)) + op_list = list(range(len(PRIMITIVES))) + drop_prop = 0.25 + remain_op_num = int(len(PRIMITIVES)*(1-drop_prop)) + for e in indices: + random.shuffle(op_list) + for o in op_list[:remain_op_num]: + self.alphas_normal[e, o] = 1.0 + + random.shuffle(op_list) + for o in op_list[:remain_op_num]: + self.alphas_reduce[e, o] = 1.0 + + start = end + n += 1 + + def generate_fixed_alphas(self): #, is_uniform, max_normal, max_reduce): + self._initialize_alphas() + fixed_normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 0), ('dil_conv_3x3', 2)] + fixed_reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('max_pool_3x3', 1)] + n = 2 + start = 0 + for i in range(self._steps): + end = start + n + for e in fixed_normal[i*2:i*2+2]: + edge_idx = e[1] + start + op_idx = PRIMITIVES.index(e[0]) + self.alphas_normal[edge_idx, op_idx] = 1.0 + + for e in fixed_reduce[i*2:i*2+2]: + edge_idx = e[1] + start + op_idx = PRIMITIVES.index(e[0]) + self.alphas_reduce[edge_idx, op_idx] = 1.0 + + start = end + n += 1 + \ No newline at end of file diff --git a/cnn/train_search_max_picker.py b/cnn/train_search_max_picker.py new file mode 100644 index 0000000..c35ac13 --- /dev/null +++ b/cnn/train_search_max_picker.py @@ -0,0 +1,373 @@ +import os +import sys +import time +import glob +import numpy as np +import torch +import utils +import logging +import argparse +import torch.nn as nn +import torch.utils +import torch.nn.functional as F +import torchvision.datasets as dset +import torch.backends.cudnn as cudnn +import pickle +import time +import pandas as pd +import random + +from torch.autograd import Variable +from model_search import Network +from architect import Architect + +from genotypes import PRIMITIVES +from genotypes import Genotype + +parser = argparse.ArgumentParser("cifar") +parser.add_argument('--data', 
type=str, default='../data', help='location of the data corpus') +parser.add_argument('--batch_size', type=int, default=128, help='batch size') +parser.add_argument('--learning_rate', type=float, default=0.05, help='init learning rate') +parser.add_argument('--learning_rate_min', type=float, default=0.001, help='min learning rate') +parser.add_argument('--momentum', type=float, default=0.9, help='momentum') +parser.add_argument('--weight_decay', type=float, default=3e-4, help='weight decay') +parser.add_argument('--report_freq', type=float, default=50, help='report frequency') +parser.add_argument('--gpu', type=int, default=0, help='gpu device id') +parser.add_argument('--epochs', type=int, default=300, help='num of training epochs') +parser.add_argument('--init_channels', type=int, default=16, help='num of init channels') +parser.add_argument('--layers', type=int, default=8, help='total number of layers') +parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model') +parser.add_argument('--cutout', action='store_true', default=False, help='use cutout') +parser.add_argument('--cutout_length', type=int, default=16, help='cutout length') +parser.add_argument('--drop_path_prob', type=float, default=0.3, help='drop path probability') +parser.add_argument('--save', type=str, default='EXP', help='experiment name') +parser.add_argument('--seed', type=int, default=1, help='random seed') +parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping') +parser.add_argument('--train_portion', type=float, default=0.8, help='portion of training data') +parser.add_argument('--unrolled', action='store_true', default=False, help='use one-step unrolled validation loss') +parser.add_argument('--arch_learning_rate', type=float, default=3.5e-4, help='learning rate for arch encoding') +parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding') +#Added +parser.add_argument('--max_hidden_node_num', type=int, default=1, help='max number of hidden nodes in a cell') +args = parser.parse_args() + +args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S")) +utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py')) + +log_format = '%(asctime)s %(message)s' +logging.basicConfig(stream=sys.stdout, level=logging.INFO, + format=log_format, datefmt='%m/%d %I:%M:%S %p') +fh = logging.FileHandler(os.path.join(args.save, 'log.txt')) +fh.setFormatter(logging.Formatter(log_format)) +logging.getLogger().addHandler(fh) + + +CIFAR_CLASSES = 10 +model_path = 'search-RL-100iter/weights.pt' +node_num = 4 +search_epoch = 1 #35*100/2 +search_arch_num = 100 + +def w_parse(weights): + gene = [] + n = 2 + start = 0 + for i in range(node_num): + end = start + n + W = weights[start:end].copy() + edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x]))))[:2] + for j in edges: + k_best = None + for k in range(len(W[j])): + if k_best is None or W[j][k] > W[j][k_best]: + k_best = k + gene.append((PRIMITIVES[k_best], j)) + start = end + n += 1 + return gene + +def main(seed): + if not torch.cuda.is_available(): + logging.info('no gpu device available') + sys.exit(1) + + np.random.seed(seed) + torch.cuda.set_device(args.gpu) + cudnn.benchmark = True + torch.manual_seed(seed) + cudnn.enabled=True + torch.cuda.manual_seed(seed) + logging.info('gpu device = %d' % args.gpu) + logging.info("args = %s", args) + + criterion = nn.CrossEntropyLoss() + criterion = criterion.cuda() + model = 
Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) + model = model.cuda() + logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) + + optimizer = torch.optim.SGD( + model.parameters(), + args.learning_rate, + momentum=args.momentum, + weight_decay=args.weight_decay, + nesterov=True) + + train_transform, valid_transform = utils._data_transforms_cifar10(args) + train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) + + num_train = len(train_data) + indices = list(range(num_train)) + split = int(np.floor(args.train_portion * num_train)) + + batch_size = args.batch_size + train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[:split]) + valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]) + train_queue = torch.utils.data.DataLoader( + train_data, batch_size=batch_size, + sampler=train_sampler, + pin_memory=False, num_workers=0) + + valid_queue = torch.utils.data.DataLoader( + train_data, batch_size=batch_size, + sampler=valid_sampler, + pin_memory=False, num_workers=0) + + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer, float(args.epochs), eta_min=args.learning_rate_min) + + architect = Architect(model, args) + model.load_state_dict(torch.load(model_path)) + + start_time = time.time() + best_arch_search(valid_queue, model) +''' + for epoch in range(args.epochs): + scheduler.step() + lr = scheduler.get_lr()[0] + logging.info('epoch %d lr %e', epoch, lr) + + #genotype = model.genotype() + #logging.info('genotype = %s', genotype) + + #print(model.alphas_normal) + #print(model.alphas_reduce) + #normal_weights, reduce_weights = model._compute_weight_from_alphas() + #normal_weights = torch.from_numpy(normal_weights) + #reduce_weights = torch.from_numpy(reduce_weights) + #print(normal_weights) + #print(reduce_weights) + + # training + train_acc, train_obj = train(train_queue, valid_queue, model, criterion, optimizer, lr) + logging.info('train_acc %f', train_acc) + + valid_acc = train_arch(train_queue, valid_queue, model, architect, criterion, optimizer, lr) + logging.info('valid_acc %f', valid_acc) + + # validation + #if (epoch+1) % 1 == 0: + #valid_acc, valid_obj = infer(valid_queue, model, criterion) + #logging.info('valid_acc %f', valid_acc) + + utils.save(model, os.path.join(args.save, 'weights.pt')) +''' + + #best_arch_search(valid_queue, model) + +''' + k = sum(1 for i in range(node_num) for n in range(2+i)) + num_ops = len(PRIMITIVES) + acc_mat_normal = np.zeros((k, num_ops)) + cnt_mat_normal = np.zeros((k, num_ops)) + acc_mat_reduce = np.zeros((k, num_ops)) + cnt_mat_reduce = np.zeros((k, num_ops)) + np.set_printoptions(precision=3) + #for epoch in range(search_epoch): + nop = True + if nop: + print ('Search epoch: ', epoch) + #model.generate_random_alphas() + model.generate_fixed_alphas() + valid_acc = infer_for_search(valid_queue, model) + + #valid_acc = valid_acc.data.cpu().numpy() + + normal_gene = model.alphas_normal.data.cpu().numpy() + cnt_mat_normal = cnt_mat_normal + normal_gene + acc_mat_normal = acc_mat_normal + normal_gene * valid_acc + + reduce_gene = model.alphas_reduce.data.cpu().numpy() + cnt_mat_reduce = cnt_mat_reduce + reduce_gene + acc_mat_reduce = acc_mat_reduce + reduce_gene * valid_acc + + avg_acc_normal = np.divide(acc_mat_normal, cnt_mat_normal) + avg_acc_normal[np.isnan(avg_acc_normal)] = 0 + avg_acc_normal[np.isinf(avg_acc_normal)] = 0 + + avg_acc_reduce = np.divide(acc_mat_reduce, cnt_mat_reduce) + 
avg_acc_reduce[np.isnan(avg_acc_reduce)] = 0 + avg_acc_reduce[np.isinf(avg_acc_reduce)] = 0 + + if (epoch+1) % 1 == 0: + print ('Avg accuracies') + print (avg_acc_normal) + print (avg_acc_reduce, '\n') + + gene_normal = w_parse(avg_acc_normal) + gene_reduce = w_parse(avg_acc_reduce) + print ('Normal genotype: ', gene_normal) + print ('Reduce genotype: ', gene_reduce) + elapsed_time = time.time() - start_time + elapsed_time = time.strftime("%H:%M:%S", time.gmtime(elapsed_time)) + print('Elapsed time: ', elapsed_time, '\n') + + #with open(os.path.join(args.save, 'search_results.pkl'), 'wb') as f: + #pickle.dump((acc_mat_normal, cnt_mat_normal, acc_mat_reduce, cnt_mat_reduce), f) ''' + +def train(train_queue, valid_queue, model, criterion, optimizer, lr): + objs = utils.AvgrageMeter() + top1 = utils.AvgrageMeter() + top5 = utils.AvgrageMeter() + + for step, (input, target) in enumerate(train_queue): + model.train() + n = input.size(0) + + input = Variable(input, requires_grad=False).cuda() + target = Variable(target, requires_grad=False).cuda(async=True) + + # get a random minibatch from the search queue with replacement + #input_search, target_search = next(iter(valid_queue)) + #input_search = Variable(input_search, requires_grad=False).cuda() + #target_search = Variable(target_search, requires_grad=False).cuda(async=True) + + #architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled) + + optimizer.zero_grad() + #model.generate_droppath_alphas() + model.sample_new_architecture() + #print(model.sampled_weight_normal) + #print(model.sampled_weight_reduce) + logits = model(input) + loss = criterion(logits, target) + + loss.backward() + nn.utils.clip_grad_norm(model.parameters(), args.grad_clip) + optimizer.step() + + prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5)) + objs.update(loss.data[0], n) + top1.update(prec1.data[0], n) + top5.update(prec5.data[0], n) + + if step % args.report_freq == 0: + logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg) + #if step >= 50: + #break + + return top1.avg, objs.avg + +def train_arch(train_queue, valid_queue, model, architect, criterion, optimizer, lr): + top1 = utils.AvgrageMeter() + for step, (input, target) in enumerate(valid_queue): + model.eval() + n = input.size(0) + + input = Variable(input, requires_grad=False).cuda() + target = Variable(target, requires_grad=False).cuda(async=True) + + # get a random minibatch from the search queue with replacement + model.sample_new_architecture() + if step == 0: + for e in model.edge_alphas_normal: + print(F.softmax(e, dim=-1)) + for e in model.edge_alphas_reduce: + print(F.softmax(e, dim=-1)) + print(F.softmax(model.alphas_normal, dim=-1)) + print(F.softmax(model.alphas_reduce, dim=-1)) + print(architect.baseline) + logits = model(input) + architect.step(input, target, lr, optimizer, unrolled=args.unrolled) + prec1, _ = utils.accuracy(logits, target, topk=(1, 5)) + n = input.size(0) + top1.update(prec1.data[0], n) + + return top1.avg + + +def infer(valid_queue, model, criterion): + objs = utils.AvgrageMeter() + top1 = utils.AvgrageMeter() + top5 = utils.AvgrageMeter() + model.eval() + + #with torch.no_grad(): + for step, (input, target) in enumerate(valid_queue): + input = Variable(input).cuda() + target = Variable(target).cuda(async=True) + + #model.generate_random_alphas() + #model.generate_droppath_alphas() + model.sample_new_architecture() + logits = model(input) + loss = criterion(logits, target) + + prec1, prec5 = utils.accuracy(logits, 
target, topk=(1, 5)) + n = input.size(0) + objs.update(loss.data[0], n) + top1.update(prec1.data[0], n) + top5.update(prec5.data[0], n) + + if step % args.report_freq == 0: + logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg) + #if step >= 50: + #break + + return top1.avg, objs.avg + +def infer_for_search(valid_queue, model): + top1 = utils.AvgrageMeter() + model.eval() + + #with torch.no_grad(): + for step, (input, target) in enumerate(valid_queue): + input = Variable(input).cuda() + target = Variable(target).cuda(async=True) + + #model.generate_random_alphas() + logits = model(input) + + prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5)) + n = input.size(0) + top1.update(prec1.data[0], n) + + #if step >= 50: + #break + + return top1.avg + +def best_arch_search(valid_queue, model): + model.eval() + result_df = pd.DataFrame(columns = ['Normal', 'Reduce', 'Val_acc']) + for i in range(search_arch_num): + input_search, target_search = next(iter(valid_queue)) + if input_search.size(0) != args.batch_size: + input_search, target_search = next(iter(valid_queue)) + input_search = Variable(input_search, requires_grad=False).cuda() + target_search = Variable(target_search, requires_grad=False).cuda(async=True) + + model.sample_new_architecture() + logits = model(input_search) + + prec1, _ = utils.accuracy(logits, target_search, topk=(1, 5)) + gene_normal = w_parse(model.sampled_weight_normal.data.cpu().numpy()) + gene_reduce = w_parse(model.sampled_weight_reduce.data.cpu().numpy()) + temp_df = pd.DataFrame([[gene_normal, gene_reduce, prec1.data[0]]], columns = ['Normal', 'Reduce', 'Val_acc']) + result_df = result_df.append(temp_df, ignore_index=True) + result_df = result_df.sort_values(by='Val_acc', ascending=False) + result_df.to_csv('search_result.csv') + + +if __name__ == '__main__': + main(2) \ No newline at end of file From e82c2b70d3ad189079c63d4074fe5fdacc925e16 Mon Sep 17 00:00:00 2001 From: HeungChangLee <36104855+HeungChangLee@users.noreply.github.com> Date: Wed, 2 Jan 2019 21:10:23 +0900 Subject: [PATCH 3/3] average_max_picker --- cnn/model_search_avg_max.py | 399 +++++++++++++++++++++++++++++++++++ cnn/train_search_avg_max.py | 405 ++++++++++++++++++++++++++++++++++++ 2 files changed, 804 insertions(+) create mode 100644 cnn/model_search_avg_max.py create mode 100644 cnn/train_search_avg_max.py diff --git a/cnn/model_search_avg_max.py b/cnn/model_search_avg_max.py new file mode 100644 index 0000000..561cb00 --- /dev/null +++ b/cnn/model_search_avg_max.py @@ -0,0 +1,399 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from operations import * +from torch.autograd import Variable +from genotypes import PRIMITIVES +from genotypes import Genotype +import random + +class MixedOp(nn.Module): + + def __init__(self, C, stride): + super(MixedOp, self).__init__() + self._ops = nn.ModuleList() + for primitive in PRIMITIVES: + op = OPS[primitive](C, stride, False) + if 'pool' in primitive: + op = nn.Sequential(op, nn.BatchNorm2d(C, affine=False)) + self._ops.append(op) + + def forward(self, x, weights): + #return sum(w * op(x) for w, op in zip(weights, self._ops)) + for w, op in zip(weights, self._ops): + if w.data.cpu().numpy() > 0.0: + return w * op(x) + + return w * op(x) + + +class Cell(nn.Module): + + def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev): + super(Cell, self).__init__() + self.reduction = reduction + #Added + #self.C = C + self.reduction_prev = reduction_prev + + if reduction_prev: + 
self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False) + else: + self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False) + self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False) + self._steps = steps + self._multiplier = multiplier + + self._ops = nn.ModuleList() + self._bns = nn.ModuleList() + for i in range(self._steps): + for j in range(2+i): + stride = 2 if reduction and j < 2 else 1 + op = MixedOp(C, stride) + self._ops.append(op) + + def forward(self, s0, s1, weights): + s0 = self.preprocess0(s0) + s1 = self.preprocess1(s1) + + states = [s0, s1] + offset = 0 + for i in range(self._steps): + s = sum(self._ops[offset+j](h, weights[offset+j]) for j, h in enumerate(states)) + offset += len(states) + states.append(s) + + return torch.cat(states[-self._multiplier:], dim=1) + + +class Network(nn.Module): + + def __init__(self, C, num_classes, layers, criterion, steps=4, multiplier=4, stem_multiplier=3): + super(Network, self).__init__() + self._C = C + self._num_classes = num_classes + self._layers = layers + self._criterion = criterion + self._steps = steps + self._multiplier = multiplier + #Added + + self.fixed_edges = None + self.init_C = stem_multiplier*C + + C_curr = stem_multiplier*C + self.stem = nn.Sequential( + nn.Conv2d(3, C_curr, 3, padding=1, bias=False), + nn.BatchNorm2d(C_curr) + ) + + C_prev_prev, C_prev, C_curr = C_curr, C_curr, C + self.cells = nn.ModuleList() + reduction_prev = False + for i in range(layers): + if i in [layers//3, 2*layers//3]: + C_curr *= 2 + reduction = True + else: + reduction = False + cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev) + reduction_prev = reduction + self.cells += [cell] + C_prev_prev, C_prev = C_prev, multiplier*C_curr + + self.global_pooling = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Linear(C_prev, num_classes) + + self._initialize_alphas() + self.edge_comb_dict = {} + for i in range(self._steps): + count = 0 + temp_dict = {} + for j in range(i+2): + for k in range(j+1): + temp_dict[count] = (k, j+1) + count += 1 + self.edge_comb_dict[i] = temp_dict + + def new(self): + model_new = Network(self._C, self._num_classes, self._layers, self._criterion).cuda() + for x, y in zip(model_new.arch_parameters(), self.arch_parameters()): + x.data.copy_(y.data) + return model_new + + def forward(self, input): + s0 = s1 = self.stem(input) + #Added + #normal_weights, reduce_weights = self._compute_weight_from_alphas() + #normal_weights = F.softmax(self.alphas_normal, dim=-1) + #reduce_weights = F.softmax(self.alphas_reduce, dim=-1) + #normal_weights = torch.from_numpy(normal_weights).cuda() + #normal_weights = Variable(normal_weights, requires_grad=True) + #reduce_weights = torch.from_numpy(reduce_weights).cuda() + #reduce_weights = Variable(reduce_weights, requires_grad=True) + + for i, cell in enumerate(self.cells): + if cell.reduction: + #weights = F.softmax(self.alphas_reduce, dim=-1) + weights = self.sampled_weight_normal + #weights = reduce_weights + else: + #weights = F.softmax(self.alphas_normal, dim=-1) + weights = self.sampled_weight_reduce + #weights = normal_weights + s0, s1 = s1, cell(s0, s1, weights) + out = self.global_pooling(s1) + logits = self.classifier(out.view(out.size(0),-1)) + return logits + + def _loss(self, input, target): + logits = self(input) + return self._criterion(logits, target) + + def _initialize_alphas(self): + k = sum(1 for i in range(self._steps) for n in range(2+i)) + num_ops = len(PRIMITIVES) + + self.alphas_normal = 
Variable(1e-3*torch.randn(k, num_ops).cuda(), requires_grad=True) + self.alphas_reduce = Variable(1e-3*torch.randn(k, num_ops).cuda(), requires_grad=True) + #self.alphas_normal = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=False) + #self.alphas_reduce = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=False) + self.edge_alphas_normal = [] + self.edge_alphas_reduce = [] + for i in range(self._steps-1): + self.edge_alphas_normal.append(Variable(1e-3*torch.randn(sum(j for j in range(i+3))).cuda(), requires_grad=True)) + self.edge_alphas_reduce.append(Variable(1e-3*torch.randn(sum(j for j in range(i+3))).cuda(), requires_grad=True)) + + self._arch_parameters = [ + self.alphas_normal, + self.alphas_reduce, + ] + self._arch_parameters.extend(self.edge_alphas_normal) + self._arch_parameters.extend(self.edge_alphas_reduce) + + def sample_new_architecture(self): + selected_normal_edges = [0, 1] + selected_reduce_edges = [0, 1] + criterion = nn.CrossEntropyLoss() + criterion = criterion.cuda() + log_prob_list = [] + for i in range(len(self.edge_alphas_normal)): + comb_dict = self.edge_comb_dict[i] + for j in range(2): + if j == 0: + edges = self.edge_alphas_normal[i] + else: + edges = self.edge_alphas_reduce[i] + edge_probs = F.softmax(edges) + sampled = torch.multinomial(edge_probs, 1) + #sampled = torch.max(edge_probs, -1)[1] + log_prob_list.append(criterion(edge_probs.view(1, edge_probs.size(0)), sampled)) + edge_comb = int(sampled) + edge1, edge2 = comb_dict[edge_comb] + + if j == 0: + selected_normal_edges.append(edge1) + selected_normal_edges.append(edge2) + else: + selected_reduce_edges.append(edge1) + selected_reduce_edges.append(edge2) + + + k = sum(1 for i in range(self._steps) for n in range(2+i)) + num_ops = len(PRIMITIVES) + self.sampled_weight_normal = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=False) + self.sampled_weight_reduce = Variable(1e-3*torch.zeros(k, num_ops).cuda(), requires_grad=False) + for j in range(2): + offset = 0 + for i in range(self._steps): + for e in range(2): + if j == 0: + edge_index = offset+selected_normal_edges[i*2+e] + op_weights = self.alphas_normal[edge_index] + else: + edge_index = offset+selected_reduce_edges[i*2+e] + op_weights = self.alphas_reduce[edge_index] + op_probs = F.softmax(op_weights) + sampled = torch.multinomial(op_probs, 1) + #sampled = torch.max(op_probs, -1)[1] + log_prob_list.append(criterion(op_probs.view(1, op_probs.size(0)), sampled)) + if j == 0: + self.sampled_weight_normal[edge_index, int(sampled)] = 1.0 + else: + self.sampled_weight_reduce[edge_index, int(sampled)] = 1.0 + offset += (i+2) + + self.sample_log_probs = torch.sum(torch.cat(log_prob_list)) + + def arch_parameters(self): + return self._arch_parameters + + def genotype(self): + + def _parse(weights): + gene = [] + n = 2 + start = 0 + for i in range(self._steps): + end = start + n + W = weights[start:end].copy() + edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2] + for j in edges: + k_best = None + for k in range(len(W[j])): + if k != PRIMITIVES.index('none'): + if k_best is None or W[j][k] > W[j][k_best]: + k_best = k + gene.append((PRIMITIVES[k_best], j)) + start = end + n += 1 + return gene + + #gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).data.cpu().numpy()) + #gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).data.cpu().numpy()) + + normal_weights, reduce_weights = self._compute_weight_from_alphas() + gene_normal = 
_parse(normal_weights.data.cpu().numpy()) + gene_reduce = _parse(reduce_weights.data.cpu().numpy()) + + concat = range(2+self._steps-self._multiplier, self._steps+2) + genotype = Genotype( + normal=gene_normal, normal_concat=concat, + reduce=gene_reduce, reduce_concat=concat + ) + return genotype + + def add_node_to_cell(self): + #Add new node to cells + self._multiplier += 1 + C_prev_prev, C_prev, C_curr = self.init_C, self.init_C, self._C + for i, cell in enumerate(self.cells): + if i in [self._layers//3, 2*self._layers//3]: + C_curr *= 2 + + if cell.reduction_prev: + cell.preprocess0 = FactorizedReduce(C_prev_prev, C_curr, affine=False) + else: + cell.preprocess0 = ReLUConvBN(C_prev_prev, C_curr, 1, 1, 0, affine=False) + cell.preprocess1 = ReLUConvBN(C_prev, C_curr, 1, 1, 0, affine=False) + + node_num = cell._steps + for j in range(2+node_num): + stride = 2 if cell.reduction and j < 2 else 1 + op = MixedOp(C_curr, stride) + cell._ops.append(op) + cell._steps += 1 + cell._multiplier += 1 + + C_prev_prev, C_prev = C_prev, self._multiplier*C_curr + + self.classifier = nn.Linear(C_prev, self._num_classes) + + def _alpha_idx_parse(weights): + gene = [] + n = 2 + start = 0 + for i in range(self._steps): + end = start + n + W = weights[start:end].copy() + edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2] + for j in edges: + k_best = None + for k in range(len(W[j])): + if k != PRIMITIVES.index('none'): + if k_best is None or W[j][k] > W[j][k_best]: + k_best = k + gene.append((k_best, start+j)) + start = end + n += 1 + return gene + + normal_weights, reduce_weights = self._compute_weight_from_alphas() + fixed_normal_edges = _alpha_idx_parse(normal_weights.data.cpu().numpy()) + fixed_reduce_edges = _alpha_idx_parse(reduce_weights.data.cpu().numpy()) + self.fixed_edges = (fixed_normal_edges, fixed_reduce_edges) + self._steps += 1 + self._initialize_alphas() + + def _compute_weight_from_alphas(self): + if self.fixed_edges == None: + return (F.softmax(self.alphas_normal, dim=-1), F.softmax(self.alphas_reduce, dim=-1)) + else: + fixed_normal_edges, fixed_reduce_edges = self.fixed_edges + normal_weights = F.softmax(self.alphas_normal, dim=-1) + k = sum(1 for i in range(self._steps-1) for n in range(2+i)) + temp = torch.zeros((k,len(PRIMITIVES))) + normal_weights[:k] = temp + for i, edge in enumerate(fixed_normal_edges): + normal_weights[edge[1]][edge[0]] = 1.0 + + reduce_weights = F.softmax(self.alphas_reduce, dim=-1) + reduce_weights[:k] = temp + for i, edge in enumerate(fixed_reduce_edges): + reduce_weights[edge[1]][edge[0]] = 1.0 + + return (normal_weights, reduce_weights) + + def generate_random_alphas(self): #, is_uniform, max_normal, max_reduce): + self._initialize_alphas() + n = 2 + start = 0 + for i in range(self._steps): + end = start + n + indices = list(range(start, end)) + random.shuffle(indices) + for e in indices[:2]: + selected_op = random.randint(0, len(PRIMITIVES)-1) + self.alphas_normal[e, selected_op] = 1.0 + + random.shuffle(indices) + for e in indices[:2]: + selected_op = random.randint(0, len(PRIMITIVES)-1) + self.alphas_reduce[e, selected_op] = 1.0 + + start = end + n += 1 + + def generate_droppath_alphas(self): #, is_uniform, max_normal, max_reduce): + self._initialize_alphas() + n = 2 + start = 0 + for i in range(self._steps): + end = start + n + indices = list(range(start, end)) + op_list = list(range(len(PRIMITIVES))) + drop_prop = 0.25 + remain_op_num = int(len(PRIMITIVES)*(1-drop_prop)) + for e in 
indices: + random.shuffle(op_list) + for o in op_list[:remain_op_num]: + self.alphas_normal[e, o] = 1.0 + + random.shuffle(op_list) + for o in op_list[:remain_op_num]: + self.alphas_reduce[e, o] = 1.0 + + start = end + n += 1 + + def generate_fixed_alphas(self): #, is_uniform, max_normal, max_reduce): + self._initialize_alphas() + fixed_normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 0), ('dil_conv_3x3', 2)] + fixed_reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('max_pool_3x3', 1)] + n = 2 + start = 0 + for i in range(self._steps): + end = start + n + for e in fixed_normal[i*2:i*2+2]: + edge_idx = e[1] + start + op_idx = PRIMITIVES.index(e[0]) + self.alphas_normal[edge_idx, op_idx] = 1.0 + + for e in fixed_reduce[i*2:i*2+2]: + edge_idx = e[1] + start + op_idx = PRIMITIVES.index(e[0]) + self.alphas_reduce[edge_idx, op_idx] = 1.0 + + start = end + n += 1 + \ No newline at end of file diff --git a/cnn/train_search_avg_max.py b/cnn/train_search_avg_max.py new file mode 100644 index 0000000..3f67543 --- /dev/null +++ b/cnn/train_search_avg_max.py @@ -0,0 +1,405 @@ +import os +import sys +import time +import glob +import numpy as np +import torch +import utils +import logging +import argparse +import torch.nn as nn +import torch.utils +import torch.nn.functional as F +import torchvision.datasets as dset +import torch.backends.cudnn as cudnn +import pickle +import time +import pandas as pd +import random + +from torch.autograd import Variable +from model_search import Network +from architect import Architect + +from genotypes import PRIMITIVES +from genotypes import Genotype + +parser = argparse.ArgumentParser("cifar") +parser.add_argument('--data', type=str, default='../data', help='location of the data corpus') +parser.add_argument('--batch_size', type=int, default=128, help='batch size') +parser.add_argument('--learning_rate', type=float, default=0.05, help='init learning rate') +parser.add_argument('--learning_rate_min', type=float, default=0.001, help='min learning rate') +parser.add_argument('--momentum', type=float, default=0.9, help='momentum') +parser.add_argument('--weight_decay', type=float, default=3e-4, help='weight decay') +parser.add_argument('--report_freq', type=float, default=50, help='report frequency') +parser.add_argument('--gpu', type=int, default=0, help='gpu device id') +parser.add_argument('--epochs', type=int, default=300, help='num of training epochs') +parser.add_argument('--init_channels', type=int, default=16, help='num of init channels') +parser.add_argument('--layers', type=int, default=8, help='total number of layers') +parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model') +parser.add_argument('--cutout', action='store_true', default=False, help='use cutout') +parser.add_argument('--cutout_length', type=int, default=16, help='cutout length') +parser.add_argument('--drop_path_prob', type=float, default=0.3, help='drop path probability') +parser.add_argument('--save', type=str, default='EXP', help='experiment name') +parser.add_argument('--seed', type=int, default=1, help='random seed') +parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping') +parser.add_argument('--train_portion', type=float, default=0.8, help='portion of training data') +parser.add_argument('--unrolled', 
+parser.add_argument('--arch_learning_rate', type=float, default=3.5e-4, help='learning rate for arch encoding')
+parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
+#Added
+parser.add_argument('--max_hidden_node_num', type=int, default=1, help='max number of hidden nodes in a cell')
+args = parser.parse_args()
+
+args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
+utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
+
+log_format = '%(asctime)s %(message)s'
+logging.basicConfig(stream=sys.stdout, level=logging.INFO,
+  format=log_format, datefmt='%m/%d %I:%M:%S %p')
+fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
+fh.setFormatter(logging.Formatter(log_format))
+logging.getLogger().addHandler(fh)
+
+
+CIFAR_CLASSES = 10
+model_path = 'search-RL-100iter/weights.pt'
+node_num = 4
+search_epoch = 1 #35*100/2
+search_arch_num = 1000
+
+def w_parse(weights):
+  gene = []
+  n = 2
+  start = 0
+  for i in range(node_num):
+    end = start + n
+    W = weights[start:end].copy()
+    edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x]))))[:2]
+    for j in edges:
+      k_best = None
+      for k in range(len(W[j])):
+        if k_best is None or W[j][k] > W[j][k_best]:
+          k_best = k
+      gene.append((PRIMITIVES[k_best], j))
+    start = end
+    n += 1
+  return gene
+
+def main(seed):
+  if not torch.cuda.is_available():
+    logging.info('no gpu device available')
+    sys.exit(1)
+
+  np.random.seed(seed)
+  torch.cuda.set_device(args.gpu)
+  cudnn.benchmark = True
+  torch.manual_seed(seed)
+  cudnn.enabled=True
+  torch.cuda.manual_seed(seed)
+  logging.info('gpu device = %d' % args.gpu)
+  logging.info("args = %s", args)
+
+  criterion = nn.CrossEntropyLoss()
+  criterion = criterion.cuda()
+  model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
+  model = model.cuda()
+  logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
+
+  optimizer = torch.optim.SGD(
+      model.parameters(),
+      args.learning_rate,
+      momentum=args.momentum,
+      weight_decay=args.weight_decay,
+      nesterov=True)
+
+  train_transform, valid_transform = utils._data_transforms_cifar10(args)
+  train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
+
+  num_train = len(train_data)
+  indices = list(range(num_train))
+  split = int(np.floor(args.train_portion * num_train))
+
+  batch_size = args.batch_size
+  train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[:split])
+  valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train])
+  train_queue = torch.utils.data.DataLoader(
+      train_data, batch_size=batch_size,
+      sampler=train_sampler,
+      pin_memory=False, num_workers=0)
+
+  valid_queue = torch.utils.data.DataLoader(
+      train_data, batch_size=batch_size,
+      sampler=valid_sampler,
+      pin_memory=False, num_workers=0)
+
+  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
+      optimizer, float(args.epochs), eta_min=args.learning_rate_min)
+
+  architect = Architect(model, args)
+  model.load_state_dict(torch.load(model_path))
+
+  start_time = time.time()
+  best_arch_search(valid_queue, model)
+  print(model)
+'''
+  for epoch in range(args.epochs):
+    scheduler.step()
+    lr = scheduler.get_lr()[0]
+    logging.info('epoch %d lr %e', epoch, lr)
+
+    #genotype = model.genotype()
+    #logging.info('genotype = %s', genotype)
+
+    #print(model.alphas_normal)
+    #print(model.alphas_reduce)
+    #normal_weights, reduce_weights = model._compute_weight_from_alphas()
+    #normal_weights = torch.from_numpy(normal_weights)
+    #reduce_weights = torch.from_numpy(reduce_weights)
+    #print(normal_weights)
+    #print(reduce_weights)
+
+    # training
+    train_acc, train_obj = train(train_queue, valid_queue, model, criterion, optimizer, lr)
+    logging.info('train_acc %f', train_acc)
+
+    valid_acc = train_arch(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
+    logging.info('valid_acc %f', valid_acc)
+
+    # validation
+    #if (epoch+1) % 1 == 0:
+      #valid_acc, valid_obj = infer(valid_queue, model, criterion)
+      #logging.info('valid_acc %f', valid_acc)
+
+    utils.save(model, os.path.join(args.save, 'weights.pt'))
+'''
+
+  #best_arch_search(valid_queue, model)
+
+'''
+  k = sum(1 for i in range(node_num) for n in range(2+i))
+  num_ops = len(PRIMITIVES)
+  acc_mat_normal = np.zeros((k, num_ops))
+  cnt_mat_normal = np.zeros((k, num_ops))
+  acc_mat_reduce = np.zeros((k, num_ops))
+  cnt_mat_reduce = np.zeros((k, num_ops))
+  np.set_printoptions(precision=3)
+  #for epoch in range(search_epoch):
+  nop = True
+  if nop:
+    print ('Search epoch: ', epoch)
+    #model.generate_random_alphas()
+    model.generate_fixed_alphas()
+    valid_acc = infer_for_search(valid_queue, model)
+
+    #valid_acc = valid_acc.data.cpu().numpy()
+
+    normal_gene = model.alphas_normal.data.cpu().numpy()
+    cnt_mat_normal = cnt_mat_normal + normal_gene
+    acc_mat_normal = acc_mat_normal + normal_gene * valid_acc
+
+    reduce_gene = model.alphas_reduce.data.cpu().numpy()
+    cnt_mat_reduce = cnt_mat_reduce + reduce_gene
+    acc_mat_reduce = acc_mat_reduce + reduce_gene * valid_acc
+
+    avg_acc_normal = np.divide(acc_mat_normal, cnt_mat_normal)
+    avg_acc_normal[np.isnan(avg_acc_normal)] = 0
+    avg_acc_normal[np.isinf(avg_acc_normal)] = 0
+
+    avg_acc_reduce = np.divide(acc_mat_reduce, cnt_mat_reduce)
+    avg_acc_reduce[np.isnan(avg_acc_reduce)] = 0
+    avg_acc_reduce[np.isinf(avg_acc_reduce)] = 0
+
+    if (epoch+1) % 1 == 0:
+      print ('Avg accuracies')
+      print (avg_acc_normal)
+      print (avg_acc_reduce, '\n')
+
+      gene_normal = w_parse(avg_acc_normal)
+      gene_reduce = w_parse(avg_acc_reduce)
+      print ('Normal genotype: ', gene_normal)
+      print ('Reduce genotype: ', gene_reduce)
+      elapsed_time = time.time() - start_time
+      elapsed_time = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
+      print('Elapsed time: ', elapsed_time, '\n')
+
+  #with open(os.path.join(args.save, 'search_results.pkl'), 'wb') as f:
+    #pickle.dump((acc_mat_normal, cnt_mat_normal, acc_mat_reduce, cnt_mat_reduce), f) '''
+
+def train(train_queue, valid_queue, model, criterion, optimizer, lr):
+  objs = utils.AvgrageMeter()
+  top1 = utils.AvgrageMeter()
+  top5 = utils.AvgrageMeter()
+
+  for step, (input, target) in enumerate(train_queue):
+    model.train()
+    n = input.size(0)
+
+    input = Variable(input, requires_grad=False).cuda()
+    target = Variable(target, requires_grad=False).cuda(async=True)
+
+    # get a random minibatch from the search queue with replacement
+    #input_search, target_search = next(iter(valid_queue))
+    #input_search = Variable(input_search, requires_grad=False).cuda()
+    #target_search = Variable(target_search, requires_grad=False).cuda(async=True)
+
+    #architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled)
+
+    optimizer.zero_grad()
+    #model.generate_droppath_alphas()
+    model.sample_new_architecture()
+    #print(model.sampled_weight_normal)
+    #print(model.sampled_weight_reduce)
+    logits = model(input)
+    loss = criterion(logits, target)
+
+    loss.backward()
+    nn.utils.clip_grad_norm(model.parameters(), args.grad_clip)
+    optimizer.step()
+
+    prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
+    objs.update(loss.data[0], n)
+    top1.update(prec1.data[0], n)
+    top5.update(prec5.data[0], n)
+
+    if step % args.report_freq == 0:
+      logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
+    #if step >= 50:
+      #break
+
+  return top1.avg, objs.avg
+
+def train_arch(train_queue, valid_queue, model, architect, criterion, optimizer, lr):
+  top1 = utils.AvgrageMeter()
+  for step, (input, target) in enumerate(valid_queue):
+    model.eval()
+    n = input.size(0)
+
+    input = Variable(input, requires_grad=False).cuda()
+    target = Variable(target, requires_grad=False).cuda(async=True)
+
+    # get a random minibatch from the search queue with replacement
+    model.sample_new_architecture()
+    if step == 0:
+      for e in model.edge_alphas_normal:
+        print(F.softmax(e, dim=-1))
+      for e in model.edge_alphas_reduce:
+        print(F.softmax(e, dim=-1))
+      print(F.softmax(model.alphas_normal, dim=-1))
+      print(F.softmax(model.alphas_reduce, dim=-1))
+      print(architect.baseline)
+    logits = model(input)
+    architect.step(input, target, lr, optimizer, unrolled=args.unrolled)
+    prec1, _ = utils.accuracy(logits, target, topk=(1, 5))
+    n = input.size(0)
+    top1.update(prec1.data[0], n)
+
+  return top1.avg
+
+
+def infer(valid_queue, model, criterion):
+  objs = utils.AvgrageMeter()
+  top1 = utils.AvgrageMeter()
+  top5 = utils.AvgrageMeter()
+  model.eval()
+
+  #with torch.no_grad():
+  for step, (input, target) in enumerate(valid_queue):
+    input = Variable(input).cuda()
+    target = Variable(target).cuda(async=True)
+
+    #model.generate_random_alphas()
+    #model.generate_droppath_alphas()
+    model.sample_new_architecture()
+    logits = model(input)
+    loss = criterion(logits, target)
+
+    prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
+    n = input.size(0)
+    objs.update(loss.data[0], n)
+    top1.update(prec1.data[0], n)
+    top5.update(prec5.data[0], n)
+
+    if step % args.report_freq == 0:
+      logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
+    #if step >= 50:
+      #break
+
+  return top1.avg, objs.avg
+
+def infer_for_search(valid_queue, model):
+  top1 = utils.AvgrageMeter()
+  model.eval()
+
+  #with torch.no_grad():
+  for step, (input, target) in enumerate(valid_queue):
+    input = Variable(input).cuda()
+    target = Variable(target).cuda(async=True)
+
+    #model.generate_random_alphas()
+    logits = model(input)
+
+    prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
+    n = input.size(0)
+    top1.update(prec1.data[0], n)
+
+    #if step >= 50:
+      #break
+
+  return top1.avg
+
+def best_arch_search(valid_queue, model):
+  model.eval()
+  steps = 4
+  k = sum(1 for i in range(steps) for n in range(2 + i))
+  num_ops = len(PRIMITIVES)
+
+  avg_count_normal = np.zeros((k, num_ops))
+  avg_count_reduce = np.zeros((k, num_ops))
+  avg_weight_normal = np.zeros((k, num_ops))
+  avg_weight_reduce = np.zeros((k, num_ops))
+  result_df = pd.DataFrame(columns = ['Normal', 'Reduce', 'Val_acc'])
+  for i in range(search_arch_num):
+    input_search, target_search = next(iter(valid_queue))
+    if input_search.size(0) != args.batch_size:
+      input_search, target_search = next(iter(valid_queue))
+    input_search = Variable(input_search, requires_grad=False).cuda()
+    target_search = Variable(target_search, requires_grad=False).cuda(async=True)
+
+    model.sample_new_architecture()
+    logits = model(input_search)
+
+    prec1, _ = utils.accuracy(logits, target_search, topk=(1, 5))
+
+
+    # accumulate selection counts and validation accuracy for every sampled (edge, op) pair
+    tmp_row_normal = np.where(model.sampled_weight_normal.data.cpu().numpy() == 1)[0]
+    tmp_col_normal = np.where(model.sampled_weight_normal.data.cpu().numpy() == 1)[1]
+    tmp_row_reduce = np.where(model.sampled_weight_reduce.data.cpu().numpy() == 1)[0]
+    tmp_col_reduce = np.where(model.sampled_weight_reduce.data.cpu().numpy() == 1)[1]
+    for j in range(len(tmp_row_normal)):
+      tmp_acc = prec1.data[0]
+      avg_count_normal[tmp_row_normal[j], tmp_col_normal[j]] += 1
+      avg_count_reduce[tmp_row_reduce[j], tmp_col_reduce[j]] += 1
+
+      avg_weight_normal[tmp_row_normal[j],tmp_col_normal[j]] += tmp_acc
+      avg_weight_reduce[tmp_row_reduce[j],tmp_col_reduce[j]] += tmp_acc
+
+    #gene_normal = w_parse(model.sampled_weight_normal.data.cpu().numpy())
+    #gene_reduce = w_parse(model.sampled_weight_reduce.data.cpu().numpy())
+    #temp_df = pd.DataFrame([[gene_normal, gene_reduce, prec1.data[0]]], columns = ['Normal', 'Reduce', 'Val_acc'])
+    #result_df = result_df.append(temp_df, ignore_index=True)
+
+  # average accuracy per (edge, op), then keep the best two edges per node
+  avg_weight_normal = avg_weight_normal / avg_count_normal
+  avg_weight_reduce = avg_weight_reduce / avg_count_reduce
+
+  gene_normal = w_parse(avg_weight_normal)
+  gene_reduce = w_parse(avg_weight_reduce)
+  temp_df = pd.DataFrame([[gene_normal, gene_reduce, prec1.data[0]]], columns=['Normal', 'Reduce', 'Val_acc'])
+  result_df = result_df.append(temp_df, ignore_index=True)
+
+  result_df = result_df.sort_values(by='Val_acc', ascending=False)
+  result_df.to_csv('search_result.csv')
+
+
+if __name__ == '__main__':
+  main(2)
\ No newline at end of file
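
For reference, a minimal standalone sketch of the Monte Carlo scoring that best_arch_search implements: sample random one-hot (edge, op) selections, weight each selection by the validation accuracy of the sampled architecture, average, and keep the two best-scoring input edges per node. This is a sketch only, not part of the patch: the random sampler stands in for model.sample_new_architecture(), the accuracy values are placeholders for infer_for_search/utils.accuracy, and the PRIMITIVES list mirrors the DARTS search space with 'none' removed.

import numpy as np

PRIMITIVES = ['max_pool_3x3', 'avg_pool_3x3', 'skip_connect',
              'sep_conv_3x3', 'sep_conv_5x5', 'dil_conv_3x3', 'dil_conv_5x5']
steps = 4
k = sum(2 + i for i in range(steps))   # 14 candidate edges for 4 nodes
num_ops = len(PRIMITIVES)

def sample_one_hot():
  # stand-in for model.sample_new_architecture(): two input edges per node,
  # one random op per chosen edge
  w = np.zeros((k, num_ops))
  start = 0
  for i in range(steps):
    edges = np.random.choice(2 + i, size=2, replace=False) + start
    for e in edges:
      w[e, np.random.randint(num_ops)] = 1.0
    start += 2 + i
  return w

def parse(avg):
  # same top-2-edges-per-node rule as w_parse in train_search_avg_max.py
  gene, start = [], 0
  for i in range(steps):
    W = avg[start:start + 2 + i]
    edges = sorted(range(2 + i), key=lambda x: -W[x].max())[:2]
    gene += [(PRIMITIVES[int(W[j].argmax())], j) for j in edges]
    start += 2 + i
  return gene

counts = np.zeros((k, num_ops))
scores = np.zeros((k, num_ops))
for _ in range(1000):                   # search_arch_num sampled architectures
  w = sample_one_hot()
  val_acc = np.random.uniform(70, 90)   # placeholder for the measured top-1 accuracy
  counts += w
  scores += w * val_acc

# average accuracy per (edge, op); pairs that were never sampled stay at zero
avg = np.divide(scores, counts, out=np.zeros_like(scores), where=counts > 0)
print(parse(avg))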