diff --git a/.ipynb_checkpoints/CIFAR10-checkpoint.ipynb b/.ipynb_checkpoints/CIFAR10-checkpoint.ipynb
index 8ab06c9..f9d5547 100644
--- a/.ipynb_checkpoints/CIFAR10-checkpoint.ipynb
+++ b/.ipynb_checkpoints/CIFAR10-checkpoint.ipynb
@@ -18,7 +18,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -34,7 +34,7 @@
     "\n",
     "from tqdm import tqdm as pbar\n",
     "from torch.utils.tensorboard import SummaryWriter\n",
-    "from models import *"
+    "from cifar10_models import *"
    ]
   },
   {
@@ -46,7 +46,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -65,16 +65,16 @@
     "                                      transforms.ToTensor(),\n",
     "                                      transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])])\n",
     "    \n",
-    "#    transform_validation = transforms.Compose([transforms.ToTensor(),\n",
-    "#                                               transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])])\n",
+    "    transform_validation = transforms.Compose([transforms.ToTensor(),\n",
+    "                                               transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])])\n",
     "    \n",
     "    transform_validation = transforms.Compose([transforms.ToTensor()])\n",
     "    \n",
     "    trainset = torchvision.datasets.CIFAR10(root=params['path'], train=True, transform=transform_train)\n",
     "    testset = torchvision.datasets.CIFAR10(root=params['path'], train=False, transform=transform_validation)\n",
     "    \n",
-    "    trainloader = torch.utils.data.DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, num_workers=params['num_workers'])\n",
-    "    testloader = torch.utils.data.DataLoader(testset, batch_size=params['batch_size'], shuffle=False, num_workers=params['num_workers'])\n",
+    "    trainloader = torch.utils.data.DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, num_workers=4)\n",
+    "    testloader = torch.utils.data.DataLoader(testset, batch_size=params['batch_size'], shuffle=False, num_workers=4)\n",
     "    return trainloader, testloader"
    ]
   },
@@ -87,7 +87,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -95,17 +95,14 @@
     "    \n",
     "    writer = SummaryWriter('runs/' + params['description'])\n",
     "    model = model.to(params['device'])\n",
-    "    optimizer = optim.SGD(model.parameters(), lr=params['max_lr'], weight_decay=params['weight_decay'], momentum=0.9, nesterov=True)\n",
+    "    optimizer = optim.AdamW(model.parameters())\n",
     "    total_updates = params['num_epochs']*len(params['train_loader'])\n",
-    "    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_updates, eta_min=params['min_lr'])\n",
     "    \n",
     "    criterion = nn.CrossEntropyLoss()\n",
     "    best_accuracy = test_model(model, params)\n",
     "    best_model = copy.deepcopy(model.state_dict())\n",
     "    \n",
     "    for epoch in pbar(range(params['num_epochs'])):\n",
-    "        scheduler.step()\n",
-    "        \n",
     "        # Each epoch has a training and validation phase\n",
     "        for phase in ['train', 'validation']:\n",
     "            \n",
@@ -153,7 +150,7 @@
     "        \n",
     "        # Write best weights to disk\n",
     "        if epoch % params['check_point'] == 0 or epoch == params['num_epochs']-1:\n",
-    "            torch.save(best_model, params['state_dict_path'] + params['description'] + '.pt')\n",
+    "            torch.save(best_model, params['description'] + '.pt')\n",
     "    \n",
     "    final_accuracy = test_model(model, params)\n",
     "    writer.add_text('Final_Accuracy', str(final_accuracy), 0)\n",
@@ -169,7 +166,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -203,23 +200,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       ""
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "model = resnet18()\n",
-    "model.load_state_dict(torch.load('/tmp/checkpoint_12000.pth'))"
+    "model = resnet18()"
    ]
   },
   {
@@ -231,74 +216,45 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Using cuda:2\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Train on cuda if available\n",
-    "device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')\n",
+    "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n",
     "print(\"Using\", device)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "data_params = {'path': '/raid/data/pytorch_dataset/cifar10',\n",
-    "               'batch_size': 256, 'num_workers': 4}\n",
+    "data_params = {'path': '/raid/data/pytorch_dataset/cifar10', 'batch_size': 256}\n",
     "\n",
     "train_loader, validation_loader = make_dataloaders(data_params)\n",
     "\n",
-    "train_params = {'description': 'ResNet18',\n",
+    "train_params = {'description': 'Test',\n",
     "                'num_epochs': 300,\n",
-    "                'max_lr': 5e-2, 'min_lr': 1e-5, 'weight_decay': 1e-3,\n",
     "                'check_point': 50, 'device': device,\n",
-    "                'state_dict_path': 'trained_models/',\n",
     "                'train_loader': train_loader, 'validation_loader': validation_loader}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# train_model(model, train_params)"
+    "train_model(model, train_params)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 40/40 [00:01<00:00, 23.87it/s]\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0.7538"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "test_model(model, train_params)"
    ]
@@ -327,7 +283,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.4"
+   "version": "3.7.5"
   }
  },
  "nbformat": 4,
diff --git a/CIFAR10.ipynb b/CIFAR10.ipynb
index f516f6d..f9d5547 100644
--- a/CIFAR10.ipynb
+++ b/CIFAR10.ipynb
@@ -18,7 +18,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -34,7 +34,7 @@
     "\n",
     "from tqdm import tqdm as pbar\n",
     "from torch.utils.tensorboard import SummaryWriter\n",
-    "from models import *"
+    "from cifar10_models import *"
    ]
   },
   {
@@ -46,7 +46,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -65,16 +65,16 @@
     "                                      transforms.ToTensor(),\n",
     "                                      transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])])\n",
     "    \n",
-    "#    transform_validation = transforms.Compose([transforms.ToTensor(),\n",
-    "#                                               transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])])\n",
+    "    transform_validation = transforms.Compose([transforms.ToTensor(),\n",
+    "                                               transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])])\n",
     "    \n",
     "    transform_validation = transforms.Compose([transforms.ToTensor()])\n",
     "    \n",
     "    trainset = torchvision.datasets.CIFAR10(root=params['path'], train=True, transform=transform_train)\n",
     "    testset = torchvision.datasets.CIFAR10(root=params['path'], train=False, transform=transform_validation)\n",
     "    \n",
-    "    trainloader = torch.utils.data.DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, num_workers=params['num_workers'])\n",
-    "    testloader = torch.utils.data.DataLoader(testset, batch_size=params['batch_size'], shuffle=False, num_workers=params['num_workers'])\n",
+    "    trainloader = torch.utils.data.DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, num_workers=4)\n",
+    "    testloader = torch.utils.data.DataLoader(testset, batch_size=params['batch_size'], shuffle=False, num_workers=4)\n",
     "    return trainloader, testloader"
    ]
   },
@@ -87,7 +87,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -95,17 +95,14 @@
     "    \n",
     "    writer = SummaryWriter('runs/' + params['description'])\n",
     "    model = model.to(params['device'])\n",
-    "    optimizer = optim.SGD(model.parameters(), lr=params['max_lr'], weight_decay=params['weight_decay'], momentum=0.9, nesterov=True)\n",
+    "    optimizer = optim.AdamW(model.parameters())\n",
     "    total_updates = params['num_epochs']*len(params['train_loader'])\n",
-    "    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_updates, eta_min=params['min_lr'])\n",
     "    \n",
     "    criterion = nn.CrossEntropyLoss()\n",
     "    best_accuracy = test_model(model, params)\n",
     "    best_model = copy.deepcopy(model.state_dict())\n",
     "    \n",
     "    for epoch in pbar(range(params['num_epochs'])):\n",
-    "        scheduler.step()\n",
-    "        \n",
     "        # Each epoch has a training and validation phase\n",
     "        for phase in ['train', 'validation']:\n",
     "            \n",
@@ -153,7 +150,7 @@
     "        \n",
     "        # Write best weights to disk\n",
     "        if epoch % params['check_point'] == 0 or epoch == params['num_epochs']-1:\n",
-    "            torch.save(best_model, params['state_dict_path'] + params['description'] + '.pt')\n",
+    "            torch.save(best_model, params['description'] + '.pt')\n",
     "    \n",
     "    final_accuracy = test_model(model, params)\n",
     "    writer.add_text('Final_Accuracy', str(final_accuracy), 0)\n",
@@ -169,7 +166,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -203,23 +200,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       ""
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "model = resnet18()\n",
-    "model.load_state_dict(torch.load('/tmp/checkpoint_36000.pth'))"
+    "model = resnet18()"
    ]
   },
   {
@@ -231,74 +216,45 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Using cuda:2\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Train on cuda if available\n",
-    "device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')\n",
+    "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n",
    "print(\"Using\", device)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "data_params = {'path': '/raid/data/pytorch_dataset/cifar10',\n",
-    "               'batch_size': 256, 'num_workers': 4}\n",
+    "data_params = {'path': '/raid/data/pytorch_dataset/cifar10', 'batch_size': 256}\n",
     "\n",
     "train_loader, validation_loader = make_dataloaders(data_params)\n",
     "\n",
-    "train_params = {'description': 'ResNet18',\n",
+    "train_params = {'description': 'Test',\n",
     "                'num_epochs': 300,\n",
-    "                'max_lr': 5e-2, 'min_lr': 1e-5, 'weight_decay': 1e-3,\n",
     "                'check_point': 50, 'device': device,\n",
-    "                'state_dict_path': 'trained_models/',\n",
     "                'train_loader': train_loader, 'validation_loader': validation_loader}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# train_model(model, train_params)"
+    "train_model(model, train_params)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 40/40 [00:01<00:00, 35.53it/s]\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0.876"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "test_model(model, train_params)"
    ]
@@ -327,7 +283,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.4"
+   "version": "3.7.5"
   }
  },
  "nbformat": 4,
diff --git a/cifar10_models/__pycache__/__init__.cpython-37.pyc b/cifar10_models/__pycache__/__init__.cpython-37.pyc
index 00cdefc..9abed23 100644
Binary files a/cifar10_models/__pycache__/__init__.cpython-37.pyc and b/cifar10_models/__pycache__/__init__.cpython-37.pyc differ
diff --git a/cifar10_models/__pycache__/densenet.cpython-37.pyc b/cifar10_models/__pycache__/densenet.cpython-37.pyc
index 0a9c509..840f3e9 100644
Binary files a/cifar10_models/__pycache__/densenet.cpython-37.pyc and b/cifar10_models/__pycache__/densenet.cpython-37.pyc differ
diff --git a/cifar10_models/__pycache__/googlenet.cpython-37.pyc b/cifar10_models/__pycache__/googlenet.cpython-37.pyc
index db5a07d..b4581dc 100644
Binary files a/cifar10_models/__pycache__/googlenet.cpython-37.pyc and b/cifar10_models/__pycache__/googlenet.cpython-37.pyc differ
diff --git a/cifar10_models/__pycache__/inception.cpython-37.pyc b/cifar10_models/__pycache__/inception.cpython-37.pyc
index 240cb00..124f627 100644
Binary files a/cifar10_models/__pycache__/inception.cpython-37.pyc and b/cifar10_models/__pycache__/inception.cpython-37.pyc differ
diff --git a/cifar10_models/__pycache__/mobilenetv2.cpython-37.pyc b/cifar10_models/__pycache__/mobilenetv2.cpython-37.pyc
index 768a950..28a08b2 100644
Binary files a/cifar10_models/__pycache__/mobilenetv2.cpython-37.pyc and b/cifar10_models/__pycache__/mobilenetv2.cpython-37.pyc differ
diff --git a/cifar10_models/__pycache__/resnet.cpython-37.pyc b/cifar10_models/__pycache__/resnet.cpython-37.pyc
index 86fb637..36ec7c9 100644
Binary files a/cifar10_models/__pycache__/resnet.cpython-37.pyc and b/cifar10_models/__pycache__/resnet.cpython-37.pyc differ
diff --git a/cifar10_models/__pycache__/resnet_orig.cpython-37.pyc b/cifar10_models/__pycache__/resnet_orig.cpython-37.pyc
index de79f51..d87b68c 100644
Binary files a/cifar10_models/__pycache__/resnet_orig.cpython-37.pyc and b/cifar10_models/__pycache__/resnet_orig.cpython-37.pyc differ
diff --git a/cifar10_models/__pycache__/vgg.cpython-37.pyc b/cifar10_models/__pycache__/vgg.cpython-37.pyc
index ac256eb..9810170 100644
Binary files a/cifar10_models/__pycache__/vgg.cpython-37.pyc and b/cifar10_models/__pycache__/vgg.cpython-37.pyc differ