From 25b835bbc2407b358923789c06fb4d43586b39de Mon Sep 17 00:00:00 2001
From: Vivswan Shah <58091053+Vivswan@users.noreply.github.com>
Date: Mon, 8 May 2023 00:24:13 -0400
Subject: [PATCH] v1.0.3

* Added support for no loss function in `Model` class
  * If no loss function is provided, the `Model` object will use outputs for gradient computation
  * `ready_for_backward()` no longer reports `False` only because no loss is set
* Added support for multiple loss outputs from loss function

Signed-off-by: Vivswan Shah <58091053+Vivswan@users.noreply.github.com>
---
 CHANGELOG.md                       |  10 +-
 analogvnn/graph/ArgsKwargs.py      |   4 +-
 analogvnn/graph/BackwardGraph.py   |  19 +--
 analogvnn/graph/ModelGraphState.py |   5 +-
 pyproject.toml                     |   2 +-
 sample_code_non_analog.py          | 180 +++++++++++++++++++++++++++++
 6 files changed, 204 insertions(+), 16 deletions(-)
 create mode 100644 sample_code_non_analog.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1eefad4..c413e00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,5 +11,11 @@
 ## 1.0.2
 
 * Bugfix: removed `graph` from `Layer` class
-  * `graph` was causing issues with nested `Model` objects
-  * Now `_use_autograd_graph` is directly set while compiling the `Model` object
\ No newline at end of file
+    * `graph` was causing issues with nested `Model` objects
+    * Now `_use_autograd_graph` is directly set while compiling the `Model` object
+
+## 1.0.3
+
+* Added support for no loss function in `Model` class
+    * If no loss function is provided, the `Model` object will use outputs for gradient computation
+* Added support for multiple loss outputs from loss function
\ No newline at end of file
diff --git a/analogvnn/graph/ArgsKwargs.py b/analogvnn/graph/ArgsKwargs.py
index 7164006..8616e99 100644
--- a/analogvnn/graph/ArgsKwargs.py
+++ b/analogvnn/graph/ArgsKwargs.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import List, Dict, Any, Union, Optional
+from typing import List, Dict, Any, Union, Optional, Tuple
 
 __all__ = ['InputOutput', 'ArgsKwargs', 'ArgsKwargsInput', 'ArgsKwargsOutput']
 
@@ -109,5 +109,5 @@ def from_args_kwargs_object(outputs: ArgsKwargs) -> ArgsKwargsOutput:
 
 ArgsKwargsInput = Union[ArgsKwargs, Dict, List, Any, None]
 """ArgsKwargsInput is the input type for ArgsKwargs"""
-ArgsKwargsOutput = Union[ArgsKwargs, List, Any, None]
+ArgsKwargsOutput = Union[ArgsKwargs, List, Tuple, Any, None]
 """ArgsKwargsOutput is the output type for ArgsKwargs"""
diff --git a/analogvnn/graph/BackwardGraph.py b/analogvnn/graph/BackwardGraph.py
index fe0813b..9d16686 100644
--- a/analogvnn/graph/BackwardGraph.py
+++ b/analogvnn/graph/BackwardGraph.py
@@ -33,24 +33,29 @@ def __call__(self, gradient: TENSORS = None) -> ArgsKwargsOutput:
 
         self.graph_state.ready_for_backward(exception=True)
 
+        loss = self.graph_state.loss
+        self.graph_state.set_loss(None)
+
+        if loss is None:
+            loss = self.graph_state.outputs.args
+
+        if not isinstance(loss, (tuple, list)):
+            loss = [loss]
+
         if len(gradient) == 0:
-            gradient = None
-        elif len(gradient) == 1:
-            gradient = gradient[0]
+            gradient = (None,) * len(loss)
 
         if self.graph_state.use_autograd_graph:
-            result = self.graph_state.loss.backward(gradient=gradient)
+            result = tuple(v.backward(gradient=gradient[i]) for i, v in enumerate(loss))
         else:
             grad_outputs = torch.autograd.grad(
-                outputs=self.graph_state.loss,
+                outputs=loss,
                 inputs=self.graph_state.outputs.args,
                 grad_outputs=gradient,
                 retain_graph=True
             )
             result = self.calculate(*grad_outputs)
 
-        self.graph_state.set_loss(None)
-
         return result
 
     def compile(self, is_static=True):
diff --git a/analogvnn/graph/ModelGraphState.py b/analogvnn/graph/ModelGraphState.py
index c33b44a..e8e08ea 100644
--- a/analogvnn/graph/ModelGraphState.py
+++ b/analogvnn/graph/ModelGraphState.py
@@ -86,9 +86,6 @@ def ready_for_backward(self, exception: bool = False) -> bool:
             if self.outputs is None:
                 raise RuntimeError('output is not set.')
 
-            if self._loss is None:
-                raise RuntimeError('loss is not set.')
-
-        return not (self.outputs is None or self._loss is None)
+        return self.outputs is not None
 
     @property
diff --git a/pyproject.toml b/pyproject.toml
index 82c47d3..47e3b84 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,7 @@ py-modules = ['analogvnn']
 [project]
 # $ pip install analogvnn
 name = "analogvnn"
-version = "1.0.2"
+version = "1.0.3"
 description = "A fully modular framework for modeling and optimizing analog/photonic neural networks"  # Optional
 readme = "README.md"
 requires-python = ">=3.7"
diff --git a/sample_code_non_analog.py b/sample_code_non_analog.py
new file mode 100644
index 0000000..dfabc6f
--- /dev/null
+++ b/sample_code_non_analog.py
@@ -0,0 +1,180 @@
+import torch.backends.cudnn
+import torchvision
+from torch import optim, nn
+from torch.utils.data import DataLoader
+from torchvision.transforms import transforms
+
+from analogvnn.nn.Linear import Linear
+from analogvnn.nn.activation.Gaussian import GeLU
+from analogvnn.nn.module.FullSequential import FullSequential
+from analogvnn.nn.noise.GaussianNoise import GaussianNoise
+from analogvnn.nn.normalize.Clamp import Clamp
+from analogvnn.nn.precision.ReducePrecision import ReducePrecision
+from analogvnn.utils.is_cpu_cuda import is_cpu_cuda
+
+
+def load_vision_dataset(dataset, path, batch_size, is_cuda=False, grayscale=True):
+    """Load a vision dataset and build its train and test data loaders.
+
+    Grayscale conversion and CUDA-oriented loader settings (worker process, pinned memory) are optional.
+
+    Args:
+        dataset (Type[torchvision.datasets.VisionDataset]): the dataset class to use (e.g. torchvision.datasets.MNIST)
+        path (str): the path to the dataset files
+        batch_size (int): the batch size to use for the data loader
+        is_cuda (bool): a flag indicating whether to use CUDA support (defaults to False)
+        grayscale (bool): a flag indicating whether to convert the images to grayscale (defaults to True)
+
+    Returns:
+        A tuple containing the train and test data loaders, the input shape, and a tuple of class labels.
+    """
+
+    dataset_kwargs = {
+        'batch_size': batch_size,
+        'shuffle': True
+    }
+
+    if is_cuda:
+        cuda_kwargs = {
+            'num_workers': 1,
+            'pin_memory': True,
+        }
+        dataset_kwargs.update(cuda_kwargs)
+
+    if grayscale:
+        use_transform = transforms.Compose([
+            transforms.Grayscale(),
+            transforms.ToTensor(),
+        ])
+    else:
+        use_transform = transforms.Compose([transforms.ToTensor()])
+
+    train_set = dataset(root=path, train=True, download=True, transform=use_transform)
+    test_set = dataset(root=path, train=False, download=True, transform=use_transform)
+    train_loader = DataLoader(train_set, **dataset_kwargs)
+    test_loader = DataLoader(test_set, **dataset_kwargs)
+
+    zeroth_element = next(iter(test_loader))[0]
+    input_shape = list(zeroth_element.shape)
+
+    return train_loader, test_loader, input_shape, tuple(train_set.classes)
+
+
+def cross_entropy_accuracy(output, target) -> float:
+    """Cross Entropy accuracy function.
+
+    Args:
+        output (torch.Tensor): output of the model from passing inputs
+        target (torch.Tensor): correct labels for the inputs
+
+    Returns:
+        float: accuracy from 0 to 1
+    """
+
+    _, preds = torch.max(output.data, 1)
+    correct = (preds == target).sum().item()
+    return correct / len(output)
+
+
+class LinearModel(FullSequential):
+    def __init__(self, activation_class, norm_class, precision_class, precision, noise_class, leakage):
+        """Initialise LinearModel with 3 Dense layers.
+
+        Args:
+            activation_class: Activation Class
+            norm_class: Normalization Class
+            precision_class: Precision Class (ReducePrecision or StochasticReducePrecision)
+            precision (int): precision of the weights and biases
+            noise_class: Noise Class
+            leakage (float): leakage is the probability that a reduced precision digital value (e.g., “1011”) will
+                acquire a different digital value (e.g., “1010” or “1100”) after passing through the noise layer
+                (i.e., the probability that the digital values transmitted and detected are different after passing through
+                the analog channel).
+        """
+
+        super().__init__()
+
+        self.activation_class = activation_class
+        self.norm_class = norm_class
+        self.precision_class = precision_class
+        self.precision = precision
+        self.noise_class = noise_class
+        self.leakage = leakage
+
+        self.all_layers = []
+        self.all_layers.append(nn.Flatten(start_dim=1))
+        self.add_layer(Linear(in_features=28 * 28, out_features=256))
+        self.add_layer(Linear(in_features=256, out_features=128))
+        self.add_layer(Linear(in_features=128, out_features=10))
+
+        self.add_sequence(*self.all_layers)
+
+    def add_layer(self, layer):
+        """Append a layer and its activation, then run the activation's `initialise_` on the layer weight.
+
+        Args:
+            layer (BaseLayer): digital layer module
+        """
+
+        self.all_layers.append(layer)
+        self.all_layers.append(self.activation_class())
+        self.activation_class.initialise_(layer.weight)
+
+
+def run_linear3_model():
+    """Train and evaluate the non-analog classifier with 3 linear/dense layers on the MNIST dataset."""
+
+    is_cpu_cuda.use_cuda_if_available()
+    torch.backends.cudnn.benchmark = True
+    torch.manual_seed(0)
+    device, is_cuda = is_cpu_cuda.is_using_cuda
+    print(f'Device: {device}')
+    print()
+
+    # Loading Data
+    print('Loading Data...')
+    train_loader, test_loader, input_shape, classes = load_vision_dataset(
+        dataset=torchvision.datasets.MNIST,
+        path='_data/',
+        batch_size=128,
+        is_cuda=is_cuda
+    )
+
+    # Creating Models
+    print('Creating Models...')
+    nn_model = LinearModel(
+        activation_class=GeLU,
+        norm_class=Clamp,
+        precision_class=ReducePrecision,
+        precision=2 ** 4,
+        noise_class=GaussianNoise,
+        leakage=0.5
+    )
+    nn_model.use_autograd_graph = True
+
+    # Setting Model Parameters
+    nn_model.loss_function = nn.CrossEntropyLoss()
+    nn_model.accuracy_function = cross_entropy_accuracy
+    nn_model.optimizer = optim.Adam(params=nn_model.parameters())
+
+    # Compile Model
+    nn_model.compile(device=device)
+
+    # Training
+    print('Starting Training...')
+    for epoch in range(10):
+        train_loss, train_accuracy = nn_model.train_on(train_loader, epoch=epoch)
+        test_loss, test_accuracy = nn_model.test_on(test_loader, epoch=epoch)
+
+        str_epoch = str(epoch + 1).zfill(1)
+        print_str = f'({str_epoch})' \
+                    f' Training Loss: {train_loss:.4f},' \
+                    f' Training Accuracy: {100. * train_accuracy:.0f}%,' \
+                    f' Testing Loss: {test_loss:.4f},' \
+                    f' Testing Accuracy: {100. * test_accuracy:.0f}%\n'
+        print(print_str)
+    print('Run Completed Successfully...')
+
+
+if __name__ == '__main__':
+    run_linear3_model()