From 25b835bbc2407b358923789c06fb4d43586b39de Mon Sep 17 00:00:00 2001
From: Vivswan Shah <58091053+Vivswan@users.noreply.github.com>
Date: Mon, 8 May 2023 00:24:13 -0400
Subject: [PATCH] v1.0.3

* Added support for no loss function in `Model` class
    * If no loss function is provided, the `Model` object will use outputs for gradient computation
* Added support for multiple loss outputs from loss function

Signed-off-by: Vivswan Shah <58091053+Vivswan@users.noreply.github.com>
---
 CHANGELOG.md                       |  10 +-
 analogvnn/graph/ArgsKwargs.py      |   4 +-
 analogvnn/graph/BackwardGraph.py   |  19 +--
 analogvnn/graph/ModelGraphState.py |   3 -
 pyproject.toml                     |   2 +-
 sample_code_non_analog.py          | 180 +++++++++++++++++++++++++++++
 6 files changed, 203 insertions(+), 15 deletions(-)
 create mode 100644 sample_code_non_analog.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1eefad4..c413e00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,5 +11,11 @@
 ## 1.0.2
 
 * Bugfix: removed  `graph` from `Layer` class
-    * `graph` was causing issues with nested `Model` objects
-    * Now `_use_autograd_graph` is directly set while compiling the `Model` object
\ No newline at end of file
+  * `graph` was causing issues with nested `Model` objects
+  * Now `_use_autograd_graph` is directly set while compiling the `Model` object
+
+## 1.0.3
+
+* Added support for no loss function in `Model` class
+  * If no loss function is provided, the `Model` object will use outputs for gradient computation
+* Added support for multiple loss outputs from loss function
\ No newline at end of file
diff --git a/analogvnn/graph/ArgsKwargs.py b/analogvnn/graph/ArgsKwargs.py
index 7164006..8616e99 100644
--- a/analogvnn/graph/ArgsKwargs.py
+++ b/analogvnn/graph/ArgsKwargs.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import List, Dict, Any, Union, Optional
+from typing import List, Dict, Any, Union, Optional, Tuple
 
 __all__ = ['InputOutput', 'ArgsKwargs', 'ArgsKwargsInput', 'ArgsKwargsOutput']
 
@@ -109,5 +109,5 @@ def from_args_kwargs_object(outputs: ArgsKwargs) -> ArgsKwargsOutput:
 ArgsKwargsInput = Union[ArgsKwargs, Dict, List, Any, None]
 """ArgsKwargsInput is the input type for ArgsKwargs"""
 
-ArgsKwargsOutput = Union[ArgsKwargs, List, Any, None]
+ArgsKwargsOutput = Union[ArgsKwargs, List, Tuple, Any, None]
 """ArgsKwargsOutput is the output type for ArgsKwargs"""
diff --git a/analogvnn/graph/BackwardGraph.py b/analogvnn/graph/BackwardGraph.py
index fe0813b..9d16686 100644
--- a/analogvnn/graph/BackwardGraph.py
+++ b/analogvnn/graph/BackwardGraph.py
@@ -33,24 +33,29 @@ def __call__(self, gradient: TENSORS = None) -> ArgsKwargsOutput:
 
         self.graph_state.ready_for_backward(exception=True)
 
+        loss = self.graph_state.loss
+        self.graph_state.set_loss(None)
+
+        if loss is None:
+            loss = self.graph_state.outputs.args
+
+        if not isinstance(loss, (tuple, list)):
+            loss = [loss]
+
         if len(gradient) == 0:
-            gradient = None
-        elif len(gradient) == 1:
-            gradient = gradient[0]
+            gradient = (None,) * len(loss)
 
         if self.graph_state.use_autograd_graph:
-            result = self.graph_state.loss.backward(gradient=gradient)
+            result = tuple(v.backward(gradient=gradient[i]) for i, v in enumerate(loss))
         else:
             grad_outputs = torch.autograd.grad(
-                outputs=self.graph_state.loss,
+                outputs=loss,
                 inputs=self.graph_state.outputs.args,
                 grad_outputs=gradient,
                 retain_graph=True
             )
             result = self.calculate(*grad_outputs)
 
-        self.graph_state.set_loss(None)
-
         return result
 
     def compile(self, is_static=True):
diff --git a/analogvnn/graph/ModelGraphState.py b/analogvnn/graph/ModelGraphState.py
index c33b44a..e8e08ea 100644
--- a/analogvnn/graph/ModelGraphState.py
+++ b/analogvnn/graph/ModelGraphState.py
@@ -86,9 +86,6 @@ def ready_for_backward(self, exception: bool = False) -> bool:
             if self.outputs is None:
                 raise RuntimeError('output is not set.')
 
-            if self._loss is None:
-                raise RuntimeError('loss is not set.')
-
         return not (self.outputs is None or self._loss is None)
 
     @property
diff --git a/pyproject.toml b/pyproject.toml
index 82c47d3..47e3b84 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,7 @@ py-modules = ['analogvnn']
 [project]
 # $ pip install analogvnn
 name = "analogvnn"
-version = "1.0.2"
+version = "1.0.3"
 description = "A fully modular framework for modeling and optimizing analog/photonic neural networks"  # Optional
 readme = "README.md"
 requires-python = ">=3.7"
diff --git a/sample_code_non_analog.py b/sample_code_non_analog.py
new file mode 100644
index 0000000..dfabc6f
--- /dev/null
+++ b/sample_code_non_analog.py
@@ -0,0 +1,180 @@
+import torch.backends.cudnn
+import torchvision
+from torch import optim, nn
+from torch.utils.data import DataLoader
+from torchvision.transforms import transforms
+
+from analogvnn.nn.Linear import Linear
+from analogvnn.nn.activation.Gaussian import GeLU
+from analogvnn.nn.module.FullSequential import FullSequential
+from analogvnn.nn.noise.GaussianNoise import GaussianNoise
+from analogvnn.nn.normalize.Clamp import Clamp
+from analogvnn.nn.precision.ReducePrecision import ReducePrecision
+from analogvnn.utils.is_cpu_cuda import is_cpu_cuda
+
+
+def load_vision_dataset(dataset, path, batch_size, is_cuda=False, grayscale=True):
+    """Load the train and test splits of a vision dataset and wrap each in a DataLoader.
+
+    Images are converted to tensors, optionally after a grayscale conversion.
+
+    Args:
+        dataset (Type[torchvision.datasets.VisionDataset]): the dataset class to use (e.g. torchvision.datasets.MNIST)
+        path (str): root directory handed to the dataset class (which is called with download=True)
+        batch_size (int): the batch size to use for both data loaders
+        is_cuda (bool): if True, num_workers=1 and pin_memory=True are added to both loaders (defaults to False)
+        grayscale (bool): a flag indicating whether to convert the images to grayscale (defaults to True)
+
+    Returns:
+        (train_loader, test_loader, input_shape, classes); input_shape comes from item 0 of the first test batch.
+    """
+
+    dataset_kwargs = {
+        'batch_size': batch_size,
+        'shuffle': True  # the same kwargs feed both loaders, so the test loader shuffles too
+    }
+
+    if is_cuda:
+        cuda_kwargs = {
+            'num_workers': 1,
+            'pin_memory': True,
+        }
+        dataset_kwargs.update(cuda_kwargs)
+
+    if grayscale:
+        use_transform = transforms.Compose([
+            transforms.Grayscale(),
+            transforms.ToTensor(),
+        ])
+    else:
+        use_transform = transforms.Compose([transforms.ToTensor()])
+
+    train_set = dataset(root=path, train=True, download=True, transform=use_transform)
+    test_set = dataset(root=path, train=False, download=True, transform=use_transform)
+    train_loader = DataLoader(train_set, **dataset_kwargs)
+    test_loader = DataLoader(test_set, **dataset_kwargs)
+
+    zeroth_element = next(iter(test_loader))[0]  # item 0 of the first batch the test loader yields
+    input_shape = list(zeroth_element.shape)  # NOTE(review): presumably includes the batch dimension - confirm
+
+    return train_loader, test_loader, input_shape, tuple(train_set.classes)
+
+
+def cross_entropy_accuracy(output, target) -> float:
+    """Return the fraction of samples whose top-scoring class matches the target.
+
+    Args:
+        output (torch.Tensor): output of the model from passing inputs; classes are read along dim 1
+        target (torch.Tensor): correct labels for the inputs
+
+    Returns:
+        float: accuracy from 0 to 1 (0.0 for an empty batch)
+    """
+    if len(output) == 0:  # an empty batch would otherwise raise ZeroDivisionError below
+        return 0.0
+    correct = (output.argmax(dim=1) == target).sum().item()
+    return correct / len(output)
+
+
+class LinearModel(FullSequential):
+    def __init__(self, activation_class, norm_class, precision_class, precision, noise_class, leakage):
+        """Initialise a Flatten + 784-256-128-10 stack of Linear layers, each followed by an activation.
+
+        Args:
+            activation_class: Activation Class, instantiated after every Linear layer
+            norm_class: Normalization Class (stored on the instance; not used further in this class)
+            precision_class: Precision Class (ReducePrecision or StochasticReducePrecision); stored only
+            precision (int): precision of the weights and biases (stored on the instance only)
+            noise_class: Noise Class (stored on the instance; not used further in this class)
+            leakage (float): leakage is the probability that a reduced precision digital value (e.g., “1011”) will
+            acquire a different digital value (e.g., “1010” or “1100”) after passing through the noise layer
+            (i.e., the probability that the digital values transmitted and detected are different after passing through
+            the analog channel).
+        """
+
+        super().__init__()
+
+        self.activation_class = activation_class
+        self.norm_class = norm_class
+        self.precision_class = precision_class
+        self.precision = precision
+        self.noise_class = noise_class
+        self.leakage = leakage
+
+        self.all_layers = []  # ordered list that add_layer() extends
+        self.all_layers.append(nn.Flatten(start_dim=1))
+        self.add_layer(Linear(in_features=28 * 28, out_features=256))
+        self.add_layer(Linear(in_features=256, out_features=128))
+        self.add_layer(Linear(in_features=128, out_features=10))
+
+        self.add_sequence(*self.all_layers)  # pass every collected layer, in order, to add_sequence
+
+    def add_layer(self, layer):
+        """Append a layer and a new activation instance to all_layers, then initialise the layer's weight.
+
+        Args:
+            layer: module exposing a ``weight`` attribute (Linear instances in this file)
+        """
+
+        self.all_layers.append(layer)
+        self.all_layers.append(self.activation_class())
+        self.activation_class.initialise_(layer.weight)  # called on the class, not on the instance appended above
+
+
+def run_linear3_model():
+    """Train and test a LinearModel (3 Linear layers) on MNIST for 10 epochs, printing loss and accuracy."""
+
+    is_cpu_cuda.use_cuda_if_available()
+    torch.backends.cudnn.benchmark = True
+    torch.manual_seed(0)  # seed torch's RNG with a fixed value
+    device, is_cuda = is_cpu_cuda.is_using_cuda
+    print(f'Device: {device}')
+    print()
+
+    # Loading Data (MNIST rooted at '_data/', batches of 128, grayscale left at its default)
+    print('Loading Data...')
+    train_loader, test_loader, input_shape, classes = load_vision_dataset(
+        dataset=torchvision.datasets.MNIST,
+        path='_data/',
+        batch_size=128,
+        is_cuda=is_cuda
+    )
+
+    # Creating Models (LinearModel only stores the norm/precision/noise/leakage arguments)
+    print('Creating Models...')
+    nn_model = LinearModel(
+        activation_class=GeLU,
+        norm_class=Clamp,
+        precision_class=ReducePrecision,
+        precision=2 ** 4,
+        noise_class=GaussianNoise,
+        leakage=0.5
+    )
+    nn_model.use_autograd_graph = True  # NOTE(review): presumably hands backward to torch autograd - confirm
+
+    # Setting Model Parameters
+    nn_model.loss_function = nn.CrossEntropyLoss()
+    nn_model.accuracy_function = cross_entropy_accuracy
+    nn_model.optimizer = optim.Adam(params=nn_model.parameters())
+
+    # Compile Model
+    nn_model.compile(device=device)
+
+    # Training
+    print('Starting Training...')
+    for epoch in range(10):
+        train_loss, train_accuracy = nn_model.train_on(train_loader, epoch=epoch)
+        test_loss, test_accuracy = nn_model.test_on(test_loader, epoch=epoch)
+
+        str_epoch = str(epoch + 1).zfill(1)  # NOTE(review): zfill(1) never pads; widen it if alignment is wanted
+        print_str = f'({str_epoch})' \
+                    f' Training Loss: {train_loss:.4f},' \
+                    f' Training Accuracy: {100. * train_accuracy:.0f}%,' \
+                    f' Testing Loss: {test_loss:.4f},' \
+                    f' Testing Accuracy: {100. * test_accuracy:.0f}%\n'
+        print(print_str)
+    print('Run Completed Successfully...')
+
+
+if __name__ == '__main__':  # run the demo only when this file is executed as a script
+    run_linear3_model()