guillaume-chevalier · alexbrillant · Nov 1, 2019 · Nov 1, 2019 · Nov 2, 2019 · Nov 3, 2019
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,12 @@
 .ipynb_checkpoints
 ___*
+neuraxle/**
+steps/__pycache__/**
+savers/__pycache__/**
+__pycache__/**
+.idea/**
+steps/one_hot_encoder.py
+steps/transform_expected_output_only_wrapper.py
+venv/**
+cache/**
+neuraxle_tensorflow/**
diff --git a/1_train_and_save_LSTM.ipynb b/1_train_and_save_LSTM.ipynb
diff --git a/2_call_rest_api_and_eval.ipynb b/2_call_rest_api_and_eval.ipynb
diff --git a/LSTM.ipynb b/LSTM.ipynb
diff --git a/LSTM_files/LSTM_16_0.png b/LSTM_files/LSTM_16_0.png
diff --git a/LSTM_files/LSTM_18_1.png b/LSTM_files/LSTM_18_1.png
diff --git a/README.md b/README.md
diff --git a/data_reading.py b/data_reading.py
@@ -0,0 +1,94 @@
+import os
+
+import numpy as np
+
+# File-name prefixes of the nine inertial signal files. The split suffix
+# ("train.txt" / "test.txt") is appended when the path lists are built below.
+INPUT_SIGNAL_TYPES = [
+    "body_acc_x_", "body_acc_y_", "body_acc_z_",
+    "body_gyro_x_", "body_gyro_y_", "body_gyro_z_",
+    "total_acc_x_", "total_acc_y_", "total_acc_z_"
+]
+
+# Human-readable activity names.
+# NOTE(review): presumably ordered like the 0-based class ids returned by
+# load_y -- confirm against the dataset documentation.
+LABELS = [
+    "WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS",
+    "SITTING", "STANDING", "LAYING"
+]
+
+# Dataset location, relative to the working directory.
+DATA_PATH = "data/"
+DATASET_PATH = "{}UCI HAR Dataset/".format(DATA_PATH)
+
+# Sub-directories of the two dataset splits.
+TRAIN = "train/"
+TEST = "test/"
+
+# One path per signal type, in the same order as INPUT_SIGNAL_TYPES.
+X_train_signals_paths = [
+    "{}{}Inertial Signals/{}train.txt".format(DATASET_PATH, TRAIN, signal_type)
+    for signal_type in INPUT_SIGNAL_TYPES
+]
+
+X_test_signals_paths = [
+    "{}{}Inertial Signals/{}test.txt".format(DATASET_PATH, TEST, signal_type)
+    for signal_type in INPUT_SIGNAL_TYPES
+]
+
+# Label file names inside each split directory.
+TEST_FILE_NAME = "y_test.txt"
+TRAIN_FILE_NAME = "y_train.txt"
+
+
+def load_X(X_signals_paths):
+    """
+    Load several signal files and stack them into one 3D array.
+
+    Each file is read as text, one whitespace-separated row of numbers per line.
+    Blank lines are ignored.
+
+    :param X_signals_paths: iterable of file paths, one file per signal type
+    :return: float32 np.ndarray of shape (rows per file, values per row, number of files)
+    """
+    X_signals = []
+
+    for signal_type_path in X_signals_paths:
+        # The context manager closes the file even when a row fails to parse.
+        with open(signal_type_path, 'r') as file:
+            X_signals.append([
+                # split() with no argument splits on any run of whitespace, so
+                # single and repeated spaces between values are both handled.
+                np.array(row.split(), dtype=np.float32)
+                for row in file
+                if row.strip()
+            ])
+
+    # (files, rows, values) -> (rows, values, files)
+    return np.transpose(np.array(X_signals), (1, 2, 0))
+
+
+def load_y(y_path):
+    """
+    Load integer class labels from a text file and shift them to 0-based ids.
+
+    Each non-blank line is split on whitespace and becomes one row of the result.
+
+    :param y_path: path of the label text file
+    :return: int32 np.ndarray of shape (rows, values per row), every value decreased by 1
+    """
+    # The context manager closes the file even when a value fails to parse.
+    with open(y_path, 'r') as file:
+        y_ = np.array(
+            [row.split() for row in file if row.strip()],
+            dtype=np.int32
+        )
+
+    # Subtract 1 from each output class for friendly 0-based indexing
+    return y_ - 1
+
+
+def load_data():
+    """
+    Load the training split and print a short summary of it.
+
+    Only the training split is read; the test split is not loaded here.
+
+    :return: tuple (X_train, y_train) as produced by load_X and load_y
+    """
+    train_labels_path = os.path.join(DATASET_PATH, TRAIN, TRAIN_FILE_NAME)
+
+    # "X": the neural network's training inputs
+    train_inputs = load_X(X_train_signals_paths)
+    # "y": the neural network's training outputs
+    train_labels = load_y(train_labels_path)
+
+    inputs_mean = np.mean(train_inputs)
+    inputs_std = np.std(train_inputs)
+
+    print("Some useful info to get an insight on dataset's shape and normalisation:")
+    print("(data_inputs shape, expected_outputs shape, every data input mean, every data input standard deviation)")
+    print(train_inputs.shape, train_labels.shape, inputs_mean, inputs_std)
+    print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")
+
+    return train_inputs, train_labels
diff --git a/model.py b/model.py
@@ -0,0 +1,149 @@
+import numpy as np
+from neuraxle.base import BaseStep, ExecutionContext
+from neuraxle.data_container import DataContainer
+from neuraxle.hyperparams.space import HyperparameterSamples
+import tensorflow as tf
+
+from neuraxle_tensorflow.tensorflow_v1 import TensorflowV1ModelStep
+
+# Input / output geometry
+N_STEPS = 128  # timesteps per series
+N_INPUTS = 9  # input parameters per timestep
+N_CLASSES = 6  # number of output classes
+
+# Network size
+N_HIDDEN = 32  # hidden layer number of features
+
+# Training settings
+LEARNING_RATE = 0.0025
+LAMBDA_LOSS_AMOUNT = 0.0015  # multiplier of the L2 term in the loss
+BATCH_SIZE = 1500
+
+
+def create_graph(step: TensorflowV1ModelStep):
+    """
+    Build the TensorFlow graph of a two-layer stacked LSTM classifier.
+
+    The hyperparameters 'n_steps', 'n_inputs', 'n_hidden' and 'n_classes' are read
+    from ``step.hyperparams``. Two placeholders are created, named 'data_inputs'
+    and 'expected_outputs'.
+
+    Some of this code is inspired by a slightly different RNN architecture used on
+    another dataset; some of the credit goes to "aymericdamien" (MIT license).
+
+    NOTE: the transpose/reshape/split below could be greatly optimised by shaping
+    the dataset once beforehand.
+
+    :param step: model step providing the hyperparameters
+    :return: output tensor of the final linear layer, one row per input series
+        and 'n_classes' columns
+    """
+    # input shape: (batch_size, n_steps, n_input)
+
+    # Graph input/output
+    data_inputs = tf.placeholder(tf.float32, [None, step.hyperparams['n_steps'], step.hyperparams['n_inputs']],
+                                 name='data_inputs')
+    # Not referenced again in this function.
+    # NOTE(review): presumably looked up by its name 'expected_outputs' when the loss is built -- confirm.
+    expected_outputs = tf.placeholder(tf.float32, [None, step.hyperparams['n_classes']], name='expected_outputs')
+
+    # Graph weights
+    weights = {
+        'hidden': tf.Variable(
+            tf.random_normal([step.hyperparams['n_inputs'], step.hyperparams['n_hidden']])
+        ),  # Hidden layer weights
+        'out': tf.Variable(
+            tf.random_normal([step.hyperparams['n_hidden'], step.hyperparams['n_classes']], mean=1.0)
+        )  # Output layer weights, initialised around 1.0 instead of 0.0
+    }
+
+    # Graph biases
+    biases = {
+        'hidden': tf.Variable(
+            tf.random_normal([step.hyperparams['n_hidden']])
+        ),
+        'out': tf.Variable(
+            tf.random_normal([step.hyperparams['n_classes']])
+        )
+    }
+
+    data_inputs = tf.transpose(
+        data_inputs,
+        [1, 0, 2])  # permute n_steps and batch_size
+
+    # Reshape to prepare input to hidden activation
+    data_inputs = tf.reshape(data_inputs, [-1, step.hyperparams['n_inputs']])
+    # new shape: (n_steps*batch_size, n_input)
+
+    # ReLU activation, thanks to Yu Zhao for adding this improvement here:
+    _X = tf.nn.relu(
+        tf.matmul(data_inputs, weights['hidden']) + biases['hidden']
+    )
+
+    # Split data because rnn cell needs a list of inputs for the RNN inner loop
+    _X = tf.split(_X, step.hyperparams['n_steps'], 0)
+    # new shape: n_steps * (batch_size, n_hidden)
+
+    # Define two stacked LSTM cells (two recurrent layers deep) with tensorflow
+    lstm_cell_1 = tf.contrib.rnn.BasicLSTMCell(step.hyperparams['n_hidden'], forget_bias=1.0, state_is_tuple=True)
+    lstm_cell_2 = tf.contrib.rnn.BasicLSTMCell(step.hyperparams['n_hidden'], forget_bias=1.0, state_is_tuple=True)
+    lstm_cells = tf.contrib.rnn.MultiRNNCell([lstm_cell_1, lstm_cell_2], state_is_tuple=True)
+
+    # Get LSTM cell output (the final states are not used afterwards)
+    outputs, states = tf.contrib.rnn.static_rnn(lstm_cells, _X, dtype=tf.float32)
+
+    # Keep only the last time step's output for a "many-to-one" style classifier
+    lstm_last_output = outputs[-1]
+
+    # Linear activation
+    return tf.matmul(lstm_last_output, weights['out']) + biases['out']
+
+
+def create_optimizer(step: TensorflowV1ModelStep):
+    """
+    Build the optimizer used to train the model.
+
+    :param step: model step whose 'learning_rate' hyperparameter is read
+    :return: a tf.train.AdamOptimizer configured with that learning rate
+    """
+    learning_rate = step.hyperparams['learning_rate']
+    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
+    return optimizer
+
+
+def create_loss(step: TensorflowV1ModelStep):
+    """
+    Build the training loss: mean softmax cross-entropy plus a weighted L2 term.
+
+    The L2 term sums tf.nn.l2_loss over every trainable variable and is scaled by
+    the 'lambda_loss_amount' hyperparameter.
+
+    :param step: model step; step['expected_outputs'] and step['output'] are used
+        as the labels and the logits
+    :return: scalar loss tensor
+    """
+    # L2 regularisation, meant to keep this oversized network from overfitting
+    lambda_loss_amount = step.hyperparams['lambda_loss_amount']
+    l2_sum = sum(tf.nn.l2_loss(variable) for variable in tf.trainable_variables())
+    l2_penalty = lambda_loss_amount * l2_sum
+
+    # Softmax loss
+    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+        labels=step['expected_outputs'],
+        logits=step['output']
+    )
+
+    return tf.reduce_mean(cross_entropy) + l2_penalty
+
+
+class ClassificationRNNTensorFlowModel(TensorflowV1ModelStep):
+    """
+    TensorFlow model step that records one loss and one accuracy value after
+    every fit_transform call, and prints them.
+    """
+
+    def setup(self) -> BaseStep:
+        """
+        Run the parent setup, then start with empty loss and accuracy histories.
+
+        :return: self
+        """
+        TensorflowV1ModelStep.setup(self)
+
+        self.losses = []
+        self.accuracies = []
+
+        return self
+
+    def _will_process(self, data_container: DataContainer, context: ExecutionContext) -> (DataContainer, ExecutionContext):
+        """
+        Convert the data container's content to numpy arrays before processing.
+
+        Expected outputs, when present, are reshaped to
+        (number of data inputs, 'n_classes').
+
+        :param data_container: container holding data inputs and optional expected outputs
+        :param context: execution context, passed through untouched
+        :return: tuple (data_container, context)
+        """
+        # asanyarray leaves existing ndarrays untouched and converts anything else.
+        data_container.data_inputs = np.asanyarray(data_container.data_inputs)
+
+        if data_container.expected_outputs is not None:
+            expected_outputs = np.asanyarray(data_container.expected_outputs)
+
+            expected_shape = (len(data_container.data_inputs), self.hyperparams['n_classes'])
+            if expected_outputs.shape != expected_shape:
+                expected_outputs = np.reshape(expected_outputs, expected_shape)
+
+            data_container.expected_outputs = expected_outputs
+
+        return data_container, context
+
+    def _did_fit_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
+        """
+        Record and print the loss and accuracy of the batch that was just processed.
+
+        :param data_container: container whose data inputs are compared, by argmax
+            over axis 1, with its expected outputs
+        :param context: execution context (unused)
+        :return: the same data container
+        """
+        # NOTE(review): data_inputs presumably holds the model outputs at this point,
+        # and self.loss is presumably set by the parent class -- confirm against
+        # TensorflowV1ModelStep.
+        predicted_classes = np.argmax(data_container.data_inputs, axis=1)
+        expected_classes = np.argmax(data_container.expected_outputs, axis=1)
+        accuracy = np.mean(predicted_classes == expected_classes)
+
+        self.accuracies.append(accuracy)
+        self.losses.append(self.loss)
+
+        print("Batch Loss = {:.6f}, Accuracy = {}".format(self.losses[-1], self.accuracies[-1]))
+
+        return data_container
+
+
+# Module-level model step, built from the graph/loss/optimizer factories above
+# and configured with the default hyperparameters.
+model_step = ClassificationRNNTensorFlowModel(
+    create_graph=create_graph,
+    create_loss=create_loss,
+    create_optimizer=create_optimizer
+).set_hyperparams(
+    HyperparameterSamples(dict(
+        n_steps=N_STEPS,  # timesteps per series
+        n_inputs=N_INPUTS,  # input parameters per timestep
+        n_hidden=N_HIDDEN,  # hidden layer number of features
+        n_classes=N_CLASSES,  # number of output classes
+        learning_rate=LEARNING_RATE,
+        lambda_loss_amount=LAMBDA_LOSS_AMOUNT,
+        batch_size=BATCH_SIZE
+    ))
+)
diff --git a/pipeline.py b/pipeline.py
@@ -0,0 +1,17 @@
+from neuraxle.pipeline import MiniBatchSequentialPipeline, Joiner
+from neuraxle.steps.numpy import OneHotEncoder
+from neuraxle.steps.output_handlers import OutputTransformerWrapper
+
+from steps.lstm_rnn_tensorflow_model_wrapper import ClassificationRNNTensorFlowModel, N_CLASSES, BATCH_SIZE
+
+
+# TODO: wrap by a validation split wrapper as issue #174
+# ValidationSplitWrapper(HumanActivityRecognitionPipeline)
+
+class HumanActivityRecognitionPipeline(MiniBatchSequentialPipeline):
+    """
+    Mini-batch pipeline made of three steps: a wrapped one-hot encoder, the
+    RNN classification model, and a joiner using BATCH_SIZE.
+    """
+
+    def __init__(self):
+        # NOTE(review): presumably one-hot encodes the expected outputs only -- confirm
+        # against OutputTransformerWrapper.
+        label_encoder = OutputTransformerWrapper(
+            OneHotEncoder(nb_columns=N_CLASSES, name='one_hot_encoded_label')
+        )
+        # NOTE(review): the model is built without arguments here -- confirm that the
+        # imported class supplies its own graph/loss/optimizer factories.
+        model = ClassificationRNNTensorFlowModel()
+        joiner = Joiner(batch_size=BATCH_SIZE)
+
+        MiniBatchSequentialPipeline.__init__(self, [label_encoder, model, joiner])
diff --git a/plotting.py b/plotting.py
@@ -0,0 +1,17 @@
+from matplotlib import pyplot as plt
+
+
+def plot_metric(metric_train, metric_validation=None, xlabel='x', ylabel='y', title='Metric'):
+    """
+    Plot a training metric, and optionally a validation metric, then show the figure.
+
+    Each metric is plotted against its own index (0, 1, 2, ...).
+
+    :param metric_train: sequence of training metric values
+    :param metric_validation: optional sequence of validation metric values
+    :param xlabel: label of the x axis
+    :param ylabel: label of the y axis
+    :param title: title of the figure
+    :return: None
+    """
+    plt.plot(range(len(metric_train)), metric_train)
+
+    legend = ['training']
+    if metric_validation is not None:
+        plt.plot(range(len(metric_validation)), metric_validation)
+        legend.append('validation')
+
+    plt.xlabel(xlabel)
+    # Must be ylabel(): calling xlabel() a second time overwrote the x label
+    # and left the y axis unlabelled.
+    plt.ylabel(ylabel)
+    plt.title(title)
+
+    plt.legend(legend, loc='upper left')
+    plt.show()
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,4 @@
+tensorflow==1.15
+tensorflow-gpu==1.15
+conv==0.2
+-e git://github.com/alexbrillant/Neuraxle.git@bb9bba79dbbf93c9b60c6c5036e9d6bf6021e5c5#egg=neuraxle
diff --git a/steps/__init__.py b/steps/__init__.py
diff --git a/steps/custom_json_decoder_for_2darray.py b/steps/custom_json_decoder_for_2darray.py
@@ -0,0 +1,16 @@
+import numpy as np
+
+from neuraxle.api.flask import JSONDataBodyDecoder
+
+
+class CustomJSONDecoderFor2DArray(JSONDataBodyDecoder):
+    """Custom JSON decoder that runs before the pipeline's transformation."""
+
+    def decode(self, data_inputs):
+        """
+        Turn the decoded JSON body into a numpy array.
+
+        :param data_inputs: JSON list object
+        :return: np.ndarray built from the data inputs
+        """
+        decoded_inputs = np.array(data_inputs)
+        return decoded_inputs
diff --git a/steps/custom_json_encoder_of_outputs.py b/steps/custom_json_encoder_of_outputs.py
@@ -0,0 +1,16 @@
+from neuraxle.api.flask import JSONDataResponseEncoder
+
+
+class CustomJSONEncoderOfOutputs(JSONDataResponseEncoder):
+    """Custom JSON response encoder that converts the pipeline's transformation outputs."""
+
+    def encode(self, data_inputs) -> dict:
+        """
+        Convert predictions to a dict for creating a JSON Response object.
+
+        Numpy arrays and numpy scalars are converted to plain Python lists and
+        numbers, because the standard JSON encoder cannot serialise them.
+
+        :param data_inputs: iterable of predictions (e.g. a list or an np.ndarray)
+        :return: dict with a single 'predictions' key holding a list
+        """
+        return {
+            'predictions': [self._to_jsonable(prediction) for prediction in data_inputs]
+        }
+
+    @staticmethod
+    def _to_jsonable(value):
+        """Return value with numpy containers and scalars replaced by plain Python objects."""
+        # Both np.ndarray and numpy scalar types expose tolist().
+        if hasattr(value, 'tolist'):
+            return value.tolist()
+        if isinstance(value, (list, tuple)):
+            return [CustomJSONEncoderOfOutputs._to_jsonable(item) for item in value]
+        return value
diff --git a/steps/forma_data.py b/steps/forma_data.py
@@ -0,0 +1,26 @@
+import numpy as np
+from neuraxle.base import BaseStep, NonFittableMixin
+from neuraxle.steps.output_handlers import InputAndOutputTransformerMixin
+
+
+class FormatData(NonFittableMixin, InputAndOutputTransformerMixin, BaseStep):
+    """
+    Non-fittable step that converts data inputs and expected outputs to numpy
+    arrays, reshaping the expected outputs to (number of data inputs, n_classes).
+    """
+
+    def __init__(self, n_classes):
+        """
+        :param n_classes: number of columns the expected outputs are reshaped to
+        """
+        NonFittableMixin.__init__(self)
+        InputAndOutputTransformerMixin.__init__(self)
+        BaseStep.__init__(self)
+        self.n_classes = n_classes
+
+    def transform(self, data_inputs):
+        """
+        Convert both halves of the received pair to numpy arrays.
+
+        :param data_inputs: pair (data inputs, expected outputs); the expected outputs may be None
+        :return: pair (data inputs, expected outputs); the expected outputs stay None when none were given
+        """
+        inputs, outputs = data_inputs
+
+        inputs = inputs if isinstance(inputs, np.ndarray) else np.array(inputs)
+
+        # Nothing to format on the output side.
+        if outputs is None:
+            return inputs, outputs
+
+        outputs = outputs if isinstance(outputs, np.ndarray) else np.array(outputs)
+
+        target_shape = (len(inputs), self.n_classes)
+        if outputs.shape != target_shape:
+            outputs = np.reshape(outputs, target_shape)
+
+        return inputs, outputs