-
Notifications
You must be signed in to change notification settings - Fork 938
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Neuraxle refactor #32
base: master
Are you sure you want to change the base?
Changes from all commits
f355cee
fc5b7a9
c354c5c
4688416
db28a90
5c80cb0
654c433
6c115ef
44ccad8
4ba8e5a
53e54de
85d8ced
d5e4020
84b360b
5c6ec1d
348f28a
e288133
7b34230
b0a0654
27565e3
e200823
16a8856
67144b9
2fceeb6
1c74c61
e4d2c99
6856be6
d6aa641
cfdd545
09e8e09
866619b
cb08c95
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,12 @@ | ||
.ipynb_checkpoints | ||
___* | ||
neuraxle/** | ||
steps/__pycache__/** | ||
savers/__pycache__/** | ||
__pycache__/** | ||
.idea/** | ||
steps/one_hot_encoder.py | ||
steps/transform_expected_output_only_wrapper.py | ||
venv/** | ||
cache/** | ||
neuraxle_tensorflow/** |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
This file was deleted.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import os | ||
|
||
import numpy as np | ||
|
||
# File-name prefixes of the signal files; each prefix is completed below with
# "train.txt" or "test.txt" to form the path of one signal file.
INPUT_SIGNAL_TYPES = [
    "body_acc_x_",
    "body_acc_y_",
    "body_acc_z_",
    "body_gyro_x_",
    "body_gyro_y_",
    "body_gyro_z_",
    "total_acc_x_",
    "total_acc_y_",
    "total_acc_z_"
]

# Names of the output classes.
# NOTE(review): presumably indexed by the 0-based class id returned by load_y - confirm.
LABELS = [
    "WALKING",
    "WALKING_UPSTAIRS",
    "WALKING_DOWNSTAIRS",
    "SITTING",
    "STANDING",
    "LAYING"
]

# Paths are built by plain string concatenation, hence the trailing slashes.
DATA_PATH = "data/"
DATASET_PATH = DATA_PATH + "UCI HAR Dataset/"

TRAIN = "train/"
TEST = "test/"

# One path per signal type, in the order of INPUT_SIGNAL_TYPES.
X_train_signals_paths = [
    DATASET_PATH + TRAIN + "Inertial Signals/" + signal + "train.txt" for signal in INPUT_SIGNAL_TYPES
]

X_test_signals_paths = [
    DATASET_PATH + TEST + "Inertial Signals/" + signal + "test.txt" for signal in INPUT_SIGNAL_TYPES
]

# Names of the label files, joined to DATASET_PATH and the split folder by load_data.
TEST_FILE_NAME = "y_test.txt"
TRAIN_FILE_NAME = "y_train.txt"
|
||
|
||
def load_X(X_signals_paths):
    """
    Load several signal text files and stack them into a single float32 array.

    Every file holds one series per row, written as whitespace-separated floats.

    :param X_signals_paths: paths of the signal files, one file per signal type
    :return: np.ndarray of shape (n_series, n_values_per_series, n_signal_types)
    """
    X_signals = []

    for signal_type_path in X_signals_paths:
        # The context manager closes the file even when a row fails to parse.
        with open(signal_type_path, 'r') as file:
            X_signals.append([
                # split() without a separator collapses runs of whitespace, so
                # rows padded with several spaces do not produce empty tokens.
                np.array(row.split(), dtype=np.float32)
                for row in file
                if row.strip()  # ignore blank rows such as a trailing empty line
            ])

    # (n_signal_types, n_series, n_values) -> (n_series, n_values, n_signal_types)
    return np.transpose(np.array(X_signals), (1, 2, 0))
|
||
|
||
def load_y(y_path):
    """
    Load the class labels from a text file and shift them to 0-based ids.

    :param y_path: path of the label file, one whitespace-separated row per sample
    :return: np.ndarray of dtype int32 with one row per non-blank line of the file
    """
    # The context manager closes the file even when a row fails to parse.
    with open(y_path, 'r') as file:
        y_ = np.array(
            # split() without a separator tolerates any amount of padding
            # whitespace; blank rows are skipped.
            [row.split() for row in file if row.strip()],
            dtype=np.int32
        )

    # Subtract 1 from each output class for friendly 0-based indexing
    return y_ - 1
|
||
|
||
def load_data():
    """
    Load the training inputs and labels, and print a short summary of them.

    Only the training split is loaded; the test split is not read here.

    :return: tuple (data_inputs, expected_outputs) as returned by load_X and load_y
    """
    # "X": the neural network's training inputs
    data_inputs = load_X(X_train_signals_paths)

    # "y": the neural network's training outputs
    labels_path = os.path.join(DATASET_PATH, TRAIN, TRAIN_FILE_NAME)
    expected_outputs = load_y(labels_path)

    print("Some useful info to get an insight on dataset's shape and normalisation:")
    print("(data_inputs shape, expected_outputs shape, every data input mean, every data input standard deviation)")
    print(data_inputs.shape, expected_outputs.shape, np.mean(data_inputs), np.std(data_inputs))
    print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")

    return data_inputs, expected_outputs
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
import numpy as np | ||
from neuraxle.base import BaseStep, ExecutionContext | ||
from neuraxle.data_container import DataContainer | ||
from neuraxle.hyperparams.space import HyperparameterSamples | ||
import tensorflow as tf | ||
|
||
from neuraxle_tensorflow.tensorflow_v1 import TensorflowV1ModelStep | ||
|
||
# Default hyperparameter values, passed to the model step through
# HyperparameterSamples at the bottom of this module.
N_HIDDEN = 32  # hidden layer number of features
N_STEPS = 128  # timesteps per series
N_INPUTS = 9  # input parameters per timestep
LAMBDA_LOSS_AMOUNT = 0.0015  # multiplier of the L2 term in create_loss
LEARNING_RATE = 0.0025  # learning rate given to the Adam optimizer
N_CLASSES = 6  # total number of classes
BATCH_SIZE = 1500  # stored as the 'batch_size' hyperparameter
|
||
|
||
def create_graph(step: TensorflowV1ModelStep):
    """
    Build the TensorFlow graph of a two-layer stacked LSTM classifier.

    Reads 'n_steps', 'n_inputs', 'n_hidden' and 'n_classes' from step.hyperparams,
    and creates the 'data_inputs' and 'expected_outputs' placeholders.

    :param step: model step providing the hyperparameters
    :return: output tensor of the final linear layer (last LSTM output times the 'out' weights, plus bias)
    """
    # Function returns a tensorflow LSTM (RNN) artificial neural network from given parameters.
    # Moreover, two LSTM cells are stacked which adds deepness to the neural network.
    # Note, some code of this notebook is inspired from a slightly different
    # RNN architecture used on another dataset, some of the credits goes to
    # "aymericdamien" under the MIT license.
    # (NOTE: This step could be greatly optimised by shaping the dataset once.)
    # input shape: (batch_size, n_steps, n_input)

    # Graph input/output
    data_inputs = tf.placeholder(tf.float32, [None, step.hyperparams['n_steps'], step.hyperparams['n_inputs']],
                                 name='data_inputs')
    expected_outputs = tf.placeholder(tf.float32, [None, step.hyperparams['n_classes']], name='expected_outputs')

    # Graph weights
    weights = {
        'hidden': tf.Variable(
            tf.random_normal([step.hyperparams['n_inputs'], step.hyperparams['n_hidden']])
        ),  # Hidden layer weights
        'out': tf.Variable(
            tf.random_normal([step.hyperparams['n_hidden'], step.hyperparams['n_classes']], mean=1.0)
        )
    }

    biases = {
        'hidden': tf.Variable(
            tf.random_normal([step.hyperparams['n_hidden']])
        ),
        'out': tf.Variable(
            tf.random_normal([step.hyperparams['n_classes']])
        )
    }

    data_inputs = tf.transpose(
        data_inputs,
        [1, 0, 2])  # permute n_steps and batch_size

    # Reshape to prepare input to hidden activation
    data_inputs = tf.reshape(data_inputs, [-1, step.hyperparams['n_inputs']])
    # new shape: (n_steps*batch_size, n_input)

    # ReLU activation, thanks to Yu Zhao for adding this improvement here:
    _X = tf.nn.relu(
        tf.matmul(data_inputs, weights['hidden']) + biases['hidden']
    )

    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(_X, step.hyperparams['n_steps'], 0)
    # new shape: n_steps * (batch_size, n_hidden)

    # Define two stacked LSTM cells (two recurrent layers deep) with tensorflow
    lstm_cell_1 = tf.contrib.rnn.BasicLSTMCell(step.hyperparams['n_hidden'], forget_bias=1.0, state_is_tuple=True)
    lstm_cell_2 = tf.contrib.rnn.BasicLSTMCell(step.hyperparams['n_hidden'], forget_bias=1.0, state_is_tuple=True)
    lstm_cells = tf.contrib.rnn.MultiRNNCell([lstm_cell_1, lstm_cell_2], state_is_tuple=True)

    # Get LSTM cell output
    outputs, states = tf.contrib.rnn.static_rnn(lstm_cells, _X, dtype=tf.float32)

    # Get last time step's output feature for a "many-to-one" style classifier
    lstm_last_output = outputs[-1]

    # Linear activation
    return tf.matmul(lstm_last_output, weights['out']) + biases['out']
|
||
|
||
def create_optimizer(step: TensorflowV1ModelStep):
    """
    Create the Adam optimizer for the model.

    :param step: model step whose 'learning_rate' hyperparameter is used
    :return: a tf.train.AdamOptimizer instance
    """
    learning_rate = step.hyperparams['learning_rate']
    return tf.train.AdamOptimizer(learning_rate=learning_rate)
|
||
|
||
def create_loss(step: TensorflowV1ModelStep):
    """
    Create the loss tensor: mean softmax cross-entropy plus a scaled L2 penalty.

    :param step: model step giving access to the 'expected_outputs' and 'output'
        entries and to the 'lambda_loss_amount' hyperparameter
    :return: loss tensor
    """
    # L2 penalty summed over every trainable variable, scaled by
    # 'lambda_loss_amount'; it keeps this overkill network from overfitting.
    variable_penalties = (tf.nn.l2_loss(variable) for variable in tf.trainable_variables())
    regularization = step.hyperparams['lambda_loss_amount'] * sum(variable_penalties)

    # Softmax loss
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=step['expected_outputs'],
        logits=step['output']
    )

    return tf.reduce_mean(cross_entropy) + regularization
|
||
|
||
class ClassificationRNNTensorFlowModel(TensorflowV1ModelStep):
    """
    TensorflowV1ModelStep that records the loss and accuracy after each fit_transform.

    Before processing, inputs and expected outputs are converted to numpy arrays,
    and expected outputs are reshaped to (n_samples, n_classes).
    """

    def setup(self) -> BaseStep:
        """
        Set up the parent step and reset the recorded metrics.

        :return: self
        """
        TensorflowV1ModelStep.setup(self)

        self.losses = []
        self.accuracies = []

        return self

    def _will_process(self, data_container: DataContainer, context: ExecutionContext) -> (DataContainer, ExecutionContext):
        """
        Convert the data container's content to numpy arrays of the expected shape.

        :param data_container: data container about to be processed
        :param context: execution context, returned unchanged
        :return: tuple (data_container, context)
        """
        if not isinstance(data_container.data_inputs, np.ndarray):
            data_container.data_inputs = np.array(data_container.data_inputs)

        if data_container.expected_outputs is not None:
            if not isinstance(data_container.expected_outputs, np.ndarray):
                data_container.expected_outputs = np.array(data_container.expected_outputs)

            # One row per sample and one column per class.
            expected_shape = (len(data_container.data_inputs), self.hyperparams['n_classes'])
            if data_container.expected_outputs.shape != expected_shape:
                data_container.expected_outputs = np.reshape(data_container.expected_outputs, expected_shape)

        return data_container, context

    def _did_fit_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
        """
        Record and print the batch loss and accuracy.

        Accuracy is the fraction of rows where the argmax of data_inputs
        equals the argmax of expected_outputs.

        :param data_container: data container after fit_transform
        :param context: execution context (unused here)
        :return: the same data container
        """
        predicted_classes = np.argmax(data_container.data_inputs, axis=1)
        expected_classes = np.argmax(data_container.expected_outputs, axis=1)
        accuracy = np.mean(predicted_classes == expected_classes)

        self.accuracies.append(accuracy)
        self.losses.append(self.loss)

        print("Batch Loss = " + "{:.6f}".format(self.losses[-1]) + ", Accuracy = {}".format(self.accuracies[-1]))

        return data_container
|
||
|
||
# Ready-to-use model step: wires the graph, loss and optimizer factories defined
# above and sets the default hyperparameters from the module-level constants.
model_step = ClassificationRNNTensorFlowModel(
    create_graph=create_graph,
    create_loss=create_loss,
    create_optimizer=create_optimizer
).set_hyperparams(
    HyperparameterSamples({
        'n_steps': N_STEPS,  # 128 timesteps per series
        'n_inputs': N_INPUTS,  # 9 input parameters per timestep
        'n_hidden': N_HIDDEN,  # Hidden layer num of features
        'n_classes': N_CLASSES,  # Total number of classes
        'learning_rate': LEARNING_RATE,
        'lambda_loss_amount': LAMBDA_LOSS_AMOUNT,
        'batch_size': BATCH_SIZE
    })
)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from neuraxle.pipeline import MiniBatchSequentialPipeline, Joiner | ||
from neuraxle.steps.numpy import OneHotEncoder | ||
from neuraxle.steps.output_handlers import OutputTransformerWrapper | ||
|
||
from steps.lstm_rnn_tensorflow_model_wrapper import ClassificationRNNTensorFlowModel, N_CLASSES, BATCH_SIZE | ||
|
||
|
||
# TODO: wrap by a validation split wrapper as issue #174 | ||
# ValidationSplitWrapper(HumanActivityRecognitionPipeline) | ||
|
||
class HumanActivityRecognitionPipeline(MiniBatchSequentialPipeline):
    """Mini-batch pipeline made of a one-hot encoder, the RNN classifier and a batch joiner."""

    def __init__(self):
        MiniBatchSequentialPipeline.__init__(self, [
            # One-hot encoder over N_CLASSES columns, wrapped in OutputTransformerWrapper
            # (presumably so that it is applied to the expected outputs - confirm).
            OutputTransformerWrapper(OneHotEncoder(nb_columns=N_CLASSES, name='one_hot_encoded_label')),
            # NOTE(review): built without create_graph / create_loss / create_optimizer and
            # without hyperparameters, unlike `model_step` in the model module - confirm
            # that the no-argument constructor is valid.
            ClassificationRNNTensorFlowModel(),
            Joiner(batch_size=BATCH_SIZE)
        ])
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from matplotlib import pyplot as plt | ||
|
||
|
||
def plot_metric(metric_train, metric_validation=None, xlabel='x', ylabel='y', title='Metric'):
    """
    Plot a training metric, and optionally a validation metric, against its index.

    :param metric_train: sequence of training metric values
    :param metric_validation: optional sequence of validation metric values
    :param xlabel: label of the x axis
    :param ylabel: label of the y axis
    :param title: title of the figure
    :return: None; the figure is displayed with plt.show()
    """
    plt.plot(range(len(metric_train)), metric_train)

    legend = ['training']
    if metric_validation is not None:
        plt.plot(range(len(metric_validation)), metric_validation)
        legend.append('validation')

    plt.xlabel(xlabel)
    # ylabel must go to the y axis; calling xlabel twice overwrote the x label
    # and left the y axis unlabelled.
    plt.ylabel(ylabel)
    plt.title(title)

    plt.legend(legend, loc='upper left')
    plt.show()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
tensorflow==1.15 | ||
tensorflow-gpu==1.15 | ||
conv==0.2 | ||
-e git://github.com/alexbrillant/Neuraxle.git@bb9bba79dbbf93c9b60c6c5036e9d6bf6021e5c5#egg=neuraxle |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import numpy as np | ||
|
||
from neuraxle.api.flask import JSONDataBodyDecoder | ||
|
||
|
||
class CustomJSONDecoderFor2DArray(JSONDataBodyDecoder):
    """Custom JSON decoder that runs ahead of the pipeline's transformation."""

    def decode(self, data_inputs):
        """
        Build a numpy array from a decoded JSON list.

        :param data_inputs: json object
        :return: np array for data inputs
        """
        decoded_inputs = np.array(data_inputs)
        return decoded_inputs
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
from neuraxle.api.flask import JSONDataResponseEncoder | ||
|
||
|
||
class CustomJSONEncoderOfOutputs(JSONDataResponseEncoder):
    """This is a custom JSON response encoder class for converting the pipeline's transformation outputs."""

    def encode(self, data_inputs) -> dict:
        """
        Convert predictions to a dict for creating a JSON Response object.

        Elements exposing ``tolist()`` (numpy arrays and numpy scalars) are
        converted to plain Python values, because the standard JSON encoder
        cannot serialize numpy types.

        :param data_inputs: iterable of predictions
        :return: dict with a single 'predictions' key holding a list
        """
        return {
            'predictions': [
                prediction.tolist() if hasattr(prediction, 'tolist') else prediction
                for prediction in data_inputs
            ]
        }
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import numpy as np | ||
from neuraxle.base import BaseStep, NonFittableMixin | ||
from neuraxle.steps.output_handlers import InputAndOutputTransformerMixin | ||
|
||
|
||
class FormatData(NonFittableMixin, InputAndOutputTransformerMixin, BaseStep):
    """
    Non-fittable step that converts data inputs and expected outputs to numpy arrays.

    Expected outputs, when present, are reshaped to (n_samples, n_classes).
    """

    def __init__(self, n_classes):
        NonFittableMixin.__init__(self)
        InputAndOutputTransformerMixin.__init__(self)
        BaseStep.__init__(self)
        self.n_classes = n_classes

    def transform(self, data_inputs):
        """
        Convert both members of the (data_inputs, expected_outputs) pair.

        :param data_inputs: tuple (data_inputs, expected_outputs)
        :return: tuple (data_inputs, expected_outputs) after conversion
        """
        inputs, outputs = data_inputs

        if not isinstance(inputs, np.ndarray):
            inputs = np.array(inputs)

        # Nothing to format when there are no expected outputs.
        if outputs is None:
            return inputs, outputs

        if not isinstance(outputs, np.ndarray):
            outputs = np.array(outputs)

        # NOTE(review): PR reviewer comment on this check (truncated in the page):
        # "This if should not be needed. Use a ..."
        target_shape = (len(inputs), self.n_classes)
        if outputs.shape != target_shape:
            outputs = np.reshape(outputs, target_shape)

        return inputs, outputs
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be replaced by this?