Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sequential model #5

Merged
merged 6 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion include/ShkyeraTensor.hpp
Original file line number Diff line number Diff line change
@@ -1 +1,12 @@
#include "src/Value.hpp"
#pragma once

#include "src/core/Type.hpp"
#include "src/core/Value.hpp"
#include "src/core/Vector.hpp"
#include "src/nn/Activation.hpp"
#include "src/nn/Layer.hpp"
#include "src/nn/Loss.hpp"
#include "src/nn/Module.hpp"
#include "src/nn/Neuron.hpp"
#include "src/nn/Optimizer.hpp"
#include "src/nn/Sequential.hpp"
10 changes: 10 additions & 0 deletions include/src/core/Type.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#pragma once

/// Scalar type aliases used as the element type of the library's templates.
namespace shkyera::Type {

/// Alias for the built-in `float`.
using float32 = float;
/// Short spelling of float32.
using f32 = float32;

/// Alias for the built-in `double`.
using float64 = double;
/// Short spelling of float64.
using f64 = float64;

} // namespace shkyera::Type
42 changes: 42 additions & 0 deletions include/src/core/Utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#pragma once

#include <random>

/// Random sampling helpers backed by one process-wide Mersenne Twister engine.
namespace shkyera::utils {

// `inline` (C++17) gives these header-defined globals exactly one definition across the
// whole program. Without it every translation unit that includes this header defines its
// own `rand_dev` / `generator`, which violates the ODR and fails at link time.
inline std::random_device rand_dev;
inline std::mt19937 generator(rand_dev());

/// Draws a single non-integral (floating-point) value uniformly from [from, to).
/// @param from Lower bound (inclusive).
/// @param to   Upper bound (exclusive); must not be smaller than `from`.
template <typename T> std::enable_if_t<!std::is_integral_v<T>, T> sample(T from, T to) {
    std::uniform_real_distribution<T> distribution(from, to);
    return distribution(generator);
}

/// Draws `size` non-integral (floating-point) values uniformly from [from, to).
/// @return Vector of `size` samples; empty when `size` is 0.
template <typename T> std::enable_if_t<!std::is_integral_v<T>, std::vector<T>> sample(T from, T to, size_t size) {
    std::uniform_real_distribution<T> distribution(from, to);

    std::vector<T> sampled(size);
    for (T &value : sampled) {
        value = distribution(generator);
    }

    return sampled;
}

/// Draws a single integral value uniformly from the closed range [from, to].
/// @param from Lower bound (inclusive).
/// @param to   Upper bound (inclusive); must not be smaller than `from`.
template <typename T> std::enable_if_t<std::is_integral_v<T>, T> sample(T from, T to) {
    std::uniform_int_distribution<T> distribution(from, to);
    return distribution(generator);
}

/// Draws `size` integral values uniformly from the closed range [from, to].
/// @return Vector of `size` samples; empty when `size` is 0.
template <typename T> std::enable_if_t<std::is_integral_v<T>, std::vector<T>> sample(T from, T to, size_t size) {
    std::uniform_int_distribution<T> distribution(from, to);

    std::vector<T> sampled(size);
    for (T &value : sampled) {
        value = distribution(generator);
    }

    return sampled;
}

} // namespace shkyera::utils
57 changes: 50 additions & 7 deletions include/src/Value.hpp → include/src/core/Value.hpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
#pragma once

#include <cmath>
#include <iostream>
#include <memory>
#include <unordered_set>
#include <vector>

#include "Type.hpp"

namespace shkyera {

// Forward declarations: Value befriends Optimizer<T>, and the aliases below name Value<T>
// before the class template is defined.
template <typename T> class Optimizer;
template <typename T> class Value;
// Shared-ownership handle to a Value node; this is the type the operators take and return.
template <typename T> using ValuePtr = std::shared_ptr<Value<T>>;

// Shorthands for Value instantiated with Type::float32 and Type::float64.
using Val32 = Value<Type::float32>;
using Val64 = Value<Type::float64>;

template <typename T> class Value : public std::enable_shared_from_this<Value<T>> {
private:
T _data = 0;
Expand All @@ -22,19 +30,24 @@ template <typename T> class Value : public std::enable_shared_from_this<Value<T>
std::vector<ValuePtr<T>> topologicalSort(std::vector<ValuePtr<T>> &sorted, std::unordered_set<Value<T> *> &visited);

public:
friend class Optimizer<T>;

static ValuePtr<T> create(T data);

void backward();
T getGradient() { return _gradient; }
T getGradient();

ValuePtr<T> tanh();
ValuePtr<T> relu();
ValuePtr<T> sigmoid();
ValuePtr<T> exp();
ValuePtr<T> pow(ValuePtr<T> exponent);

template <typename U> friend ValuePtr<U> operator+(ValuePtr<U> a, ValuePtr<U> b);
template <typename U> friend ValuePtr<U> operator-(ValuePtr<U> a, ValuePtr<U> b);
template <typename U> friend ValuePtr<U> operator*(ValuePtr<U> a, ValuePtr<U> b);
template <typename U> friend ValuePtr<U> operator/(ValuePtr<U> a, ValuePtr<U> b);
template <typename U> friend ValuePtr<U> operator-(ValuePtr<U> a);

template <typename U> friend std::ostream &operator<<(std::ostream &os, const ValuePtr<U> &value);
};
Expand All @@ -43,6 +56,8 @@ template <typename T> Value<T>::Value(T data) : _data(data) {}

/// Factory for Value nodes: allocates a node holding `data` and returns a shared handle to it.
template <typename T> ValuePtr<T> Value<T>::create(T data) {
    // Plain `new` rather than std::make_shared.
    // NOTE(review): presumably because the Value constructor is not public — confirm.
    Value<T> *node = new Value<T>(data);
    return ValuePtr<T>(node);
}

/// Returns, by value, the gradient currently stored on this node.
template <typename T> T Value<T>::getGradient() {
    return this->_gradient;
}

template <typename T> ValuePtr<T> operator+(ValuePtr<T> a, ValuePtr<T> b) {
ValuePtr<T> result = Value<T>::create(a->_data + b->_data);
result->_children = {a, b};
Expand All @@ -54,6 +69,8 @@ template <typename T> ValuePtr<T> operator+(ValuePtr<T> a, ValuePtr<T> b) {
return result;
}

/// Subtraction, expressed as addition of the negated right-hand operand: a - b == a + (-b).
template <typename T> ValuePtr<T> operator-(ValuePtr<T> a, ValuePtr<T> b) {
    ValuePtr<T> negated = -b;
    return a + negated;
}

template <typename T> ValuePtr<T> operator*(ValuePtr<T> a, ValuePtr<T> b) {
ValuePtr<T> result = Value<T>::create(a->_data * b->_data);
result->_children = {a, b};
Expand All @@ -67,13 +84,39 @@ template <typename T> ValuePtr<T> operator*(ValuePtr<T> a, ValuePtr<T> b) {

/// Division, expressed as multiplication by the reciprocal: a / b == a * b^(-1).
template <typename T> ValuePtr<T> operator/(ValuePtr<T> a, ValuePtr<T> b) {
    ValuePtr<T> minusOne = Value<T>::create(-1);
    ValuePtr<T> reciprocal = b->pow(minusOne);
    return a * reciprocal;
}

/// Unary negation, built as a multiplication of a fresh constant -1 node with `a`.
template <typename T> ValuePtr<T> operator-(ValuePtr<T> a) {
    ValuePtr<T> minusOne = Value<T>::create(-1);
    return minusOne * a;
}

/// Hyperbolic tangent of this node, recorded as a new node whose only child is this node.
///
/// @return Node holding tanh(_data). Its backward step adds
///         (1 - tanh(_data)^2) * (gradient of the returned node) to this node's gradient.
template <typename T> ValuePtr<T> Value<T>::tanh() {
    auto thisValue = this->shared_from_this();

    // std::tanh saturates at +/-1. The hand-written (e^{2x} - 1) / (e^{2x} + 1) evaluates to
    // inf / inf = NaN once e^{2x} overflows.
    ValuePtr<T> result = Value<T>::create(std::tanh(thisValue->_data));
    result->_children = {thisValue};

    // The closure is stored inside `result`, so capturing `result` as a shared_ptr would make
    // the node own itself and never be released. Observe it weakly instead.
    std::weak_ptr<Value<T>> weakResult = result;
    result->_backward = [thisValue, weakResult]() {
        const ValuePtr<T> out = weakResult.lock();
        if (!out) {
            return;
        }
        // d/dx tanh(x) = 1 - tanh(x)^2, where tanh(x) is the OUTPUT value (out->_data), not
        // the input. This is the only gradient contribution for this edge.
        thisValue->_gradient += (1 - (out->_data * out->_data)) * out->_gradient;
    };

    return result;
}

/// Logistic sigmoid 1 / (1 + e^{-x}) of this node, recorded as a new node whose only child
/// is this node.
///
/// @return Node holding the sigmoid s of _data. Its backward step adds
///         s * (1 - s) * (gradient of the returned node) to this node's gradient.
template <typename T> ValuePtr<T> Value<T>::sigmoid() {
    auto thisValue = this->shared_from_this();

    ValuePtr<T> result = Value<T>::create(1 / (std::exp(-thisValue->_data) + 1));
    result->_children = {thisValue};

    // The closure is stored inside `result`, so capturing `result` as a shared_ptr would make
    // the node own itself and never be released. Observe it weakly instead.
    std::weak_ptr<Value<T>> weakResult = result;
    result->_backward = [thisValue, weakResult]() {
        const ValuePtr<T> out = weakResult.lock();
        if (!out) {
            return;
        }
        thisValue->_gradient += out->_data * (1 - out->_data) * out->_gradient;
    };

    return result;
}

template <typename T> ValuePtr<T> Value<T>::relu() {
auto thisValue = this->shared_from_this();

ValuePtr<T> result = Value<T>::create(_data > 0 ? _data : 0);
result->_children = {thisValue};
result->_backward = [thisValue, result]() {
thisValue->_gradient += (result->_data > 0 ? 1 : 0) * result->_gradient;
};

return result;
Expand Down Expand Up @@ -103,11 +146,6 @@ template <typename T> ValuePtr<T> Value<T>::pow(ValuePtr<T> exponent) {
return result;
}

// Streams a node as "Value(data=<data>)" and returns the stream.
// NOTE(review): an identical definition of this operator appears again further down in this
// view (after Value<T>::backward); a single header can keep only one of them — confirm which
// copy is the current one.
template <typename T> std::ostream &operator<<(std::ostream &os, const ValuePtr<T> &value) {
os << "Value(data=" << value->_data << ")";
return os;
}

template <typename T> std::vector<ValuePtr<T>> Value<T>::topologicalSort() {
std::vector<ValuePtr<T>> sorted;
std::unordered_set<Value<T> *> visited;
Expand Down Expand Up @@ -138,4 +176,9 @@ template <typename T> void Value<T>::backward() {
}
}

/// Writes a node to `os` in the form "Value(data=<data>)" and returns the stream for chaining.
template <typename T> std::ostream &operator<<(std::ostream &os, const ValuePtr<T> &value) {
    return os << "Value(data=" << value->_data << ")";
}

} // namespace shkyera
70 changes: 70 additions & 0 deletions include/src/core/Vector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#pragma once

#include <algorithm>
#include <cstddef>
#include <exception>
#include <ostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "Type.hpp"
#include "Value.hpp"

namespace shkyera {

// Forward declaration so the aliases below can be introduced ahead of the class definition.
template <typename T> class Vector;
// Shorthands for Vector instantiated with Type::float32 and Type::float64.
using Vec32 = Vector<Type::float32>;
using Vec64 = Vector<Type::float64>;

// Ordered collection of shared Value nodes, with a dot product built from Value operations.
template <typename T> class Vector {
private:
// The elements, held as shared handles to Value nodes.
std::vector<ValuePtr<T>> _values;

public:
// Empty vector.
Vector() = default;
// Wraps already-created nodes. Not `explicit`: Vector<T>::of returns a
// std::vector<ValuePtr<T>> and relies on this constructor for the implicit conversion.
Vector(std::vector<ValuePtr<T>> values);
// Builds a Vector by creating one new Value node per raw scalar in `values`.
static Vector<T> of(const std::vector<T> &values);

// Dot product with `other`; throws std::invalid_argument when the sizes differ.
ValuePtr<T> dot(const Vector<T> &other) const;
// Returns the handle stored at `index`; no bounds check is performed.
ValuePtr<T> operator[](size_t index) const;

// Number of elements.
size_t size() const;

// Streams the vector as "Vector(size=<n>, data={...})".
template <typename U> friend std::ostream &operator<<(std::ostream &os, const Vector<U> &vector);
};

/// Wraps an existing list of nodes.
/// The argument is already a by-value copy, so it is moved into the member instead of
/// default-constructing `_values` and then copy-assigning every handle a second time.
template <typename T> Vector<T>::Vector(std::vector<ValuePtr<T>> values) : _values(std::move(values)) {}

/// Builds a Vector from raw scalars, creating one new Value node per element, in order.
/// @param values Scalars to wrap.
/// @return Vector holding `values.size()` freshly created nodes.
template <typename T> Vector<T> Vector<T>::of(const std::vector<T> &values) {
    std::vector<ValuePtr<T>> nodes;
    nodes.reserve(values.size());

    for (const T &scalar : values) {
        nodes.emplace_back(Value<T>::create(scalar));
    }

    // Converted to Vector<T> through the std::vector<ValuePtr<T>> constructor.
    return nodes;
}

/// Number of nodes held by this vector.
template <typename T> size_t Vector<T>::size() const {
    return _values.size();
}

/// Dot product of this vector with `other`, built out of Value additions and multiplications.
/// @param other Vector with the same number of elements as this one.
/// @return Node holding the sum of element-wise products (a node holding 0 plus nothing for
///         empty vectors).
/// @throws std::invalid_argument when the two sizes differ.
template <typename T> ValuePtr<T> Vector<T>::dot(const Vector<T> &other) const {
    const size_t length = size();
    if (length != other.size()) {
        throw std::invalid_argument("Vectors need to be of the same size to compute the dot product. Sizes are " +
                                    std::to_string(size()) + " and " + std::to_string(other.size()) + ".");
    }

    ValuePtr<T> accumulator = Value<T>::create(0);
    for (size_t index = 0; index < length; ++index) {
        ValuePtr<T> product = _values[index] * other[index];
        accumulator = accumulator + product;
    }

    return accumulator;
}

/// Returns the handle stored at position `index`; no bounds check is performed.
template <typename T> ValuePtr<T> Vector<T>::operator[](size_t index) const {
    return _values[index];
}

/// Streams a vector as "Vector(size=<n>, data={<v0> <v1> ... })", each element followed by a
/// single space, and returns the stream for chaining.
template <typename T> std::ostream &operator<<(std::ostream &os, const Vector<T> &vector) {
    os << "Vector(size=" << vector.size() << ", data={";

    // Bind by const reference: iterating by value copies a shared_ptr per element, which
    // costs a reference-count increment and decrement for no benefit.
    for (const ValuePtr<T> &val : vector._values)
        os << val << ' ';

    os << "})";
    return os;
}

} // namespace shkyera
16 changes: 16 additions & 0 deletions include/src/nn/Activation.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "../core/Type.hpp"
#include "../core/Value.hpp"

/// Activation functions packaged as callables over Value nodes.
namespace shkyera::Activation {

/// Callable that takes one node handle and returns one node handle.
template <typename T> using Function = std::function<ValuePtr<T>(ValuePtr<T>)>;

/// Forwards to Value<T>::tanh().
template <typename T> Function<T> tanh = [](ValuePtr<T> node) -> ValuePtr<T> { return node->tanh(); };
/// Forwards to Value<T>::relu().
template <typename T> Function<T> relu = [](ValuePtr<T> node) -> ValuePtr<T> { return node->relu(); };
/// Forwards to Value<T>::exp().
template <typename T> Function<T> exp = [](ValuePtr<T> node) -> ValuePtr<T> { return node->exp(); };
/// Forwards to Value<T>::sigmoid().
template <typename T> Function<T> sigmoid = [](ValuePtr<T> node) -> ValuePtr<T> { return node->sigmoid(); };
/// Identity: hands the input node back unchanged.
template <typename T> Function<T> linear = [](ValuePtr<T> node) -> ValuePtr<T> { return node; };

} // namespace shkyera::Activation
60 changes: 60 additions & 0 deletions include/src/nn/Layer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#pragma once

#include "../core/Type.hpp"
#include "Activation.hpp"
#include "Module.hpp"
#include "Neuron.hpp"

namespace shkyera {

// Forward declaration so the aliases below can be introduced ahead of the class definition.
template <typename T> class Layer;
/// Shared-ownership handle to a Layer.
template <typename T> using LayerPtr = std::shared_ptr<Layer<T>>;

using Layer32 = Layer<Type::float32>;
// Previously aliased to Type::float32, which made Layer64 the very same type as Layer32.
using Layer64 = Layer<Type::float64>;

/// A Module made of `size` neurons, each of which is fed the same input vector.
template <typename T> class Layer : public Module<T> {
  private:
    std::vector<Neuron<T>> _neurons;

    /// Private: instances are obtained through Layer::create.
    Layer(size_t input, size_t size, Activation::Function<T> activation = Activation::relu<T>);

  public:
    /// Creates a layer of `size` neurons built with `input` and `activation`, and returns a
    /// shared handle to it.
    static LayerPtr<T> create(size_t input, size_t size, Activation::Function<T> activation = Activation::relu<T>);

    /// Applies every neuron to `x` and collects one output per neuron.
    Vector<T> operator()(const Vector<T> &x) const override;
    /// Concatenated parameters of all neurons, in neuron order.
    std::vector<ValuePtr<T>> parameters() const override;
};

/// Builds `size` neurons, each constructed from `input` and `activation`.
template <typename T> Layer<T>::Layer(size_t input, size_t size, Activation::Function<T> activation) {
    _neurons.reserve(size);

    size_t remaining = size;
    while (remaining-- > 0) {
        _neurons.push_back(Neuron<T>(input, activation));
    }
}

/// Factory: allocates a Layer with the given arguments and returns a shared handle to it.
template <typename T> LayerPtr<T> Layer<T>::create(size_t input, size_t size, Activation::Function<T> activation) {
    // Plain `new` because the Layer constructor is private to this class.
    Layer<T> *layer = new Layer<T>(input, size, activation);
    return LayerPtr<T>(layer);
}

/// Runs every neuron on `x` and gathers the results.
/// @param x Input vector handed unchanged to each neuron.
/// @return Vector with one element per neuron, in neuron order.
template <typename T> Vector<T> Layer<T>::operator()(const Vector<T> &x) const {
    std::vector<ValuePtr<T>> activations;
    activations.reserve(_neurons.size());

    for (const Neuron<T> &neuron : _neurons) {
        // A neuron call yields a Vector; only its element 0 is collected.
        activations.push_back(neuron(x)[0]);
    }

    return Vector<T>(activations);
}

/// Collects the parameters of every neuron into one flat list, in neuron order.
template <typename T> std::vector<ValuePtr<T>> Layer<T>::parameters() const {
    std::vector<ValuePtr<T>> collected;

    for (size_t i = 0; i < _neurons.size(); ++i) {
        const std::vector<ValuePtr<T>> fromNeuron = _neurons[i].parameters();
        collected.insert(collected.end(), fromNeuron.begin(), fromNeuron.end());
    }

    return collected;
}

} // namespace shkyera
29 changes: 29 additions & 0 deletions include/src/nn/Loss.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#pragma once

#include "../core/Value.hpp"

namespace shkyera::Loss {

// Signature of a loss: takes two vectors by value and returns a ValuePtr<T>.
template <typename T> using Function = std::function<ValuePtr<T>(Vector<T> a, Vector<T> b)>;

// Shorthands for Function instantiated with Type::float32 and Type::float64.
using Function32 = Function<Type::float32>;
using Function64 = Function<Type::float64>;

/// Mean squared error between two equally sized vectors.
///
/// Builds the node  (sum_i (a[i] - b[i])^2) / n  with n = a.size(), calls backward() on it
/// (so gradients are accumulated as a side effect of evaluating the loss), and returns it.
///
/// @param a First vector.
/// @param b Second vector; must have the same size as `a`.
/// @return The loss node, with backward() already run on it.
/// @throws std::invalid_argument when the two sizes differ.
template <typename T>
Function<T> MSE = [](Vector<T> a, Vector<T> b) {
    if (a.size() != b.size()) {
        throw std::invalid_argument("Vectors need to be of the same size to compute the MSE loss. Sizes are " +
                                    std::to_string(a.size()) + " and " + std::to_string(b.size()) + ".");
    }

    ValuePtr<T> loss = Value<T>::create(0);
    for (size_t i = 0; i < a.size(); ++i) {
        loss = loss + ((a[i] - b[i])->pow(Value<T>::create(2)));
    }

    // Turn the sum of squares into a mean; without this division the result is the SSE, not
    // the MSE. Skipped for empty input so the loss stays 0 instead of becoming 0 / 0.
    if (a.size() > 0) {
        loss = loss / Value<T>::create(static_cast<T>(a.size()));
    }

    loss->backward();

    return loss;
};

} // namespace shkyera::Loss
20 changes: 20 additions & 0 deletions include/src/nn/Module.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#pragma once

#include "../core/Vector.hpp"

namespace shkyera {

// Forward declaration so ModulePtr can be introduced ahead of the class definition.
template <typename T> class Module;
// Shared-ownership handle to a Module.
template <typename T> using ModulePtr = std::shared_ptr<Module<T>>;

/// Base class of callable network components.
///
/// The defaults make a bare Module an identity mapping with no parameters; derived classes
/// override operator() and parameters().
template <typename T> class Module {
  protected:
    /// Protected: a Module is only constructed as the base of a derived class.
    Module() = default;

  public:
    /// Virtual so that destroying a derived module through a Module pointer or reference runs
    /// the derived destructor (deleting through a base without it is undefined behaviour).
    virtual ~Module() = default;

    /// Named alias for operator(); dispatches virtually to the derived implementation.
    Vector<T> forward(const Vector<T> &x) const { return (*this)(x); }
    /// Default: returns the input unchanged.
    virtual Vector<T> operator()(const Vector<T> &x) const { return x; }
    /// Default: no parameters.
    virtual std::vector<ValuePtr<T>> parameters() const { return {}; }
};

} // namespace shkyera
Loading
Loading