From f6df88c30b5813d0f30000c4590218bb30911e8b Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Fri, 7 Feb 2025 12:16:23 +0100 Subject: [PATCH 01/19] added the ctensor class and changes to cmake --- CMakeLists.txt | 13 ++ examples/example_CTensor.cpp | 13 ++ include/SplineNetLib/CTensor.hpp | 155 +++++++++++++ include/SplineNetLib/CTensorFunc.hpp | 97 +++++++++ include/SplineNetLib/CTensorUtils.hpp | 95 ++++++++ include/SplineNetLib/splines.hpp | 1 + src/CTensor.tpp | 303 ++++++++++++++++++++++++++ src/CTensorFunc.tpp | 300 +++++++++++++++++++++++++ src/CTensorUtils.tpp | 205 +++++++++++++++++ 9 files changed, 1182 insertions(+) create mode 100644 examples/example_CTensor.cpp create mode 100644 include/SplineNetLib/CTensor.hpp create mode 100644 include/SplineNetLib/CTensorFunc.hpp create mode 100644 include/SplineNetLib/CTensorUtils.hpp create mode 100644 src/CTensor.tpp create mode 100644 src/CTensorFunc.tpp create mode 100644 src/CTensorUtils.tpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5424b75..1f9c63b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,6 +35,13 @@ add_library(SplineNetLib src/splines.cpp ) +# Add the new template-based class headers and implementations +target_sources(SplineNetLib PRIVATE + src/CTensor.tpp + src/CTensorFunc.tpp + src/CTensorUtils.tpp +) + # Specify the include directories for the library target target_include_directories(SplineNetLib PUBLIC ${PROJECT_SOURCE_DIR}/include) @@ -85,3 +92,9 @@ write_basic_package_version_file( install(FILES "${CMAKE_CURRENT_BINARY_DIR}/SplineNetLibConfigVersion.cmake" DESTINATION lib/cmake/SplineNetLib) + + + + + + diff --git a/examples/example_CTensor.cpp b/examples/example_CTensor.cpp new file mode 100644 index 0000000..89a5d33 --- /dev/null +++ b/examples/example_CTensor.cpp @@ -0,0 +1,13 @@ +#include "../include/SplineNetLib/layers.hpp" + +using namespace SplineNetLib; + +int main() { + + //this will create a CTensor that holds a data vector and shape vector, all other member variables are uninitialized + auto a = CTensor({1,1,1,2,2,2},{2,3}); + + std::cout<<"created CTensor a with data : "<, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). 
+// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + +#ifndef CTENSOR_HPP +#define CTENSOR_HPP + +#include "CTensorFunc.hpp" + +namespace SplineNetLib { + +template +requires Scalar +class Function; + +template +class DTensor{ +public: + std::vector _data; + std::vector _shape; + std::vector _grad; + std::vector>> _grad_fn; + int _ref_c; + + DTensor(const std::vector& data, const std::vector& shape) : + _data(data), _shape(shape), _ref_c(1) {} + + DTensor(const std::initializer_list& data, const std::initializer_list& shape) : + _data(data), _shape(shape), _ref_c(1) {} + + void add_ref(){ + _ref_c++; + } + + void rmf_ref(){ + _ref_c--; + if (_ref_c == 0){ + delete this; + } + } +}; + + +template +class CTensor { +private: + + + +public: + + DTensor* _tensor_data; + + bool requires_grad = true; + + CTensor(const std::initializer_list& init, const std::initializer_list& shape) { + _tensor_data = new DTensor(init, shape); + } + + CTensor(const std::vector& data, const std::vector& shape) { + _tensor_data = new DTensor(data, shape); + } + + CTensor(const CTensor& other){ + _tensor_data = other._tensor_data; + _tensor_data->_ref_c++; + } + + + ~CTensor(){ + _tensor_data->rmf_ref(); + } + + //-----getters----- + + std::vector data() const { return this->_tensor_data->_data; } + + std::vector shape() const { return this->_tensor_data->_shape; } + + std::vector grad() const { return this->_tensor_data->_grad; } + + std::vector>> grad_fn() const { return this->_tensor_data->grad_fn; } + + void zero_grad(); + + //-----shape-utils----- + + void squeeze(const size_t &dim) ; + + void unsqueeze(const size_t &dim) ; + + void expand(const size_t &dim, const size_t &factor) ; + + void permute(const std::vector &permutation_indecies) ; + + void transpose() ; + + //-----auto_grad----- + + void clear_history() ; + + void clear_graph() ; + //maybe add overload o this so that f no arg was passed propagated grad is set to {}, than this function below could use all by ref + void backward(std::vector prop_grad = {}) ; + + + //-----operator----- + + auto operator[](size_t idx) ; + + auto operator+(CTensor &other) ; + + auto operator-(CTensor &other) ; + + auto operator*(CTensor &other) ; + + //CTensor& operator=(const CTensor &other) noexcept; + + //CTensor& operator=(CTensor &&other) ; + + +}; +/* +template +CTensor zeros(std::vector shape) ; + +template +CTensor ones(std::vector shape) ; + +template +CTensor random(std::vector shape, T min, T max) ; + +template +CTensor Tensor(U data) ; + +template +CTensor Tensor(U data) ; + +template +CTensor Tensor(std::vector data, std::vector shape) ; +*/ +} //namespace + +#include "../src/CTensor.tpp" + + +#endif \ No newline at end of file diff --git a/include/SplineNetLib/CTensorFunc.hpp b/include/SplineNetLib/CTensorFunc.hpp new file mode 100644 index 0000000..b3fb503 --- /dev/null +++ b/include/SplineNetLib/CTensorFunc.hpp @@ -0,0 +1,97 @@ +// Copyright (c) <2025>, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). 
+// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + +#ifndef CTENSORFUNC_HPP +#define CTENSORFUNC_HPP + +#include "CTensorUtils.hpp" + +namespace SplineNetLib { + +template +class CTensor; + +//base function class for specialization +template +requires Scalar +class Function { +public: + //pointers to this functions "parents" (like : a operator b) + std::shared_ptr> a; + std::shared_ptr> b; + + Function(std::shared_ptr> A, std::shared_ptr> B) : a(A), b(B) {} + + //virtual desctructor + virtual ~Function() = default; + + virtual std::vector fwd() = 0; + + virtual void backward(std::vector &prop_grad, CTensor *result) = 0; + + static std::unordered_set*> global_chain; + + void clear_graph_f(); +}; + +template +requires Scalar +std::unordered_set*> Function::global_chain; + +//addition class for CTensor::operator+ +template +requires Scalar +class AddFunction : public Function { +public: + + //construct base class + AddFunction(std::shared_ptr> a, std::shared_ptr> b) : Function(a, b) {} + + std::vector fwd() override ; + + void backward(std::vector &prop_grad, CTensor *result) override; +}; + +//subtractor function class for CTensor::operator- +template +requires Scalar +class SubFunction : public Function { +public: + + //construct base class + SubFunction(std::shared_ptr> a, std::shared_ptr> b) : Function(a, b) {} + + std::vector fwd() override; + + void backward(std::vector &prop_grad, CTensor *result) override; + +}; + +//matrix multiplication function class for CTensor::operator* +template +requires Scalar +class MatMulFunction : public Function { +public: + + //construct base class + MatMulFunction(std::shared_ptr> a, std::shared_ptr> b) : Function(a, b) {} + + std::vector fwd() override; + + void backward(std::vector &prop_grad, CTensor *result) override; +}; + +} //namepace + +#include "../src/CTensorFunc.tpp" + +#endif \ No newline at end of file diff --git a/include/SplineNetLib/CTensorUtils.hpp b/include/SplineNetLib/CTensorUtils.hpp new file mode 100644 index 0000000..fba2654 --- /dev/null +++ b/include/SplineNetLib/CTensorUtils.hpp @@ -0,0 +1,95 @@ +// Copyright (c) <2025>, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). 
+// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + +#ifndef CTENSORUTILS_HPP +#define CTENSORUTILS_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace SplineNetLib { + +template +std::string vectorToString(const std::vector& vec); + +template +concept Container = requires(T t) { + typename T::value_type; // Requires a nested `value_type` (if T::value_type fails T is not a Container) + typename T::iterator; // Requires a nested `iterator` + typename T::const_iterator; // Also requires a nested `const_iterator` for const containers + { t.begin() } -> std::input_iterator; // Requires a `begin()` function that has return type std::input_iterator + { t.end() } -> std::input_iterator; // Requires an `end()` function that has return type std::input_iterator + { t.size() } -> std::convertible_to; //also requires a `size()` function thatvhas return type std::convertible_to +}; + +template +concept Scalar = std::is_arithmetic_v; // Requires T to be is_arithmetic_v + +// Function to generate a std::vector with random values +template +std::vector randomVector(size_t size, T min, T max) ; + + +//base case for recursive n_dims check +template +int get_depth(const T &scalar) ; + +//Recursive case for the n_dims check will return the number of dimensions od the input +template +int get_depth (const T &vec) ; + +//base Recursive case for the get_shape func will return the shape +template +std::vector get_shape(const T &scalar, std::vector Shape = {}) ; + +//Recursive function to get shape of container (assumes uniform dims) pushes back the size of the container at current recursion depth +template +std::vector get_shape(const T &vec, std::vector Shape = {}) ; + +//base case if input is scalar type (will in place push back to the result) +template +void Flatten(const T &in_scalar, std::vector &result) ; + +//Recursive case will move down one dim into the input and recursively call itself for all "values" in input +template +void Flatten(const T &in_vector, std::vector &result) ; + +// Flatten controll function will create the result variable and initialize the recursion +template +std::vector Flatten(const T& in_vector) ; + +// calculate the stride length to get to next index in dim forvthe projected vector +size_t stride(size_t idx, const std::vector &shape) ; + +//math ------------------- + +template +std::vector matmul(const std::vector &A, const std::vector &B, const std::vector &A_shape, const std::vector &B_shape) ; + +template +requires Scalar +std::vector permute_vec(const std::vector& A, const std::vector& A_shape, const std::vector& permutation_indices) ; + +} //namespace + +#include "../src/CTensorUtils.tpp" + +#endif \ No newline at end of file diff --git a/include/SplineNetLib/splines.hpp b/include/SplineNetLib/splines.hpp index 76a315c..c8b7577 100644 --- a/include/SplineNetLib/splines.hpp +++ b/include/SplineNetLib/splines.hpp @@ -17,6 +17,7 @@ #include #include #include +#include "CTensor.hpp" /* #include #include diff --git a/src/CTensor.tpp b/src/CTensor.tpp new file mode 100644 index 0000000..1fe0ce2 --- /dev/null +++ b/src/CTensor.tpp @@ -0,0 +1,303 @@ +// Copyright (c) <2025>, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). 
+// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + + +#ifndef CTENSOR_TPP +#define CTENSOR_TPP + + +#include "../include/SplineNetLib/CTensor.hpp" + +namespace SplineNetLib { + +template +void CTensor::zero_grad(){ + this->_tensor_data->_grad = std::vector(this->_tensor_data->_data.size(),static_cast(0)); +} + + +template +void CTensor::squeeze(const size_t& dim) { + auto n_dims = this->_tensor_data->_shape.size(); + if (n_dims == 1) { + throw std::invalid_argument("CTensor with 1 Dim can not be squeezed to be 0D\n"); + } else if (dim >= n_dims) { + throw std::invalid_argument("target Dim: "+std::to_string(dim)+"is out of range of CTensor with n_dims: "+std::to_string(n_dims)+"\n"); + } else if (dim == n_dims-1){ + this->_tensor_data->_shape[dim-1] *= this->_tensor_data->_shape[dim]; + this->_tensor_data->_shape.pop_back(); + } else { + this->_tensor_data->_shape[dim] *= this->_tensor_data->_shape[dim+1]; + this->_tensor_data->_shape.erase(this->_tensor_data->_shape.begin() + dim + 1); + } +} + +template +void CTensor::unsqueeze(const size_t &dim) { + auto n_dims = this->_tensor_data->_shape.size(); + auto* shape = &(this->_tensor_data->_shape);//make a temp ptr to the shape vector for easier syntax + if (dim > n_dims) { + (*shape).push_back(1); + } else { + (*shape).insert((*shape).begin() + dim, 1); + } +} + +template +void CTensor::expand(const size_t &dim, const size_t &factor) { + if (factor <= 1) { + return; // No expansion needed + } + + auto* shape = &(this->_tensor_data->_shape);//make a temp ptr to the shape vector for easier syntax + auto* data = &(this->_tensor_data->_data); + auto n_dims = (*shape).size(); + + + // Check if the specified dimension is valid + if (dim >= n_dims) { + throw std::invalid_argument("Input dim: " + std::to_string(dim) + " cannot be larger than _n_dims: " + std::to_string(n_dims)); + } + + // Calculate the size of the sub-vectors to repeat + size_t sub_vector_size = 1; + for (size_t i = dim + 1; i < n_dims; i++) { + sub_vector_size *= (*shape)[i]; + } + + size_t data_size_per_expansion = (*shape)[dim] * sub_vector_size; + + // Repeat the data by the specified factor + size_t idx = 0; + while (idx < (*data).size()) { + std::vector sub_vector((*data).begin() + idx, (*data).begin() + idx + data_size_per_expansion); + + // Insert the sub-vector factor times + for (size_t i = 1; i < factor; i++) { + (*data).insert((*data).begin() + idx, sub_vector.begin(), sub_vector.end()); + idx += data_size_per_expansion; + } + + idx += data_size_per_expansion; + } + + // Update the shape and number of dimensions + (*shape)[dim] *= factor; + +} + +template +void CTensor::permute(const std::vector &permutation_indecies) { + //renamed global func permute to permute_vec so that func czll in class is nolonger ::permute now permute_vec + this->_tensor_data->_data = permute_vec(this->_tensor_data->_data, this->_tensor_data->_shape, permutation_indecies); + + auto shape_copy = this->shape(); + for (size_t i = 0; i < permutation_indecies.size(); i++) { + this->_tensor_data->_shape[i] = shape_copy[permutation_indecies[i]]; + } +} + +template +void CTensor::transpose() { + if (this->_tensor_data->_shape.size()>=2) { + std::vector transpose_idx; + for (size_t i = 0; i < this->_tensor_data->_shape.size()-2; i++) { + transpose_idx.push_back(i); + } + + transpose_idx.push_back(this->_tensor_data->_shape.size() - 1); + 
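+        //together with the previous push_back this swaps the last two axes:
+        //e.g. for a shape (2,3,4) tensor transpose_idx becomes {0,2,1} and the shape after permute is (2,4,3)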
transpose_idx.push_back(this->_tensor_data->_shape.size() - 2); + + this->permute(transpose_idx); + } +} + + + +//-----operator-----/ +template +auto CTensor::operator[](size_t idx){ + std::vector Shape = this->shape(); + //check if index should exist in multi dim space + if (idx >= Shape[0]) { + throw std::invalid_argument("index ["+std::to_string(idx)+"] is out of range with dim of size : "+std::to_string(Shape[0])+"\n"); + } + //if vector is 1D to begin with + if (Shape.size() == 1) { + //create sub vector with scalar data at data[idx] + std::vectordata())::value_type> sub_vector = {this->data()[idx]}; + //std::cout<<"operator[] scalar case debug data[idx]="<data() in range constructor (could likely also be used in decltype) + auto data = this->data(); + //creates a vector of same type as stored in CTensor using range constructor from flat_idx to flat_idx + size_sub_vector + std::vectordata())::value_type> sub_vector(data.begin() + flat_idx, data.begin() + flat_idx + size_sub_vector); + + + auto new_CT = CTensor(sub_vector, Shape); + return new_CT; +} + +template +auto CTensor::operator+(CTensor& other){ + //create new addfunction with shared ptr to this and other + auto new_fn = std::make_unique>(std::make_shared>(*this), + std::make_shared>(other)); + auto res_vec = new_fn->fwd(); //add this data and other data + auto result = CTensor(res_vec, this->shape());//create the result CTensor + if (this->requires_grad || other.requires_grad) { + result.requires_grad = true; + + result._tensor_data->_grad_fn.push_back(std::move(new_fn)); + } else { + result.requires_grad = false; + } + return result; +} + + +template +auto CTensor::operator-(CTensor &other) { + //create new SubFunction with shared ptr to this and other + auto new_fn = std::make_unique>(std::make_shared>(*this), + std::make_shared>(other)); + auto res_vec = new_fn->fwd(); + auto result = CTensor(res_vec, this->shape()); + if (this->requires_grad || other.requires_grad) { + result.requires_grad = true; + + result._tensor_data->_grad_fn.push_back(std::move(new_fn)); + } else { + result.requires_grad = false; + } + return result; +} + +template +auto CTensor::operator* (CTensor &other) { + //create the parent function for the result using parents this and other + //this will make a shared ptr of the base class. 
this works since the functions in tje derived classes are all overrides + //this is doen so that all grad fns of a CTensor can be stored in the same std::vector>> _grad_fn + auto new_fn = std::make_unique>(std::make_shared>(*this), + std::make_shared>(other)); + //use new_fn.forward() to perfo5m the addition + auto res_vec = new_fn->fwd(); + + std::vector result_shape; + auto this_shape = this->shape(); + auto other_shape = other.shape(); + for (size_t i = 0; i < this_shape.size() -1; i++){ + result_shape.push_back(this_shape[i]); + } + result_shape.push_back(other_shape[other_shape.size()-1]); + + auto result = CTensor(res_vec, result_shape); + //assign parent function to the result._grad_fn + if (this->requires_grad || other.requires_grad) { + result.requires_grad = true; + + result._tensor_data->_grad_fn.push_back(std::move(new_fn)); + } else { + result.requires_grad = false; + } + + return result; +} + + + + + +template +void CTensor::clear_history() { + this->_tensor_data->_grad_fn.clear(); + //this should be safe since Function uses pointers to Ctensor and the Tensor will survive the _grad_fn clear +} + +template +void CTensor::clear_graph() { + //recursive call to traverse grad graph + for (auto &fn : this->_tensor_data->_grad_fn) { + + fn->clear_graph_f(); + } + //clear this CTensor history when sub graph is cleared + this->clear_history(); +} + +//can be improved with overload if no arg is passe to use {} so that this function below can use refernces +template +void CTensor::backward(std::vector prop_grad) { + + //go through all parent Functions + for (auto &fn : this->_tensor_data->_grad_fn) { + if (fn) { + //std::cout<backward(prop_grad, this); + //std::cout<<"debug Ct bwd fn bwd finish\n"; + } + } + //std::cout<<"debug Ct bwd fn all bwd finish\n"; +} +/* untestee +template +CTensor zeros(std::vector shape) { + std::vector data(stride(-1,shape),T(0)); + return CTensor(data, shape); +} + +template +CTensor ones(std::vector shape) { + std::vector data(stride(-1,shape),T(1)); + return CTensor(data, shape); +} + +template +CTensor random(std::vector shape, T min, T max) { + return CTensor(randomVector(stride(-1,shape),min,max),shape); +} + +template +CTensor Tensor(U data) { + return CTensor(data); +} + +template +CTensor Tensor(U data) { + return CTensor(data); +} + +template +CTensor Tensor(std::vector data, std::vector shape) { + return CTensor(data,shape); +} +*/ + +} //namespace + +#endif \ No newline at end of file diff --git a/src/CTensorFunc.tpp b/src/CTensorFunc.tpp new file mode 100644 index 0000000..cf7c506 --- /dev/null +++ b/src/CTensorFunc.tpp @@ -0,0 +1,300 @@ +// Copyright (c) <2025>, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). +// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + +#ifndef CTENSORFUNC_TPP +#define CTENSORFUNC_TPP + + +#include "../include/SplineNetLib/CTensorFunc.hpp" + +namespace SplineNetLib { + +template +requires Scalar +void Function::clear_graph_f() { + a->clear_graph(); + b->clear_graph(); +} + +template +requires Scalar +std::vector AddFunction::fwd() { + + auto* a_data = &(this->a->_tensor_data->_data); + auto* b_data = &(this->b->_tensor_data->_data); + + T l; + T r; + + std::vector res_vec; + for (size_t i = 0; i < (*a_data).size() || i < (*b_data).size(); i++){ + l = (i < (*a_data).size()) ? 
(*a_data)[i] : 0 ; + r = (i < (*b_data).size()) ? (*b_data)[i] : 0 ; + res_vec.push_back(l + r); + } + return res_vec; +} + + + +template +requires Scalar +void AddFunction::backward(std::vector &prop_grad, CTensor *result) { + //std::cout<<"debug add bwd call\n"; + //check if func already exists in the recursive chain + if (Function::global_chain.find(this) != Function::global_chain.end()) { + std::cout<<"cyle detected in grad backward, ensure no incorrect reassignments to to Ctensors that were previously used in the computation graph\n"; + return; + } + //std::cout<<"debug add bwd cycle check\n"; + //insert this func into the chain for cycle detection + Function::global_chain.insert(this); + //std::cout<<"debug add bwd chain insert\n"; + if (prop_grad.empty()){ + for (size_t i=0; i < this->a->data().size(); i++) { + prop_grad.push_back(1); + //std::cout<<"debug add bwd empty grad set to 1s \n"; + } + } + //std::cout<<"debug add bwd grad add\n"; + //ensure self dependend gradients arent added twice + if (result != this->a.get()) { + std::cout<<"debug add bwd this->a gradient propagation initialized\n"; + //std::cout<<"debug add bwd this a grad size:"<a->grad().size()<<"prop_grad size: "<a->requires_grad == true) { + if (this->a->grad().empty()){ + std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + this->a->zero_grad(); + } + std::cout<<"working on grad of a at "<a<<" "<a->grad())<<" "<a->_tensor_data->_grad[i] += prop_grad[i]; + //std::cout<<"debug add bwd accumulation step\n"; + } + } + std::cout<<"debug add bwd this a grad accumulated\n"; + this->a->backward(prop_grad); + std::cout<<"debug add bwd this a recursion finished\n"; + } + //ensure self dependend gradients arent added twice + if (result != this->b.get()) { + std::cout<<"debug add bwd this->b gradient propagation initialized\n"; + //std::cout<<"debug add bwd this b grad size:"<b->grad().size()<<"prop_grad size: "<b->requires_grad == true) { + if (this->b->grad().empty()){ + std::cout<<"b grqd empty "<b->grad().size()<<"\n"; + this->b->zero_grad(); + } + std::cout<<"working on grad of b at "<b<<" "<b->grad())<<" "<b->_tensor_data->_grad[i] += prop_grad[i]; + //std::cout<<"debug add bwd accumulation step\n"; + } + } + //std::cout<<"debug add bwd this b grad accumulated\n"; + this->b->backward(prop_grad); + //std::cout<<"debug add bwd this b recursion finished\n"; + } + //std::cout<<"debug add bwd recursive propagation\n"; + //remove this func from the chain if all its recursive processes finished + Function::global_chain.erase(this); + //std::cout<<"debug add bwd chain erase\n"; +} + + +template +requires Scalar +std::vector SubFunction::fwd() { + + auto* a_data = &(this->a->_tensor_data->_data); + auto* b_data = &(this->b->_tensor_data->_data); + + T l; + T r; + + std::vector res_vec; + for (size_t i = 0; i < (*a_data).size() || i < (*b_data).size(); i++){ + l = (i < (*a_data).size()) ? (*a_data)[i] : 0 ; + r = (i < (*b_data).size()) ? 
(*b_data)[i] : 0 ; + res_vec.push_back(l - r); + } + return res_vec; +} + + +template +requires Scalar +void SubFunction::backward(std::vector &prop_grad, CTensor *result) { + + //check if func already exists in the recursive chain + if (Function::global_chain.find(this) != Function::global_chain.end()) { + std::cout<<"cyle detected in Ctensor.backward(), ensure no incorrect reassignments to to Ctensors that were previously used in the computation graph\n"; + return; + } + + //insert this func into the chain for cycle detection + Function::global_chain.insert(this); + + if (prop_grad.empty()){ + for (size_t i=0; i < this->a->data().size(); i++) { + prop_grad.push_back(1); + } + } + + //ensure self dependend gradients arent added twice + if (result != this->a.get()) { + if (this->a->requires_grad == true) { + if (this->a->grad().empty()){ + std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + this->a->zero_grad(); + } + for (size_t i = 0; i < prop_grad.size(); i++) { + + this->a->_tensor_data->_grad[i] += prop_grad[i]; + + } + } + this->a->backward(prop_grad); + } + //ensure self dependend gradients arent added twice + if (result != this->b.get()) { + if (this->b->requires_grad == true) { + if (this->b->grad().empty()){ + std::cout<<"b grqd empty "<b->grad().size()<<"\n"; + this->b->zero_grad(); + } + for (size_t i = 0; i < prop_grad.size(); i++) { + this->b->_tensor_data->_grad[i] -= prop_grad[i]; + } + } + this->b->backward(prop_grad); + } + //remove this func from the chain if all its recursive processes finished + Function::global_chain.erase(this); +} + + +template +requires Scalar +std::vector MatMulFunction::fwd() { + + std::vector a_shape = this->a->shape(); + std::vector b_shape = this->b->shape(); + + size_t a_n_dims = a_shape.size(); + size_t b_n_dims = b_shape.size(); + + auto a_copy = *(this->a); + auto b_copy = *(this->b); + + if (a_n_dims != b_n_dims) { + throw std::invalid_argument("operator (*) expects both opperants to have the same num of dimensions but got:"+std::to_string(a_n_dims)+"and "+std::to_string(b_n_dims)+",please ensure opperants dims match by using squeeze or unsqueeze beforehand\n"); + } + if (a_n_dims > 2) { + //Create sub vectors for the batch dimensions + std::vector a_batch_shape; + std::vector b_batch_shape; + //get only the batch dimension shapes + for (size_t i = 0; i < a_shape.size()-2; i++ ){ + a_batch_shape.push_back(a_shape[i]); + b_batch_shape.push_back(b_shape[i]); + } + for (size_t i = 0; i < a_batch_shape.size(); i++) { + //expand dims so that batch dimensions are the same + if (a_batch_shape[i] != b_batch_shape[i]) { + a_copy.expand(i,b_batch_shape[i]); + b_copy.expand(i,a_batch_shape[i]); + } + } + } + std::vector result_vector = matmul(a_copy.data(), b_copy.data(), a_copy.shape(), b_copy.shape()); + return result_vector; + +} + +template +requires Scalar +void MatMulFunction::backward(std::vector &prop_grad, CTensor *result) { + + //check if func already exists in the recursive chain + if (Function::global_chain.find(this) != Function::global_chain.end()) { + std::cout<<"cyle detected in Ctensor.backward(), ensure no incorrect reassignments to to Ctensors that were previously used in the computation graph\n"; + return; + } + + //insert this func into the chain for cycle detection + Function::global_chain.insert(this); + + + auto prop_grad_shape = result->shape(); + std::cout<<"matmul bwd prop shape : "<data().size(); i++) { + prop_grad.push_back(1); + } + } + + //ensure self dependend gradients arent added twice + if (result != 
this->a.get()) { + auto prop_grad_a = this->a->grad(); //needs to be deeply checked + if (this->a->requires_grad == true) { + if (this->a->_tensor_data->_grad.empty()){ + std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + this->a->zero_grad(); + } + //create a copy of b and transpose it + auto b_copy = *(this->b); + b_copy.transpose(); + + prop_grad_a = matmul(prop_grad, b_copy.data(), prop_grad_shape, b_copy.shape()); + + //assign grad + for (size_t i = 0; i < prop_grad_a.size(); i++) { + this->a->_tensor_data->_grad[i] += prop_grad_a[i]; + } + } + this->a->backward(prop_grad_a); + } + + //ensure self dependend gradients arent added twice + if (result != this->b.get()) { + auto prop_grad_b = this->b->grad(); + if (this->b->requires_grad == true) { + if (this->b->_tensor_data->_grad.empty()){ + std::cout<<"b grad empty "<b->grad().size()<<"\n"; + this->b->zero_grad(); + } + //create a copy of b and transpose it + auto a_copy = *(this->a); + a_copy.transpose(); + std::cout<<"b bwd a_copy shape :"<b->_tensor_data->_grad[i] += prop_grad_b[i]; + } + } + this->b->backward(prop_grad_b); + } + + //remove this func from the chain if all its recursive processes finished + Function::global_chain.erase(this); +} + +}//namespace + +#endif \ No newline at end of file diff --git a/src/CTensorUtils.tpp b/src/CTensorUtils.tpp new file mode 100644 index 0000000..985de1a --- /dev/null +++ b/src/CTensorUtils.tpp @@ -0,0 +1,205 @@ +// Copyright (c) <2025>, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). +// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + +#ifndef CTENSORUTILS_TPP +#define CTENSORUTILS_TPP + +#include "../include/SplineNetLib/CTensorUtils.hpp" + +namespace SplineNetLib { + +template +std::vector randomVector(size_t size, T min, T max) { + // Random number generator + std::random_device rd; + std::mt19937 gen(rd()); + + // Distribution depending on type T + typename std::conditional::value, + std::uniform_int_distribution, + std::uniform_real_distribution>::type dist(min, max); + + std::vector vec(size); + for (auto& v : vec) { + v = dist(gen); + } + return vec; +} + + +template +int get_depth(const T &scalar) { + return 0; +} + +template +int get_depth(const T &vec) { + int max_depth = 1; + for (const auto &element : vec) { + max_depth = std::max(max_depth, 1 + get_depth(element)); + } + return max_depth; +} + +template +std::string vectorToString(const std::vector& vec) { + std::ostringstream oss; + oss << "("; + for (size_t i = 0; i < vec.size(); ++i) { + oss << vec[i]; + if (i < vec.size() - 1) { + oss << ", "; + } + } + oss << ")"; + return oss.str(); +} + +template +std::vector get_shape(const T &scalar, std::vector Shape) { + return Shape; +} + +template +std::vector get_shape(const T &vec, std::vector Shape) { + Shape.push_back(vec.size()); + return get_shape(vec[0], Shape); +} + +template +void Flatten(const T &in_scalar, std::vector &result) { + result.push_back(in_scalar); +} + +template +void Flatten(const T &in_vector, std::vector &result) { + for (const auto &vec : in_vector) { + Flatten(vec, result); + } +} + +template +std::vector Flatten(const T& in_vector) { + std::vector result; + Flatten(in_vector, result); + return result; +} + +inline size_t stride(size_t idx, const std::vector &shape) { + size_t stride = 1; + for (size_t i = idx + 1; i < shape.size(); i++) { + stride *= 
shape[i]; + } + return stride; +} + +// Math functions +/* see readable version below +template +requires Scalar +std::vector matmul(const std::vector &A, const std::vector &B, const std::vector &A_shape, const std::vector &B_shape) { + size_t batch_size = 1; + if (B_shape.size() != A_shape.size()) { + throw std::invalid_argument("A_shape.size() and B_shape.size() must be equal"); + return std::vector(1, 0); + } + + if (A_shape.size() > 2) { + for (size_t i = 0; i < A_shape.size() - 2; i++) { + batch_size *= A_shape[i]; + } + } + + size_t M = A_shape[A_shape.size() - 2], K = A_shape[A_shape.size() - 1], N = B_shape[B_shape.size() - 1]; + std::vector result(batch_size * M * N); + + for (size_t batch_dim = 0; batch_dim < batch_size; batch_dim++) { + for (size_t row = 0; row < M; row++) { + for (size_t col = 0; col < N; col++) { + T sum = 0.0; + for (size_t shared = 0; shared < K; shared++) { + sum += A[batch_dim * M * K + row * K + shared] * B[batch_dim * K * N + shared * N + col]; + } + result[batch_dim * M * N + row * N + col] = sum; + } + } + } + return result; +}*/ + +template // Template function that accepts any scalar type 'T' (e.g., float, double) +requires Scalar // This constraint ensures that the type 'T' is a scalar (e.g., not a matrix, vector, etc.) +std::vector matmul(const std::vector &A, const std::vector &B, const std::vector &A_shape, const std::vector &B_shape) { + size_t batch_size = 1; // Variable to store the number of batches (default to 1) + //std::cout<<"debug : matmul : a shape = "<(1, 0); // This return statement is unreachable due to the exception, but just in case. + } + + + // If A has more than 2 dimensions (e.g., batching is involved), calculate the batch size + if (A_shape.size() > 2) { + for (size_t i = 0; i < A_shape.size() - 2; i++) { + batch_size *= A_shape[i]; // Multiply the sizes of the leading dimensions (batch dimensions) + } + } + + // Get the dimensions for matrix multiplication + size_t M = A_shape[A_shape.size() - 2]; // Rows of A + size_t K = A_shape[A_shape.size() - 1]; // Columns of A and rows of B + size_t N = B_shape[B_shape.size() - 1]; // Columns of B + + // Initialize the result vector with a size to hold all results (batch_size * M * N) + std::vector result(batch_size * M * N); + + // Perform matrix multiplication for each batch + for (size_t batch_dim = 0; batch_dim < batch_size; batch_dim++) { + for (size_t row = 0; row < M; row++) { // Iterate over each row of A + for (size_t col = 0; col < N; col++) { // Iterate over each column of B + T sum = 0.0; // Initialize the sum for the current element in the result matrix + for (size_t shared = 0; shared < K; shared++) { // Iterate over the shared dimension (columns of A, rows of B) + // Perform the dot product between the row of A and the column of B + sum += A[batch_dim * M * K + row * K + shared] * B[batch_dim * K * N + shared * N + col]; + } + // Store the computed value in the result vector at the appropriate position + result[batch_dim * M * N + row * N + col] = sum; + } + } + } + return result; // Return the final result of the matrix multiplication +} + +template +requires Scalar +std::vector permute_vec(const std::vector& A, const std::vector& A_shape, const std::vector& permutation_indices) { + std::vector B(A.size(), 0); + std::vector B_shape; + + for (const auto& idx : permutation_indices) { + B_shape.push_back(A_shape[idx]); + } + + for (size_t i = 0; i < A.size(); i++) { + size_t idx = 0; + for (size_t k = 0; k < A_shape.size(); k++) { + idx += ((i / 
stride(permutation_indices[k], A_shape)) % B_shape[k]) * stride(k, B_shape); + } + B[idx] = A[i]; + } + return B; +} + +}//namespace + +#endif \ No newline at end of file From 64e574cd13fe27e1a4728cceb1630ebe9fe45748 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Fri, 7 Feb 2025 12:47:32 +0100 Subject: [PATCH 02/19] changed cpp version from 17 to 20 to use 'concepts'(cmake now works Note that a lot of warnings are created when running cmake) --- CMakeLists.txt | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f9c63b..ee2e5cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,20 +4,9 @@ cmake_minimum_required(VERSION 3.10) project(SplineNetLib VERSION 1.0) # Set the C++ standard -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED True) -#include fetch content for catch2 -include(FetchContent) - -# fetch catch2 and make usable -FetchContent_Declare( - Catch2 - GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.4.0 -) -FetchContent_MakeAvailable(Catch2) - # Optionally enable warnings for all compilers if(MSVC) add_compile_options(/W4) @@ -48,6 +37,16 @@ target_include_directories(SplineNetLib PUBLIC ${PROJECT_SOURCE_DIR}/include) option(ENABLE_TESTS "allow catch2 install and tests to run" OFF) if(ENABLE_TESTS) + #include fetch content for catch2 + include(FetchContent) + + # fetch catch2 and make usable + FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG v3.4.0 + ) + FetchContent_MakeAvailable(Catch2) #enable testing enable_testing() @@ -74,6 +73,12 @@ add_executable(SplineNetExample examples/example_network.cpp) # Link the example with the library target_link_libraries(SplineNetExample PRIVATE SplineNetLib) +# Add an example or test executable +add_executable(SplineNetExampleTensor examples/example_CTensor.cpp) + +# Link the example with the library +target_link_libraries(SplineNetExampleTensor PRIVATE SplineNetLib) + # Optional: Install the library and headers install(TARGETS SplineNetLib DESTINATION lib) install(DIRECTORY include/SplineNetLib DESTINATION include) From 8f43990b5d75b52134f39ae2413304a73dc5dbae Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Fri, 7 Feb 2025 13:24:04 +0100 Subject: [PATCH 03/19] added new CTensor constructor use nested vectors as init arg. 
call like : CTensor(arg) --- examples/example_CTensor.cpp | 10 ++++++++++ include/SplineNetLib/CTensor.hpp | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/examples/example_CTensor.cpp b/examples/example_CTensor.cpp index 89a5d33..aa87409 100644 --- a/examples/example_CTensor.cpp +++ b/examples/example_CTensor.cpp @@ -9,5 +9,15 @@ int main() { std::cout<<"created CTensor a with data : "<> data_b({{1,1,1},{2,2,2}}); + + auto b = CTensor(data_b); + + std::cout<<"created CTensor b with data : "< + CTensor(const U& data) { + _tensor_data = new DTensor(Flatten(data), get_shape(data)); + } + CTensor(const CTensor& other){ _tensor_data = other._tensor_data; _tensor_data->_ref_c++; From 3154e6452c33e5cb793c524551046fe5fff284a6 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Tue, 11 Feb 2025 12:15:15 +0100 Subject: [PATCH 04/19] created pybinds for Ctensor int and double also fixed some bugs and hid some debug outputs --- PySplineNetLib.egg-info/PKG-INFO | 5 + PySplineNetLib.egg-info/SOURCES.txt | 8 + PySplineNetLib.egg-info/dependency_links.txt | 1 + PySplineNetLib.egg-info/requires.txt | 1 + PySplineNetLib.egg-info/top_level.txt | 1 + include/SplineNetLib/CTensor.hpp | 14 +- setup.py | 1 + src/CTensorFunc.tpp | 28 ++-- src/SplineNetLib_py.cpp | 152 +++++++++++++------ 9 files changed, 143 insertions(+), 68 deletions(-) create mode 100644 PySplineNetLib.egg-info/PKG-INFO create mode 100644 PySplineNetLib.egg-info/SOURCES.txt create mode 100644 PySplineNetLib.egg-info/dependency_links.txt create mode 100644 PySplineNetLib.egg-info/requires.txt create mode 100644 PySplineNetLib.egg-info/top_level.txt diff --git a/PySplineNetLib.egg-info/PKG-INFO b/PySplineNetLib.egg-info/PKG-INFO new file mode 100644 index 0000000..ef7e153 --- /dev/null +++ b/PySplineNetLib.egg-info/PKG-INFO @@ -0,0 +1,5 @@ +Metadata-Version: 2.1 +Name: PySplineNetLib +Version: 0.1 +License-File: LICENSE +Requires-Dist: pybind11>=2.6.0 diff --git a/PySplineNetLib.egg-info/SOURCES.txt b/PySplineNetLib.egg-info/SOURCES.txt new file mode 100644 index 0000000..07ef532 --- /dev/null +++ b/PySplineNetLib.egg-info/SOURCES.txt @@ -0,0 +1,8 @@ +LICENSE +setup.py +PySplineNetLib.egg-info/PKG-INFO +PySplineNetLib.egg-info/SOURCES.txt +PySplineNetLib.egg-info/dependency_links.txt +PySplineNetLib.egg-info/requires.txt +PySplineNetLib.egg-info/top_level.txt +src/SplineNetLib_py.cpp \ No newline at end of file diff --git a/PySplineNetLib.egg-info/dependency_links.txt b/PySplineNetLib.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/PySplineNetLib.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/PySplineNetLib.egg-info/requires.txt b/PySplineNetLib.egg-info/requires.txt new file mode 100644 index 0000000..d89789f --- /dev/null +++ b/PySplineNetLib.egg-info/requires.txt @@ -0,0 +1 @@ +pybind11>=2.6.0 diff --git a/PySplineNetLib.egg-info/top_level.txt b/PySplineNetLib.egg-info/top_level.txt new file mode 100644 index 0000000..4457d5c --- /dev/null +++ b/PySplineNetLib.egg-info/top_level.txt @@ -0,0 +1 @@ +PySplineNetLib diff --git a/include/SplineNetLib/CTensor.hpp b/include/SplineNetLib/CTensor.hpp index 9b60103..862b915 100644 --- a/include/SplineNetLib/CTensor.hpp +++ b/include/SplineNetLib/CTensor.hpp @@ -98,20 +98,22 @@ class CTensor { //-----shape-utils----- - void squeeze(const size_t &dim) ; + void squeeze(const size_t &dim) ;//squeezes / removes the input dim and changes the internal projection shape - void unsqueeze(const size_t &dim) ; + void unsqueeze(const size_t &dim) ; //adds 
a new dim at the input dim - void expand(const size_t &dim, const size_t &factor) ; + void expand(const size_t &dim, const size_t &factor) ; //expands the dimension by factor so that shape 3,2 expanded(1,3) + //becomes: (3,6) (will duplicate values at the dimension to match new projected shape) - void permute(const std::vector &permutation_indecies) ; + void permute(const std::vector &permutation_indecies) ; //will swap dimesnions at the permutation indecies + //shape (2,3,4) permute(2,0,1) becomes: (4,2,3) void transpose() ; //-----auto_grad----- - + //delete all grad fns of this void clear_history() ; - + //recursive delete of grad fns for all tensors in the graph with this as root void clear_graph() ; //maybe add overload o this so that f no arg was passed propagated grad is set to {}, than this function below could use all by ref void backward(std::vector prop_grad = {}) ; diff --git a/setup.py b/setup.py index af120ca..3ca8869 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ def build_python_extension(): libraries=["SplineNetLib"], # Link with your precompiled library library_dirs=[get_library_path()], # Directory containing the precompiled library language="c++", # Ensure it's compiled as C++ + extra_compile_args=["-std=c++20"], ) ], install_requires=[ diff --git a/src/CTensorFunc.tpp b/src/CTensorFunc.tpp index cf7c506..4640897 100644 --- a/src/CTensorFunc.tpp +++ b/src/CTensorFunc.tpp @@ -68,34 +68,34 @@ void AddFunction::backward(std::vector &prop_grad, CTensor *result) { //std::cout<<"debug add bwd grad add\n"; //ensure self dependend gradients arent added twice if (result != this->a.get()) { - std::cout<<"debug add bwd this->a gradient propagation initialized\n"; + //std::cout<<"debug add bwd this->a gradient propagation initialized\n"; //std::cout<<"debug add bwd this a grad size:"<a->grad().size()<<"prop_grad size: "<a->requires_grad == true) { if (this->a->grad().empty()){ - std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + //std::cout<<"a grqd empty "<a->grad().size()<<"\n"; this->a->zero_grad(); } - std::cout<<"working on grad of a at "<a<<" "<a->grad())<<" "<a<<" "<a->grad())<<" "<a->_tensor_data->_grad[i] += prop_grad[i]; //std::cout<<"debug add bwd accumulation step\n"; } } - std::cout<<"debug add bwd this a grad accumulated\n"; + //std::cout<<"debug add bwd this a grad accumulated\n"; this->a->backward(prop_grad); - std::cout<<"debug add bwd this a recursion finished\n"; + //std::cout<<"debug add bwd this a recursion finished\n"; } //ensure self dependend gradients arent added twice if (result != this->b.get()) { - std::cout<<"debug add bwd this->b gradient propagation initialized\n"; + //std::cout<<"debug add bwd this->b gradient propagation initialized\n"; //std::cout<<"debug add bwd this b grad size:"<b->grad().size()<<"prop_grad size: "<b->requires_grad == true) { if (this->b->grad().empty()){ - std::cout<<"b grqd empty "<b->grad().size()<<"\n"; + //std::cout<<"b grqd empty "<b->grad().size()<<"\n"; this->b->zero_grad(); } - std::cout<<"working on grad of b at "<b<<" "<b->grad())<<" "<b<<" "<b->grad())<<" "<b->_tensor_data->_grad[i] += prop_grad[i]; @@ -156,7 +156,7 @@ void SubFunction::backward(std::vector &prop_grad, CTensor *result) { if (result != this->a.get()) { if (this->a->requires_grad == true) { if (this->a->grad().empty()){ - std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + //std::cout<<"a grqd empty "<a->grad().size()<<"\n"; this->a->zero_grad(); } for (size_t i = 0; i < prop_grad.size(); i++) { @@ -171,7 +171,7 @@ void 
SubFunction::backward(std::vector &prop_grad, CTensor *result) { if (result != this->b.get()) { if (this->b->requires_grad == true) { if (this->b->grad().empty()){ - std::cout<<"b grqd empty "<b->grad().size()<<"\n"; + //std::cout<<"b grqd empty "<b->grad().size()<<"\n"; this->b->zero_grad(); } for (size_t i = 0; i < prop_grad.size(); i++) { @@ -238,7 +238,7 @@ void MatMulFunction::backward(std::vector &prop_grad, CTensor *result) auto prop_grad_shape = result->shape(); - std::cout<<"matmul bwd prop shape : "<::backward(std::vector &prop_grad, CTensor *result) auto prop_grad_a = this->a->grad(); //needs to be deeply checked if (this->a->requires_grad == true) { if (this->a->_tensor_data->_grad.empty()){ - std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + //std::cout<<"a grqd empty "<a->grad().size()<<"\n"; this->a->zero_grad(); } //create a copy of b and transpose it @@ -274,13 +274,13 @@ void MatMulFunction::backward(std::vector &prop_grad, CTensor *result) auto prop_grad_b = this->b->grad(); if (this->b->requires_grad == true) { if (this->b->_tensor_data->_grad.empty()){ - std::cout<<"b grad empty "<b->grad().size()<<"\n"; + //std::cout<<"b grad empty "<b->grad().size()<<"\n"; this->b->zero_grad(); } //create a copy of b and transpose it auto a_copy = *(this->a); a_copy.transpose(); - std::cout<<"b bwd a_copy shape :"< #include #include // To handle STL types like std::string, std::vector +#include #include "SplineNetLib/SplineNet.hpp" // Header for the library + namespace py = pybind11; +// Function to handle nested Python lists and convert them to std::vector +template +void flatten_pylist(const py::handle &obj, std::vector &result) { + if (py::isinstance(obj)) { + for (const auto &item : obj.cast()) { + flatten_pylist(item, result); + } + } else { + result.push_back(obj.cast()); + } +} + +// Wrapper function to create a new vector +template +std::vector convert_pylist_to_vector(const py::list &py_list) { + std::vector result; + flatten_pylist(py_list, result); + return result; +} + +void get_shape_recursive(const py::list& py_list, std::vector& shape) { + // Base case: when the list is empty, do nothing + if (py_list.size() == 0) return; + + // Push the size of the current level + shape.push_back(py_list.size()); + + // Check if the first element is a list (nested) + if (py::isinstance(py_list[0])) { + // Recursively call get_shape_recursive for nested lists + get_shape_recursive(py::cast(py_list[0]), shape); + } +} + +std::vector get_shape(const py::list& py_list) { + std::vector shape; + // Use the recursive get_shape implementation for vectors + get_shape_recursive(py_list, shape); + return shape; +} + + PYBIND11_MODULE(PySplineNetLib, m) { py::class_(m, "spline") .def(py::init>&, const std::vector < std::vector < double>>& >()) // Bind constructor @@ -40,56 +83,69 @@ PYBIND11_MODULE(PySplineNetLib, m) { .def("backward",py::overload_cast,std::vector , bool>(&SplineNetLib::layer::backward),"[double] ([double] x,[double]d_y,bool normalize), takes input x, loss gradient d_y and bool apply_grad,returns propageted loss (applies grad to all splines if True)") .def("backward",py::overload_cast> &,std::vector> >(&SplineNetLib::layer::backward),"backward but for batches (will always apply gradients)") .def("get_splines",&SplineNetLib::layer::get_splines,"[[SplineNetLib::spline]] (None), returns all splines in the layer"); -} + //int tensor + py::class_>(m, "CTensor") -/*to be checked -#include -#include // To handle STL types like std::string, std::vector -#include 
"SplineNetLib/SplineNet.hpp" // Header for the library + .def(py::init&, const std::initializer_list&>()) + .def(py::init&, const std::vector&>()) + .def(py::init&>()) + .def(py::init([](const py::list &py_list) {//only for py module to turn nested lists and turn them to nested vector + auto nested_vector = convert_pylist_to_vector(py_list); + std::vector shape = get_shape(py_list); + return new SplineNetLib::CTensor(nested_vector,shape); + })) + .def("data",&SplineNetLib::CTensor::data,"std::vector, (None), returns the stored data vector as a copy") + .def("shape",&SplineNetLib::CTensor::shape,"std::vector, (None), returns the shape of the tensor like (dim0, dim1, ..., dimN)") + .def("grad",&SplineNetLib::CTensor::grad, "std::vector, (None), returns the grad as flat 1D projected vector (internally using tensor.shape)") + .def("zero_grad",&SplineNetLib::CTensor::zero_grad, "None, (None), sets the gradient of this tensor to 0" ) + .def("squeeze",&SplineNetLib::CTensor::squeeze, "None, (size_t dim), removes the dim and projects the data to the new shape") + .def("unsqueeze",&SplineNetLib::CTensor::unsqueeze, "None, (size_t dim), adds new dim at input dim index") + .def("expand",&SplineNetLib::CTensor::expand, "None, (size_t dim, size_t factor), expands the dimesnion at dim by factor -> shape: (2,2) expand(0,3) becomes: shape(6,2), (note this WILL affect the data)") + .def("permute",&SplineNetLib::CTensor::permute, "None, (std::vector), swaps dimesnions at input indecies -> shape(2,1,3) permute([2,0,1] becomes: shape(3,2,1))") + .def("transpose",&SplineNetLib::CTensor::transpose, "None, (None), transposes the tensor (swaps the innermost two dimesnions)") + .def("clear_history",&SplineNetLib::CTensor::clear_history, "None, (None), clears all grad fns from the tensor (gradient propagatuon WILL NOT work after this so use carefully)") + .def("clear_graph",&SplineNetLib::CTensor::clear_graph,"None, (None), clears full computational graph for all tensors conected to this one") + //.def("backward",&SplineNetLib::CTensor::backward, "None, (None), backwards pass through this and connected graph") + .def("backward", &SplineNetLib::CTensor::backward, + py::arg("prop_grad") = std::vector(), "Backward pass, takes an optional gradient vector (defaults to empty).") + .def("__mul__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& other) {return self * other;}) + .def("__add__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& other) {return self + other; }) + .def("__sub__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& other) {return self - other; }) -namespace py = pybind11; + .def("__getitem__", [](SplineNetLib::CTensor& self, size_t idx)->SplineNetLib::CTensor { return self[idx]; }); + + py::class_>(m, "CTensor") -PYBIND11_MODULE(mylibrary, m) { - // Binding the spline class - py::class_(m, "spline") - .def(py::init>&, const std::vector>&>(), // Constructor - "Constructs the spline with control points and parameters") - .def("interpolation", &SplineNetLib::spline::interpolation, - "None -> Interpolates the spline based on its points") - .def("forward", &SplineNetLib::spline::forward, - "double (double x) -> Evaluates the spline at x (if x is in bounds)") - .def("backward", &SplineNetLib::spline::backward, - "double (double in, double d_y, double out) -> Uses previous input, loss gradient, and last output for gradient descent") - .def("apply_grad", &SplineNetLib::spline::apply_grad, - "None (double lr) -> Applies gradient from backward * learning rate (lr)") - .def("get_points", 
&SplineNetLib::spline::get_points, - "[[double]] -> Returns spline points like [[x0, y0], ..., [xn, yn]]") - .def("get_params", &SplineNetLib::spline::get_params, - "[[double]] -> Returns spline parameters/coefficients like [[a0, b0, c0, d0], ..., [an, bn, cn, dn]]"); - - // Binding the layer class - py::class_(m, "layer") - .def(py::init(), // Constructor with size and learning rate - "Constructs a layer with the specified number of splines and learning rate") - .def(py::init>>>, - std::vector>>>>(), // Constructor for nested vector input - "Constructs a layer with nested vector inputs for spline initialization") - .def("interpolate_splines", &SplineNetLib::layer::interpolate_splines, - "None -> Calls interpolation on all splines in the layer") - - // Overloaded 'forward' methods - .def("forward", py::overload_cast, bool>(&SplineNetLib::layer::forward), - "[double] (x, bool normalize) -> Forward call for single input sample, applies normalization if needed") - .def("forward", py::overload_cast>&, bool>(&SplineNetLib::layer::forward), - "[[double]] (x, bool normalize) -> Forward call for batch inputs, applies normalization if needed") + .def(py::init&, const std::initializer_list&>()) + .def(py::init&, const std::vector&>()) + .def(py::init&>()) + .def(py::init([](const py::list &py_list) {//only for py module to turn nested lists and turn them to nested vector + auto nested_vector = convert_pylist_to_vector(py_list); + std::vector shape = get_shape(py_list); + return new SplineNetLib::CTensor(nested_vector,shape); + })) + .def("data",&SplineNetLib::CTensor::data,"std::vector, (None), returns the stored data vector as a copy") + .def("shape",&SplineNetLib::CTensor::shape,"std::vector, (None), returns the shape of the tensor like (dim0, dim1, ..., dimN)") + .def("grad",&SplineNetLib::CTensor::grad, "std::vector, (None), returns the grad as flat 1D projected vector (internally using tensor.shape)") + .def("zero_grad",&SplineNetLib::CTensor::zero_grad, "None, (None), sets the gradient of this tensor to 0" ) + .def("squeeze",&SplineNetLib::CTensor::squeeze, "None, (size_t dim), removes the dim and projects the data to the new shape") + .def("unsqueeze",&SplineNetLib::CTensor::unsqueeze, "None, (size_t dim), adds new dim at input dim index") + .def("expand",&SplineNetLib::CTensor::expand, "None, (size_t dim, size_t factor), expands the dimesnion at dim by factor -> shape: (2,2) expand(0,3) becomes: shape(6,2), (note this WILL affect the data)") + .def("permute",&SplineNetLib::CTensor::permute, "None, (std::vector), swaps dimesnions at input indecies -> shape(2,1,3) permute([2,0,1] becomes: shape(3,2,1))") + .def("transpose",&SplineNetLib::CTensor::transpose, "None, (None), transposes the tensor (swaps the innermost two dimesnions)") + .def("clear_history",&SplineNetLib::CTensor::clear_history, "None, (None), clears all grad fns from the tensor (gradient propagatuon WILL NOT work after this so use carefully)") + .def("clear_graph",&SplineNetLib::CTensor::clear_graph,"None, (None), clears full computational graph for all tensors conected to this one") + //.def("backward",&SplineNetLib::CTensor::backward, "None, (None), backwards pass through this and connected graph") + .def("backward", &SplineNetLib::CTensor::backward, + py::arg("prop_grad") = std::vector(), "None, (None), backwards pass through this and connected graph") + .def("__mul__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& other) {return self * other;}) + .def("__add__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& 
other) {return self + other; }) + .def("__sub__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& other) {return self - other; }) + + .def("__getitem__", [](SplineNetLib::CTensor& self, size_t idx)->SplineNetLib::CTensor { return self[idx]; }); - // Overloaded 'backward' methods - .def("backward", py::overload_cast, std::vector, bool>(&SplineNetLib::layer::backward), - "[double] (x, d_y, bool normalize) -> Backward propagation for single input sample, applies grad if normalize is True") - .def("backward", py::overload_cast>&, std::vector>>(&SplineNetLib::layer::backward), - "[[double]] (x, d_y) -> Backward propagation for batch inputs, always applies gradients") - .def("get_splines", &SplineNetLib::layer::get_splines, - "[[SplineNetLib::spline]] -> Returns all splines in the layer"); } -*/ \ No newline at end of file + + + From a9781b14a7863f736336ad013ac7218132e73948 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Wed, 12 Feb 2025 21:57:54 +0100 Subject: [PATCH 05/19] fixed error in the setup.py file (sys wasnt included) --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 3ca8869..f6f7032 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ import os import subprocess import pybind11 +import sys def build_cpp_library(): #make sure cmake is installed From 01db9bcc985330c0bc8e7f650fedeee71f06140c Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Thu, 13 Feb 2025 11:35:08 +0100 Subject: [PATCH 06/19] testing .md changes --- docs/README.md | 20 ++++++++++++++------ src/SplineNetLib_py.cpp | 5 +++-- tests/unit_tests/py_spline_tests.py | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 tests/unit_tests/py_spline_tests.py diff --git a/docs/README.md b/docs/README.md index 3dbbb03..386b59e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -206,6 +206,12 @@ parameters : list = [[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0]] spline_instance = PySplineNetLib.spline(points,parameters) ``` +or alternatively do: + +```python +spline_instance = PySplineNetLib.spline([[0.0,0.0],[0.5,0.25],[1.0,1.0]],[[0.0]*4]*2) +``` + - spline interpolation: to properly init a spline call .interpolation() @@ -234,7 +240,9 @@ to find the splines gradient based on a give loss grad at spline point (x,y) cal d_y : float = spline_instance.backward(x, d_y, y) ``` x : float = point that was last evaluated -y : float = spline prediction at x + +y : float = actual target + d_y : float = gradient of loss with (x,target) with respect to spline (x,y) (=> loss.backward() or d_y of next layer) **Note :** @@ -252,7 +260,12 @@ lr : float = learning rate (controls how strong the gradient affects the splines ### layer documentation comming soon +layers combine multiple splines to map an input vector of size m to an output vector of size n by evaluating splines at the input values and combining these outputs into the output. To achieve this the layer uses an m x n spline matrix where for every inputi there exist m splines. +For example, given a layer with input size 3 and output size 2 a 3 by 2 matrix of splines is created. Now inputi is given to the spline vectori so that all splinesj get evaluated. the results of splinei,j is added to outputj. + +mathematically the output is defined like this: + ## install for c++ @@ -307,11 +320,6 @@ g++ -std=c++17 -I/path_to_include -L/path_to_lib -lSplineNetLib main.cpp -o Your ```txt git clone https://github.com/K-T0BIAS/Spline-based-DeepLearning.git cd Spline-based-DeepLearning -mkdir -p build -cd build -cmake .. -make -cd .. 
pip install . ``` diff --git a/src/SplineNetLib_py.cpp b/src/SplineNetLib_py.cpp index a3c45f2..674e15a 100644 --- a/src/SplineNetLib_py.cpp +++ b/src/SplineNetLib_py.cpp @@ -74,8 +74,9 @@ PYBIND11_MODULE(PySplineNetLib, m) { .def("apply_grad",&SplineNetLib::spline::apply_grad,"None (double lr),apply grad from backward * lr") .def("get_points",&SplineNetLib::spline::get_points,"[[double]] (None),return spline points like [[x0,y0],...,[xn,yn]]") .def("get_params",&SplineNetLib::spline::get_params,"[[double]] (None),return spline parameters/coefficients like [[a0,b0,c0,d0],...,[an,bn,cn,dn]]"); + py::class_(m, "layer") - .def(py::init()) + .def(py::init())//in size, out size, detail (num of parameters -2), max (maximum input value that spline processes) .def(py::init>>>, std::vector>>> >()) .def("interpolate_splines",&SplineNetLib::layer::interpolate_splines,"None (None), calls interpolation on all splines in the layer") .def("forward",py::overload_cast, bool>(&SplineNetLib::layer::forward),"[double] ([double] x, bool normalize), forward call for single input sample") @@ -84,7 +85,7 @@ PYBIND11_MODULE(PySplineNetLib, m) { .def("backward",py::overload_cast> &,std::vector> >(&SplineNetLib::layer::backward),"backward but for batches (will always apply gradients)") .def("get_splines",&SplineNetLib::layer::get_splines,"[[SplineNetLib::spline]] (None), returns all splines in the layer"); //int tensor - py::class_>(m, "CTensor") + py::class_>(m, "IntCTensor") .def(py::init&, const std::initializer_list&>()) .def(py::init&, const std::vector&>()) diff --git a/tests/unit_tests/py_spline_tests.py b/tests/unit_tests/py_spline_tests.py new file mode 100644 index 0000000..0811244 --- /dev/null +++ b/tests/unit_tests/py_spline_tests.py @@ -0,0 +1,18 @@ +import PySplineNetLib +import unittest + +class Spline_Test(unittest.TestCase): + + def Spline_init_Test(self): + A = PySplineNetLib.spline([[0,0],[0.5,1],[1,2]],[[0,0,0,0],[0,0,0,0]]) + A.interpolation() + a : float = A.forward(0.25) + self.assertAlmostEqual(0.5, a, delta = 0.000001) + a_y : float = A.backward(0.25, 0, 1) + #returns A.forward(0.25)=0.5 - y = 0 + d_y = 0 -> 0.5 - 1 = -0.5 + self.assertAlmostEqual(-0.5, a_y, delta = 0.000001) + A.apply_grad(1) #applies the gradient with factor 1.0 (moves y_i at x_i > 0.25 by -1 *grad {same as sign(grad)}) + A.interpolation() #fimds new params for the new spline + self.assertAlmostEqual([0.0, 3.5, 0.0, -2.0],[1.5, 2.0, -3.0, 2.0], A.get_params()) + self.assertAlmostEqual([[0.0, 0.0], [0.5, 1.5], [1.0, 2.0]], A.get_points()) + \ No newline at end of file From a4e676349d52dedb746b8952c1b1a4ea7b6fa237 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Thu, 13 Feb 2025 11:38:14 +0100 Subject: [PATCH 07/19] testing .md changes --- docs/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/README.md b/docs/README.md index 386b59e..977cc6f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -265,7 +265,9 @@ For example, given a layer with input size 3 and output size 2 a 3 by 2 matrix o mathematically the output is defined like this: - +$$ +y_j = \sum_{i=1}^{m} S_{i,j}(x_i), \quad \forall j \in \{1, \dots, n\} +$$ ## install for c++ From 61ba244716f2ff08172988c2c584a2484014a312 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Thu, 13 Feb 2025 12:20:49 +0100 Subject: [PATCH 08/19] layer documentation in the python part added --- docs/README.md | 66 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/docs/README.md b/docs/README.md 
index 977cc6f..1f9e623 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -124,7 +124,7 @@ vector> pred = layer_instance.forward(X, normalize);
 
 **assuming namespace std**
 ```cpp
-vector loss_gradient = layer_instance(X,d_y);
+vector loss_gradient = layer_instance.backward(X,d_y);
 ```
 
 * vector X = input (either from previous layer or from dataset)
@@ -133,7 +133,7 @@ vector loss_gradient = layer_instance(X,d_y);
 
 - batched backward pass:
 ```cpp
-vector> loss_gradient = layer_instance(X, d_y);
+vector> loss_gradient = layer_instance.backward(X, d_y);
 ```
 
 * vector> X = batched input (either from previous layer or from dataset)
@@ -258,17 +258,71 @@ spline_instance.apply_grad(lr)
 
 lr : float = learning rate (controls how strong the gradient affects the splines points)
 
-### layer documentation comming soon
+## layer
 
-layers combine multiple splines to map an input vector of size m to an output vector of size n by evaluating splines at the input values and combining these outputs into the output. To achieve this the layer uses an m x n spline matrix where for every inputi there exist m splines.
-For example, given a layer with input size 3 and output size 2 a 3 by 2 matrix of splines is created. Now inputi is given to the spline vectori so that all splinesj get evaluated. the results of splinei,j is added to outputj.
+layers combine multiple splines to map an input vector of size m to an output vector of size n by evaluating splines at the input values and combining these outputs into the output vector. To achieve this the layer uses an m x n spline matrix where for every input $x_i$ there exist n splines.
 
-mathematically the output is defined like this:
+mathematically the output $y$ is defined like this:
 
 $$
 y_j = \sum_{i=1}^{m} S_{i,j}(x_i), \quad \forall j \in \{1, \dots, n\}
 $$
 
+for example given input size 3 and output size 2, output $y_1$ is the sum of the splines $S_{i,1}(x_i)$ with $i$ from 1 to 3 (the input size)
+
+To create a new layer do:
+
+```python
+layer_instance = PySplineNetLib.layer(input_size, output_size, detail, max)
+```
+
+where:
+
+input_size : int = the size of the input vector
+output_size : int = the expected size of the output vector
+detail : int = the number of control points for each spline (NOTE that the spline has detail + 2 points, so to get 10 points detail should be 8)
+max : float = the maximum value that any spline in the layer can evaluate (recommended 1.0 combined with activations that map input and output to range(0,1))
+
+alternatively you can create a layer with start values for points and parameters like this:
+
+```python
+layer_instance = PySplineNetLib.layer(points, parameters)
+```
+
+with:
+points : list = nested list of points like : (input_size, output_size, detail +2, 2 = x,y)
+parameters : list = nested list of parameters like : (input_size, output_size, detail +1, 4)
+
+to fully init the layer call:
+
+```python
+layer_instance.interpolate_splines()
+```
+
+### forward pass
+
+```python
+pred = layer_instance.forward(X)
+```
+
+where:
+
+X : list = single input vector or batched input vector
+pred : list = prediction vector (also with batch dimension if the input was batched)
+
+### backward pass
+
+```python
+d_y = layer_instance.backward(X, d_y)
+```
+
+where:
+
+X is the last input this layer received
+d_y is the propagated gradient from the next layer (or the loss function)
+
+Note that backward will apply the gradient to all splines in the layer automatically
+
 ## install for c++
 
 ```txt
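To make the new layer section concrete, here is a minimal single-sample training step against the bindings registered in `src/SplineNetLib_py.cpp`. This is a sketch, not part of the patch series: the sizes, input and target are made-up illustration values, and the trailing bool arguments follow the normalize/apply-gradient flags described in the binding docstrings (the exact overloads may differ between versions):

```python
import PySplineNetLib

layer = PySplineNetLib.layer(3, 2, 2, 1.0)  # 3 inputs -> 2 outputs, detail 2, max x = 1.0
layer.interpolate_splines()                 # initialize all spline parameters before use

x = [0.1, 0.5, 0.9]                              # one sample, values inside [0, max]
pred = layer.forward(x, False)                   # False = no output normalization
d_y = [p - t for p, t in zip(pred, [0.0, 1.0])]  # simple error signal against a made-up target
layer.backward(x, d_y, True)                     # True applies the stored gradients
```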
From 7a664fa1bdcb29ac14c6b403ae2ac1b4cd001217 Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Thu, 13 Feb 2025 18:17:35 +0100
Subject: [PATCH 09/19] fixed bug where the CTensor would change after matmul
 backward. added clone member function in CTensor, DTensor and all Function
 classes for this

---
 include/SplineNetLib/CTensor.hpp     | 13 +++++++++-
 include/SplineNetLib/CTensorFunc.hpp |  8 ++++++
 src/CTensor.tpp                      |  6 +++++
 src/CTensorFunc.tpp                  | 26 ++++++++++++++++---
 tests/unit_tests/py_spline_tests.py  | 38 +++++++++++++++++++++++++++-
 5 files changed, 85 insertions(+), 6 deletions(-)

diff --git a/include/SplineNetLib/CTensor.hpp b/include/SplineNetLib/CTensor.hpp
index 862b915..272f40b 100644
--- a/include/SplineNetLib/CTensor.hpp
+++ b/include/SplineNetLib/CTensor.hpp
@@ -36,6 +36,13 @@ class DTensor{
     DTensor(const std::initializer_list& data, const std::initializer_list& shape) :
         _data(data), _shape(shape), _ref_c(1) {}
 
+    DTensor(const DTensor& other) : _data(other._data), _shape(other._shape), _grad(other._grad), _ref_c(1) {
+        // Deep copy unique_ptrs to grad fns by calling clone()
+        for (const auto& fn : other._grad_fn) {
+            _grad_fn.push_back(fn ? fn->clone() : nullptr);
+        }
+    }
+
     void add_ref(){
         _ref_c++;
     }
@@ -53,7 +60,7 @@ template
 class CTensor {
 private:
 
-
+    CTensor(DTensor* _t_data) : _tensor_data(_t_data){}
 
 public:
@@ -80,6 +87,8 @@ class CTensor {
     }
 
+
+
     ~CTensor(){
         _tensor_data->rmf_ref();
     }
@@ -96,6 +105,8 @@ class CTensor {
 
     void zero_grad();
 
+    CTensor clone();
+
     //-----shape-utils-----
 
     void squeeze(const size_t &dim) ;//squeezes / removes the input dim and changes the internal projection shape
diff --git a/include/SplineNetLib/CTensorFunc.hpp b/include/SplineNetLib/CTensorFunc.hpp
index b3fb503..8b686f7 100644
--- a/include/SplineNetLib/CTensorFunc.hpp
+++ b/include/SplineNetLib/CTensorFunc.hpp
@@ -38,6 +38,8 @@ class Function {
 
     virtual void backward(std::vector &prop_grad, CTensor *result) = 0;
 
+    virtual std::unique_ptr> clone() const = 0;
+
     static std::unordered_set*> global_chain;
 
     void clear_graph_f();
@@ -59,6 +61,8 @@ class AddFunction : public Function {
     std::vector fwd() override ;
 
     void backward(std::vector &prop_grad, CTensor *result) override;
+
+    virtual std::unique_ptr> clone() const override;
 };
 
 //subtractor function class for CTensor::operator-
@@ -74,6 +78,8 @@ class SubFunction : public Function {
 
     void backward(std::vector &prop_grad, CTensor *result) override;
 
+    virtual std::unique_ptr> clone() const override;
+
 };
 
 //matrix multiplication function class for CTensor::operator*
@@ -88,6 +94,8 @@ class MatMulFunction : public Function {
     std::vector fwd() override;
 
     void backward(std::vector &prop_grad, CTensor *result) override;
+
+    virtual std::unique_ptr> clone() const override;
 };
 
 } //namepace
diff --git a/src/CTensor.tpp b/src/CTensor.tpp
index 1fe0ce2..834ea71 100644
--- a/src/CTensor.tpp
+++ b/src/CTensor.tpp
@@ -264,6 +264,12 @@ void CTensor::backward(std::vector prop_grad) {
     }
     //std::cout<<"debug Ct bwd fn all bwd finish\n";
 }
+
+template
+CTensor CTensor::clone() {
+    CTensor Cloned_CTensor(new DTensor(*_tensor_data));
+    return Cloned_CTensor;
+}
 /* untestee
 template
 CTensor zeros(std::vector shape) {
diff --git a/src/CTensorFunc.tpp b/src/CTensorFunc.tpp
index 4640897..ebb1b6a 100644
--- a/src/CTensorFunc.tpp
+++ b/src/CTensorFunc.tpp
@@ -112,6 +112,12 @@ void AddFunction::backward(std::vector &prop_grad, CTensor *result) {
     //std::cout<<"debug add bwd chain erase\n";
 }
 
+template
+requires Scalar
+std::unique_ptr> AddFunction::clone() const {
+    return std::make_unique>(*this);
+}
+
 
 template
 requires Scalar
@@ -185,6 +191,12 @@ void SubFunction::backward(std::vector &prop_grad, CTensor
*result) { } +template +requires Scalar +std::unique_ptr> SubFunction::clone() const { + return std::make_unique>(*this); +} + template requires Scalar std::vector MatMulFunction::fwd() { @@ -195,8 +207,8 @@ std::vector MatMulFunction::fwd() { size_t a_n_dims = a_shape.size(); size_t b_n_dims = b_shape.size(); - auto a_copy = *(this->a); - auto b_copy = *(this->b); + auto a_copy = this->a->clone(); + auto b_copy = this->b->clone(); if (a_n_dims != b_n_dims) { throw std::invalid_argument("operator (*) expects both opperants to have the same num of dimensions but got:"+std::to_string(a_n_dims)+"and "+std::to_string(b_n_dims)+",please ensure opperants dims match by using squeeze or unsqueeze beforehand\n"); @@ -256,7 +268,7 @@ void MatMulFunction::backward(std::vector &prop_grad, CTensor *result) this->a->zero_grad(); } //create a copy of b and transpose it - auto b_copy = *(this->b); + auto b_copy = this->b->clone(); b_copy.transpose(); prop_grad_a = matmul(prop_grad, b_copy.data(), prop_grad_shape, b_copy.shape()); @@ -278,7 +290,7 @@ void MatMulFunction::backward(std::vector &prop_grad, CTensor *result) this->b->zero_grad(); } //create a copy of b and transpose it - auto a_copy = *(this->a); + auto a_copy = this->a->clone(); a_copy.transpose(); //std::cout<<"b bwd a_copy shape :"<::backward(std::vector &prop_grad, CTensor *result) Function::global_chain.erase(this); } +template +requires Scalar +std::unique_ptr> MatMulFunction::clone() const { + return std::make_unique>(*this); +} + }//namespace #endif \ No newline at end of file diff --git a/tests/unit_tests/py_spline_tests.py b/tests/unit_tests/py_spline_tests.py index 0811244..b324de9 100644 --- a/tests/unit_tests/py_spline_tests.py +++ b/tests/unit_tests/py_spline_tests.py @@ -15,4 +15,40 @@ def Spline_init_Test(self): A.interpolation() #fimds new params for the new spline self.assertAlmostEqual([0.0, 3.5, 0.0, -2.0],[1.5, 2.0, -3.0, 2.0], A.get_params()) self.assertAlmostEqual([[0.0, 0.0], [0.5, 1.5], [1.0, 2.0]], A.get_points()) - \ No newline at end of file + +class CTensor_Test(unittest.TestCase): + + def CTensor_init_Test(self): + a = PySplineNetLib.CTensor([[1,2,3],[4,5,6]]) + self.assertAlmostEqual([1,2,3,4,5,6], a.data()) + self.assertAlmostEqual([2,3], a.data()) + b = PySplineNetLib.CTensor([6,5,4,3,2,1],[3,2]) + self.assertAlmostEqual([6,5,4,3,2,1], b.data()) + self.assertAlmostEqual([3,2], b.shape()) + c = PySplineNetLib.CTensor(a) + self.assertAlmostEqual([1,2,3,4,5,6], c.data()) + self.assertAlmostEqual([2,3], c.shape()) + + def CTensor_math_Test(self): + a = PySplineNetLib.CTensor([[1,2,3],[4,5,6]]) + b = PySplineNetLib.CTensor([[6,5,4],[3,2,1]]) + + c = a + b; + self.assertAlmostEqual([7,7,7,7,7,7], c.data()) + self.assertAlmostEqual([2,3], c.shape()) + + b.transpose() + d = a * b; + self.assertAlmostEqual([28.0, 10.0, 73.0, 28.0], d.data()) + self.assertAlmostEqual([2,2], d.shape()) + + b.transpose() + e = a - b; + self.assertAlmostEqual([-5.0, -3.0, -1.0, 1.0, 3.0, 5.0], e.data()) + self.assertAlmostEqual([2,2], e.shape()) + + def Ctensor_grad_Test(self): + a = PySplineNetLib.CTensor([[2,2,2],[2,2,2]]) + b = PySplineNetLib.CTensor([[1,2],[3,4],[5,6]]) + c = PySplineNetLib.CTensor([[0.5,0.5],[0.5,0.5]]) + d = a * b + c \ No newline at end of file From e3f50b4b7f0d9947db10e4158da1bfd65fa9bdc3 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Sat, 15 Feb 2025 13:53:58 +0100 Subject: [PATCH 10/19] added minimal toml file for pip install --- PySplineNetLib.egg-info/PKG-INFO | 3 ++- PySplineNetLib.egg-info/SOURCES.txt | 1 + 
pyproject.toml | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 pyproject.toml diff --git a/PySplineNetLib.egg-info/PKG-INFO b/PySplineNetLib.egg-info/PKG-INFO index ef7e153..14c402b 100644 --- a/PySplineNetLib.egg-info/PKG-INFO +++ b/PySplineNetLib.egg-info/PKG-INFO @@ -1,5 +1,6 @@ -Metadata-Version: 2.1 +Metadata-Version: 2.2 Name: PySplineNetLib Version: 0.1 License-File: LICENSE Requires-Dist: pybind11>=2.6.0 +Dynamic: requires-dist diff --git a/PySplineNetLib.egg-info/SOURCES.txt b/PySplineNetLib.egg-info/SOURCES.txt index 07ef532..ddcf9e8 100644 --- a/PySplineNetLib.egg-info/SOURCES.txt +++ b/PySplineNetLib.egg-info/SOURCES.txt @@ -1,4 +1,5 @@ LICENSE +pyproject.toml setup.py PySplineNetLib.egg-info/PKG-INFO PySplineNetLib.egg-info/SOURCES.txt diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9c5a55d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=42", "wheel", "pybind11"] +build-backend = "setuptools.build_meta" \ No newline at end of file From 684249db030a82b732b31026aae58285db0ad4f3 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Sat, 15 Feb 2025 14:13:36 +0100 Subject: [PATCH 11/19] minor adjustments to gitignore --- .gitignore | 1 + PySplineNetLib.egg-info/PKG-INFO | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index de7e739..fa6e840 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ CTestTestfile.cmake *.user *.swp *.tmp +*.egg-info # IDE specific files (if you're using any IDE) # Uncomment for Visual Studio Code diff --git a/PySplineNetLib.egg-info/PKG-INFO b/PySplineNetLib.egg-info/PKG-INFO index 14c402b..ef7e153 100644 --- a/PySplineNetLib.egg-info/PKG-INFO +++ b/PySplineNetLib.egg-info/PKG-INFO @@ -1,6 +1,5 @@ -Metadata-Version: 2.2 +Metadata-Version: 2.1 Name: PySplineNetLib Version: 0.1 License-File: LICENSE Requires-Dist: pybind11>=2.6.0 -Dynamic: requires-dist From e90cba69c1139cad6cb8b45a093cdcee0d4c2f85 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Sat, 15 Feb 2025 15:00:51 +0100 Subject: [PATCH 12/19] more tests for the py version --- tests/unit_tests/py_spline_tests.py | 47 +++++++++++++++++------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/tests/unit_tests/py_spline_tests.py b/tests/unit_tests/py_spline_tests.py index b324de9..508454c 100644 --- a/tests/unit_tests/py_spline_tests.py +++ b/tests/unit_tests/py_spline_tests.py @@ -3,7 +3,7 @@ class Spline_Test(unittest.TestCase): - def Spline_init_Test(self): + def test_Spline_init_Test(self): A = PySplineNetLib.spline([[0,0],[0.5,1],[1,2]],[[0,0,0,0],[0,0,0,0]]) A.interpolation() a : float = A.forward(0.25) @@ -13,42 +13,51 @@ def Spline_init_Test(self): self.assertAlmostEqual(-0.5, a_y, delta = 0.000001) A.apply_grad(1) #applies the gradient with factor 1.0 (moves y_i at x_i > 0.25 by -1 *grad {same as sign(grad)}) A.interpolation() #fimds new params for the new spline - self.assertAlmostEqual([0.0, 3.5, 0.0, -2.0],[1.5, 2.0, -3.0, 2.0], A.get_params()) - self.assertAlmostEqual([[0.0, 0.0], [0.5, 1.5], [1.0, 2.0]], A.get_points()) + self.assertListEqual([[0.0, 0.5, 0.0, 2.0], [0.5, 2.0, 3.0, -2.0]], A.get_params()) + self.assertListEqual([[0.0, 0.0], [0.5, 0.5], [1.0, 2.0]], A.get_points()) class CTensor_Test(unittest.TestCase): - def CTensor_init_Test(self): + def test_CTensor_init_Test(self): a = PySplineNetLib.CTensor([[1,2,3],[4,5,6]]) - self.assertAlmostEqual([1,2,3,4,5,6], a.data()) - self.assertAlmostEqual([2,3], a.data()) + 
self.assertListEqual([1,2,3,4,5,6], a.data())
+        self.assertListEqual([2,3], a.shape())
         b = PySplineNetLib.CTensor([6,5,4,3,2,1],[3,2])
-        self.assertAlmostEqual([6,5,4,3,2,1], b.data())
-        self.assertAlmostEqual([3,2], b.shape())
+        self.assertListEqual([6,5,4,3,2,1], b.data())
+        self.assertListEqual([3,2], b.shape())
         c = PySplineNetLib.CTensor(a)
-        self.assertAlmostEqual([1,2,3,4,5,6], c.data())
-        self.assertAlmostEqual([2,3], c.shape())
+        self.assertListEqual([1,2,3,4,5,6], c.data())
+        self.assertListEqual([2,3], c.shape())
 
-    def CTensor_math_Test(self):
+    def test_CTensor_math_Test(self):
         a = PySplineNetLib.CTensor([[1,2,3],[4,5,6]])
         b = PySplineNetLib.CTensor([[6,5,4],[3,2,1]])
 
         c = a + b;
-        self.assertAlmostEqual([7,7,7,7,7,7], c.data())
-        self.assertAlmostEqual([2,3], c.shape())
+        self.assertListEqual([7,7,7,7,7,7], c.data())
+        self.assertListEqual([2,3], c.shape())
 
         b.transpose()
         d = a * b;
-        self.assertAlmostEqual([28.0, 10.0, 73.0, 28.0], d.data())
-        self.assertAlmostEqual([2,2], d.shape())
+        self.assertListEqual([28.0, 10.0, 73.0, 28.0], d.data())
+        self.assertListEqual([2,2], d.shape())
 
         b.transpose()
         e = a - b;
-        self.assertAlmostEqual([-5.0, -3.0, -1.0, 1.0, 3.0, 5.0], e.data())
-        self.assertAlmostEqual([2,2], e.shape())
+        self.assertListEqual([-5.0, -3.0, -1.0, 1.0, 3.0, 5.0], e.data())
+        self.assertListEqual([2,3], e.shape())
 
-    def Ctensor_grad_Test(self):
+    def test_Ctensor_grad_Test(self):
         a = PySplineNetLib.CTensor([[2,2,2],[2,2,2]])
         b = PySplineNetLib.CTensor([[1,2],[3,4],[5,6]])
         c = PySplineNetLib.CTensor([[0.5,0.5],[0.5,0.5]])
-        d = a * b + c
\ No newline at end of file
+        d = a * b + c
+        self.assertListEqual([18.5, 24.5, 18.5, 24.5],d.data())
+        self.assertListEqual([2,2],d.shape())
+        d.backward()
+        self.assertListEqual([3.0, 7.0, 11.0, 3.0, 7.0, 11.0], a.grad())
+        self.assertListEqual([4.0, 4.0, 4.0, 4.0, 4.0, 4.0], b.grad())
+        self.assertListEqual([1.0, 1.0, 1.0, 1.0], c.grad())
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
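The gradient values asserted in `test_Ctensor_grad_Test` are the standard matmul backward rules: with an all-ones upstream gradient $G$ for $d = a \cdot b + c$, the expected gradients are $G b^T$ for $a$, $a^T G$ for $b$, and $G$ itself for $c$. A short NumPy sketch to cross-check the numbers (NumPy is used here only for verification and is not a project dependency):

```python
import numpy as np

a = np.full((2, 3), 2.0)                      # same data as the test tensor a
b = np.array([[1., 2.], [3., 4.], [5., 6.]])  # same data as the test tensor b
g = np.ones((2, 2))                           # upstream gradient of d = a @ b + c

print(g @ b.T)  # [[3. 7. 11.] [3. 7. 11.]] -> flat [3, 7, 11, 3, 7, 11], matches a.grad()
print(a.T @ g)  # all 4.0 -> flat [4, 4, 4, 4, 4, 4], matches b.grad()
print(g)        # all 1.0 -> flat [1, 1, 1, 1], matches c.grad()
```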
From b2db021f76e050245d6f0b93eda842667311527e Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Sat, 15 Feb 2025 21:36:29 +0100
Subject: [PATCH 13/19] minor changes

---
 PySplineNetLib.egg-info/PKG-INFO             | 5 -----
 PySplineNetLib.egg-info/SOURCES.txt          | 9 ---------
 PySplineNetLib.egg-info/dependency_links.txt | 1 -
 PySplineNetLib.egg-info/requires.txt         | 1 -
 PySplineNetLib.egg-info/top_level.txt        | 1 -
 5 files changed, 17 deletions(-)
 delete mode 100644 PySplineNetLib.egg-info/PKG-INFO
 delete mode 100644 PySplineNetLib.egg-info/SOURCES.txt
 delete mode 100644 PySplineNetLib.egg-info/dependency_links.txt
 delete mode 100644 PySplineNetLib.egg-info/requires.txt
 delete mode 100644 PySplineNetLib.egg-info/top_level.txt

diff --git a/PySplineNetLib.egg-info/PKG-INFO b/PySplineNetLib.egg-info/PKG-INFO
deleted file mode 100644
index ef7e153..0000000
--- a/PySplineNetLib.egg-info/PKG-INFO
+++ /dev/null
@@ -1,5 +0,0 @@
-Metadata-Version: 2.1
-Name: PySplineNetLib
-Version: 0.1
-License-File: LICENSE
-Requires-Dist: pybind11>=2.6.0
diff --git a/PySplineNetLib.egg-info/SOURCES.txt b/PySplineNetLib.egg-info/SOURCES.txt
deleted file mode 100644
index ddcf9e8..0000000
--- a/PySplineNetLib.egg-info/SOURCES.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-LICENSE
-pyproject.toml
-setup.py
-PySplineNetLib.egg-info/PKG-INFO
-PySplineNetLib.egg-info/SOURCES.txt
-PySplineNetLib.egg-info/dependency_links.txt
-PySplineNetLib.egg-info/requires.txt
-PySplineNetLib.egg-info/top_level.txt
-src/SplineNetLib_py.cpp
\ No newline at end of file
diff --git a/PySplineNetLib.egg-info/dependency_links.txt b/PySplineNetLib.egg-info/dependency_links.txt
deleted file mode 100644
index 8b13789..0000000
--- a/PySplineNetLib.egg-info/dependency_links.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/PySplineNetLib.egg-info/requires.txt b/PySplineNetLib.egg-info/requires.txt
deleted file mode 100644
index d89789f..0000000
--- a/PySplineNetLib.egg-info/requires.txt
+++ /dev/null
@@ -1 +0,0 @@
-pybind11>=2.6.0
diff --git a/PySplineNetLib.egg-info/top_level.txt b/PySplineNetLib.egg-info/top_level.txt
deleted file mode 100644
index 4457d5c..0000000
--- a/PySplineNetLib.egg-info/top_level.txt
+++ /dev/null
@@ -1 +0,0 @@
-PySplineNetLib

From 188090688826b396ee385159d357ec24cad5dc8d Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Sat, 15 Feb 2025 21:50:29 +0100
Subject: [PATCH 14/19] reconstructing the documentation into multiple
 specific .md files

---
 docs/README.md => README.md | 152 +-----------------------------------
 docs/cpp_splines.md         | 147 ++++++++++++++++++++++++++++
 2 files changed, 148 insertions(+), 151 deletions(-)
 rename docs/README.md => README.md (55%)
 create mode 100644 docs/cpp_splines.md

diff --git a/docs/README.md b/README.md
similarity index 55%
rename from docs/README.md
rename to README.md
index 1f9e623..52f9e62 100644
--- a/docs/README.md
+++ b/README.md
@@ -4,11 +4,7 @@
 
 [goals](#goals)
 
-[C++ documentation](#C-Implementationdocumentation)
-
-1. [splines](#splines)
-2. [layers](#layers)
-3. [network](#Network)
+[C++ spline documentation](docs/cpp_spline.md)
 
 [python documentation](#python-Implementationdocumentation)
 
@@ -33,152 +29,6 @@ see [install for python](#install-for-python) to install
 2. achieve similar or better precision to common deep learning approaches whilst keeping the structure as light-wheight and fast as possible.
 3. allow easy adaptability to existing architectures like convolutional and recurrent networks.
 
-## C++ Implementation/documentation
-
-### Splines
-The splines are the main computation unit of a layer. They allow for an easily adjustable and visualizable alternative to wheight matricies.
-To create a spline call:
-```cpp
-SplineNetLib::spline spline_instance = spline(points,parameters);
-```
-where points and parameters are vectors of shapes:
-
-$$
-( \text{output size}, \text{input size}, 2)
-$$
-
-and
-
-$$
-( \text{output size},\text{input size}, 4)
-$$
-
-**Note** that the x values of the points list must be sorted from smallest to biggest.
-
-to fully initialize the spline call:
-```cpp
-Spline_instance.interpolate();
-```
-this, although not always nessecery will adjust the parameters with respect to the points.
-
-To evaluate the spline at point x do:
-```cpp
-double y = Spline_instance.forward(x)
-```
-**Note** that x must be between 0 and the largest x value in the splines points list. Trying to access x values outside the spline will result in an error.
-
-To perform a backward pass call:
-```cpp
-double loss_grad = spline.backward(x,d_y,lr)
-```
-* double x = input
-* double d_y = loss Gradient of the next layer
-* double lr = learning rate
-
-### layers
-A layer uses splines as substitution for wheight and bias matricies.
-Layers are implemented similar to torch.nn.linear(); -To create a new layer call: -```cpp -SplineNetLib::layer layer_instance = layer(in_size,out_size,detail,max); -``` -* unsigned int in_size = num of elements in the input vector -* unsigned int out_size = num of elements in the target vector (like neurons in linear) -* unsigned int detail = num of controlpoints (exept for default points at 0,0 and max,0) -* double max = Maximum x value (recomended to be 1.0) - -To load a layer from previously found points call: -```cpp -SplineNetLib::layer layer_instance = layer(points,parameters); -``` -**assuming namespace std** -* vector>>> points ({{{{x,y},...},...},...}) = Matrix like (input size • output size • detail + 2 • 2) -* vector>>> parameters ({{{{0,0,0,0},...},...},...} = Matrix like (input size • output size • detail + 1 • 4) - -To fully init a layer call: -```cpp -layer_instance.interpolate_splines(); -``` -**Single layer training:** - -- single sample forward pass: - -**assuming namespace std** -```cpp -vector pred = layer_instance.forward(X, normalize); -``` -* vector X = input vector (with size == layer input size) -* bool normalize = output normalization (if True output will be between 0 and 1) -* pred.size() == layer output size - -- batched forward pass: -```cpp -vector> pred = layer_instance.forward(X, normalize); -``` -* vector> X = batched input (with size == batch size , layer input size) -* bool normalize = output normalization (if True output will be between 0 and 1) -* pred.size() = batch size -* pred[0].size() = layer output size - -- single sample backward pass: - -**assuming namespace std** -```cpp -vector loss_gradient = layer_instance.backward(X,d_y); -``` - -* vector X = input (either from previous layer or from dataset) -* vector d_y = loss_gradient (from next layer or loss function) -* loss_gradient == d_y for the previous layers backward pass - -- batched backward pass: -```cpp -vector> loss_gradient = layer_instance.backward(X, d_y); -``` - -* vector> X = batched input (either from previous layer or from dataset) -* vector> d_y = batched loss_gradient (from next layer or from loss function) -* loss_gradient == d_y for the previous layer backward pass (propagated gradient) - -**layer size:** - -$$ -\text{layer parameters} = \text{input size} × \text{output size} × (\text{detail} + 2) × 2 + \text{input size} * \text{output size} × (\text{detail} + 1) × 4 -$$ - -### Network - -To create a spline network call -```cpp -SplineNetLib::nn network_instance = nn(num_layers,input_sizes,output_sizes,details,max_values) -``` -**assuming namespace std** -* int num_layers = number of layers the network is supposed to have -* vector input_sizes = input_sizes for the layer at each index (e.g. 
{2,3} layer 0 takes 2 inputs)
-* vector output_sizes = output_sizes for each layer
-* vector details = detail for each layer
-* vector max_values = max value for each layer (best to set all layers except last to 1.0 and use activation functions to normalize the output between 0 and 1)
-
-**Training**
-
-- forward pass:
-
-  ```cpp
-  std::vector pred = network_instance.forward(X, normalize)
-  ```
-  * vector X = input
-  * bool normalize = normalize outputs (not recommended better use activation functions and itterate manually over the layers)
-
-- backwards pass
-
-```cpp
-std::vector loss_gradient = network_instance.backward(X,d_y)
-```
-* std::vector X = forward prediction
-* std::vector d_y = loss_gradient
-
-(when using the manual approach meaning iterating manually over layers to apply activations you have to do the backward pass manually aswell.)
-
 ## python Implementation/documentation
 
 ### import
diff --git a/docs/cpp_splines.md b/docs/cpp_splines.md
new file mode 100644
index 0000000..b193049
--- /dev/null
+++ b/docs/cpp_splines.md
@@ -0,0 +1,147 @@
+### splines
+
+The splines are the main computation unit of a layer. They allow for an easily adjustable and visualizable alternative to weight matrices.
+To create a spline call:
+```cpp
+SplineNetLib::spline spline_instance = spline(points,parameters);
+```
+where points and parameters are vectors of shapes:
+
+$$
+( \text{output size}, \text{input size}, 2)
+$$
+
+and
+
+$$
+( \text{output size},\text{input size}, 4)
+$$
+
+**Note** that the x values of the points list must be sorted from smallest to biggest.
+
+to fully initialize the spline call:
+```cpp
+Spline_instance.interpolate();
+```
+this, although not always necessary, will adjust the parameters with respect to the points.
+
+To evaluate the spline at point x do:
+```cpp
+double y = Spline_instance.forward(x)
+```
+**Note** that x must be between 0 and the largest x value in the spline's points list. Trying to access x values outside the spline will result in an error.
+
+To perform a backward pass call:
+```cpp
+double loss_grad = spline.backward(x,d_y,lr)
+```
+* double x = input
+* double d_y = loss Gradient of the next layer
+* double lr = learning rate
+
+### layers
+
+A layer uses splines as a substitution for weight and bias matrices.
+Layers are implemented similar to torch.nn.Linear().
+To create a new layer call:
+```cpp
+SplineNetLib::layer layer_instance = layer(in_size,out_size,detail,max);
+```
+* unsigned int in_size = num of elements in the input vector
+* unsigned int out_size = num of elements in the target vector (like neurons in linear)
+* unsigned int detail = num of control points (except for default points at 0,0 and max,0)
+* double max = Maximum x value (recommended to be 1.0)
+
+To load a layer from previously found points call:
+```cpp
+SplineNetLib::layer layer_instance = layer(points,parameters);
+```
+**assuming namespace std**
+* vector>>> points ({{{{x,y},...},...},...}) = Matrix like (input size • output size • detail + 2 • 2)
+* vector>>> parameters ({{{{0,0,0,0},...},...},...} = Matrix like (input size • output size • detail + 1 • 4)
+
+To fully init a layer call:
+```cpp
+layer_instance.interpolate_splines();
+```
+**Single layer training:**
+
+- single sample forward pass:
+
+**assuming namespace std**
+```cpp
+vector pred = layer_instance.forward(X, normalize);
+```
+* vector X = input vector (with size == layer input size)
+* bool normalize = output normalization (if True output will be between 0 and 1)
+* pred.size() == layer output size
+
+- batched forward pass:
+```cpp
+vector> pred = layer_instance.forward(X, normalize);
+```
+* vector> X = batched input (with size == batch size , layer input size)
+* bool normalize = output normalization (if True output will be between 0 and 1)
+* pred.size() = batch size
+* pred[0].size() = layer output size
+
+- single sample backward pass:
+
+**assuming namespace std**
+```cpp
+vector loss_gradient = layer_instance.backward(X,d_y);
+```
+
+* vector X = input (either from previous layer or from dataset)
+* vector d_y = loss_gradient (from next layer or loss function)
+* loss_gradient == d_y for the previous layer's backward pass
+
+- batched backward pass:
+```cpp
+vector> loss_gradient = layer_instance.backward(X, d_y);
+```
+
+* vector> X = batched input (either from previous layer or from dataset)
+* vector> d_y = batched loss_gradient (from next layer or from loss function)
+* loss_gradient == d_y for the previous layer backward pass (propagated gradient)
+
+**layer size:**
+
+$$
+\text{layer parameters} = \text{input size} × \text{output size} × (\text{detail} + 2) × 2 + \text{input size} × \text{output size} × (\text{detail} + 1) × 4
+$$
+
+### Network
+
+To create a spline network call:
+```cpp
+SplineNetLib::nn network_instance = nn(num_layers,input_sizes,output_sizes,details,max_values)
+```
+**assuming namespace std**
+* int num_layers = number of layers the network is supposed to have
+* vector input_sizes = input_sizes for the layer at each index (e.g. {2,3} means layer 0 takes 2 inputs)
+* vector output_sizes = output_sizes for each layer
+* vector details = detail for each layer
+* vector max_values = max value for each layer (best to set all layers except last to 1.0 and use activation functions to normalize the output between 0 and 1)
+
+**Training**
+
+- forward pass:
+
+  ```cpp
+  std::vector pred = network_instance.forward(X, normalize)
+  ```
+  * vector X = input
+  * bool normalize = normalize outputs (not recommended; better to use activation functions and iterate manually over the layers)
+
+- backward pass
+
+```cpp
+std::vector loss_gradient = network_instance.backward(X,d_y)
+```
+* std::vector X = forward prediction
+* std::vector d_y = loss_gradient
+
+(when using the manual approach, meaning iterating manually over layers to apply activations, you have to do the backward pass manually as well.)
+
+[<- back to Documentation](../README.md)
\ No newline at end of file
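As a quick sanity check of the layer-size formula that moved into `docs/cpp_splines.md` above: a layer with input size 3, output size 2 and detail 2 holds 3 · 2 = 6 splines, each with detail + 2 = 4 points (8 stored coordinates) and detail + 1 = 3 cubic segments (12 coefficients), so

$$
3 × 2 × (2 + 2) × 2 + 3 × 2 × (2 + 1) × 4 = 48 + 72 = 120
$$

parameters in total.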
From f8bee993c48855ecd0efd2f694f7958051937a07 Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Sat, 15 Feb 2025 21:55:59 +0100
Subject: [PATCH 15/19] reconstructing the documentation into multiple
 specific .md files

---
 README.md          | 150 +---------------------------------------
 docs/py_splines.md | 146 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 148 insertions(+), 148 deletions(-)
 create mode 100644 docs/py_splines.md

diff --git a/README.md b/README.md
index 52f9e62..2464417 100644
--- a/README.md
+++ b/README.md
@@ -4,12 +4,10 @@
 
 [goals](#goals)
 
-[C++ spline documentation](docs/cpp_spline.md)
+[C++ spline documentation](docs/cpp_splines.md)
 
-[python documentation](#python-Implementationdocumentation)
+[python spline documentation](docs/py_splines.md)
 
-1. [splines](#splines-2)
-2. [layers](#layer-documentation-comming-soon)
 
 ## New:
@@ -29,150 +27,6 @@ see [install for python](#install-for-python) to install
 2. achieve similar or better precision to common deep learning approaches whilst keeping the structure as light-wheight and fast as possible.
 3. allow easy adaptability to existing architectures like convolutional and recurrent networks.
 
-## python Implementation/documentation
-
-### import
-
-```python
-import PySplineNetLib as some_name
-```
-
-### Splines
-Splines are the main computation unit for this approach, they essentially provide an easily visualizable alternative to weight matrices
-
-- spline creation:
-```python
-spline_instance = PySplineNetLib.spline(points,parameters)
-```
-* points : list = list of points like (num points, 2)
-* parameters : list = list of parameters like (num points - 1, 4)
-
-**full example**
-
-```python
-points : list = [[0.0,0.0],[0.5,0.25],[1.0,1.0]]
-parameters : list = [[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0]]
-
-spline_instance = PySplineNetLib.spline(points,parameters)
-```
-
-or alternatively do:
-
-```python
-spline_instance = PySplineNetLib.spline([[0.0,0.0],[0.5,0.25],[1.0,1.0]],[[0.0]*4]*2)
-```
-
-- spline interpolation:
-
-to properly init a spline call .interpolation()
-
-```python
-spline_instance.interpolation()
-```
-
-this ensures that the parameters are properly set for evaluation and training
-
-- spline forward pass / evaluation:
-
-to evaluate the spline at x call
-
-```python
-y : float = spline_instance.forward(x)
-```
-
-x : float = point to be evaluated
-
-- spline backward / gradient propagation:
-
-to find the spline's gradient based on a given loss grad at spline point (x,y) call
-
-```python
-d_y : float = spline_instance.backward(x, d_y, y)
-```
-x : float = point that was last evaluated
-
-y : float = actual target
-
-d_y : float = gradient of loss with (x,target) with respect to spline (x,y) (=> loss.backward() or d_y of next layer)
-
-**Note :**
-
-The gradient of this function call is internally stored in the spline.
-
-- adjust spline based on gradient
-
-to apply the gradient from .backward and adjust the spline call:
-```python
-spline_instance.apply_grad(lr)
-```
-
-lr : float = learning rate (controls how strong the gradient affects the spline's points)
-
-## layer
-
-layers combine multiple splines to map an input vector of size m to an output vector of size n by evaluating splines at the input values and combining these outputs into the output vector. To achieve this the layer uses an m x n spline matrix where for every input $x_i$ there exist n splines.
-
-mathematically the output $y$ is defined like this:
-
-$$
-y_j = \sum_{i=1}^{m} S_{i,j}(x_i), \quad \forall j \in \{1, \dots, n\}
-$$
-
-for example given input size 3 and output size 2, output $y_1$ is the sum of the splines $S_{i,1}(x_i)$ with $i$ from 1 to 3 (the input size)
-
-To create a new layer do:
-
-```python
-layer_instance = PySplineNetLib.layer(input_size, output_size, detail, max)
-```
-
-where:
-
-input_size : int = the size of the input vector
-output_size : int = the expected size of the output vector
-detail : int = the number of control points for each spline (NOTE that the spline has detail + 2 points, so to get 10 points detail should be 8)
-max : float = the maximum value that any spline in the layer can evaluate (recommended 1.0 combined with activations that map input and output to range(0,1))
-
-alternatively you can create a layer with start values for points and parameters like this:
-
-```python
-layer_instance = PySplineNetLib.layer(points, parameters)
-```
-
-with:
-points : list = nested list of points like : (input_size, output_size, detail +2, 2 = x,y)
-parameters : list = nested list of parameters like : (input_size, output_size, detail +1, 4)
-
-to fully init the layer call:
-
-```python
-layer_instance.interpolate_splines()
-```
-
-### forward pass
-
-```python
-pred = layer_instance.forward(X)
-```
-
-where:
-
-X : list = single input vector or batched input vector
-pred : list = prediction vector (also with batch dimension if the input was batched)
-
-### backward pass
-
-```python
-d_y = layer_instance.backward(X, d_y)
-```
-
-where:
-
-X is the last input this layer received
-d_y is the propagated gradient from the next layer (or the loss function)
-
-Note that backward will apply the gradient to all splines in the layer automatically
-
 ## install for c++
 
 ```txt
diff --git a/docs/py_splines.md b/docs/py_splines.md
new file mode 100644
index 0000000..9805eb1
--- /dev/null
+++ b/docs/py_splines.md
@@ -0,0 +1,146 @@
+
+## python Implementation/documentation
+
+### import
+
+```python
+import PySplineNetLib as some_name
+```
+
+### Splines
+Splines are the main computation unit for this approach, they essentially provide an easily visualizable alternative to weight matrices
+
+- spline creation:
+```python
+spline_instance = PySplineNetLib.spline(points,parameters)
+```
+* points : list = list of points like (num points, 2)
+* parameters : list = list of parameters like (num points - 1, 4)
+
+**full example**
+
+```python
+points : list = [[0.0,0.0],[0.5,0.25],[1.0,1.0]]
+parameters : list = [[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0]]
+
+spline_instance = PySplineNetLib.spline(points,parameters)
+```
+
+or alternatively do:
+
+```python
+spline_instance = PySplineNetLib.spline([[0.0,0.0],[0.5,0.25],[1.0,1.0]],[[0.0]*4]*2)
+```
+
+- spline interpolation:
+
+to properly init a spline call .interpolation()
+
+```python
+spline_instance.interpolation()
+```
+
+this ensures that the parameters are properly set for evaluation and training
+
+- spline forward pass / evaluation:
+
+to evaluate the spline at x call
+
+```python
+y : float = spline_instance.forward(x)
+```
+
+x : float = point to be evaluated
+
+- spline backward / gradient propagation:
+
+to find the spline's gradient based on a given loss grad at spline point (x,y) call
+
+```python
+d_y : float = spline_instance.backward(x, d_y, y)
+```
+x : float = point that was last evaluated
+
+y : float = actual target
+
+d_y : float = gradient of loss with (x,target) with respect to spline (x,y) (=> loss.backward() or d_y of next layer)
+
+**Note :**
+
+The gradient of this function call is internally stored in the spline.
+
+- adjust spline based on gradient
+
+to apply the gradient from .backward and adjust the spline call:
+```python
+spline_instance.apply_grad(lr)
+```
+
+lr : float = learning rate (controls how strong the gradient affects the spline's points)
+
+## layer
+
+layers combine multiple splines to map an input vector of size m to an output vector of size n by evaluating splines at the input values and combining these outputs into the output vector. To achieve this the layer uses an m x n spline matrix where for every input $x_i$ there exist n splines.
+
+mathematically the output $y$ is defined like this:
+
+$$
+y_j = \sum_{i=1}^{m} S_{i,j}(x_i), \quad \forall j \in \{1, \dots, n\}
+$$
+
+for example given input size 3 and output size 2, output $y_1$ is the sum of the splines $S_{i,1}(x_i)$ with $i$ from 1 to 3 (the input size)
+
+To create a new layer do:
+
+```python
+layer_instance = PySplineNetLib.layer(input_size, output_size, detail, max)
+```
+
+where:
+
+input_size : int = the size of the input vector
+output_size : int = the expected size of the output vector
+detail : int = the number of control points for each spline (NOTE that the spline has detail + 2 points, so to get 10 points detail should be 8)
+max : float = the maximum value that any spline in the layer can evaluate (recommended 1.0 combined with activations that map input and output to range(0,1))
+
+alternatively you can create a layer with start values for points and parameters like this:
+
+```python
+layer_instance = PySplineNetLib.layer(points, parameters)
+```
+
+with:
+points : list = nested list of points like : (input_size, output_size, detail +2, 2 = x,y)
+parameters : list = nested list of parameters like : (input_size, output_size, detail +1, 4)
+
+to fully init the layer call:
+
+```python
+layer_instance.interpolate_splines()
+```
+
+### forward pass
+
+```python
+pred = layer_instance.forward(X)
+```
+
+where:
+
+X : list = single input vector or batched input vector
+pred : list = prediction vector (also with batch dimension if the input was batched)
+
+### backward pass
+
+```python
+d_y = layer_instance.backward(X, d_y)
+```
+
+where:
+
+X is the last input this layer received
+d_y is the propagated gradient from the next layer (or the loss function)
+
+Note that backward will apply the gradient to all splines in the layer automatically
+
+[<- back to Documentation](../README.md)
\ No newline at end of file
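The spline section of `docs/py_splines.md` above maps directly onto the unit test added earlier in this series; a minimal round trip with the test's values (the commented numbers are the ones asserted in `tests/unit_tests/py_spline_tests.py`, so treat them as illustrative):

```python
import PySplineNetLib

s = PySplineNetLib.spline([[0.0, 0.0], [0.5, 1.0], [1.0, 2.0]],
                          [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]])
s.interpolation()                  # fit the cubic parameters to the points
y = s.forward(0.25)                # ~0.5 on this spline
d_y = s.backward(0.25, 0.0, 1.0)   # prediction - target = 0.5 - 1.0 = -0.5
s.apply_grad(1.0)                  # move the control points along the stored gradient
s.interpolation()                  # re-fit the parameters to the shifted points
```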
From 1604ce3862ba707dbe4a71c74945b7802bbab9c8 Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Sat, 15 Feb 2025 22:58:18 +0100
Subject: [PATCH 16/19] reconstructing the documentation into multiple
 specific .md files

---
 README.md                        |   4 +
 docs/cpp_CTensor.md              | 135 +++++++++++++++++++++++++++
 docs/cpp_splines.md              |   2 +-
 docs/py_splines.md               |   2 +-
 include/SplineNetLib/CTensor.hpp |   1 +
 5 files changed, 142 insertions(+), 2 deletions(-)
 create mode 100644 docs/cpp_CTensor.md

diff --git a/README.md b/README.md
index 2464417..1193d67 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,10 @@ see [install for python](#install-for-python) to install
 
 * batch compatibility for layers
 
+* CTensor class (tensor class with automatic computation graph and gradient propagation)
+
+* python version for CTensor
+
 **documentation was not yet updated some features might have changed and new features were added**
 
 **updates will follow soon**
diff --git a/docs/cpp_CTensor.md b/docs/cpp_CTensor.md
new file mode 100644
index 0000000..e9cc365
--- /dev/null
+++ b/docs/cpp_CTensor.md
@@ -0,0 +1,135 @@
+## CPP CTensor Documentation
+
+### include
+
+First, include the library header:
+
+```cpp
+#include "SplineNetLib/SplineNet.hpp"
+```
+
+### CTensor constructors
+
+The CTensor class is useful to perform tensor operations while automatically tracking the operations that a CTensor was involved with.
+A CTensor stores the N dimensional data in a flat projected vector (std::vector) alongside its actual shape (std::vector).
+It will also store all arithmetic functions that it was used in or created from in a grad_fn vector (std::vector>>). Important to note here is that a CTensor only gets a new grad_fn if it was the direct result of an operation (e.g. c = a + b , here only c gets the grad_fn entry).
+grad_fns are classes that hold information about the parents of a CTensor (e.g. c = a + b, here c gets a new grad_fn that knows that a and b are the parents). They also have functions that determine the behaviour of the gradient propagation.
+Calling the backward function on one CTensor will automatically calculate the respective gradients of all other CTensors in the graph.
+
+**Note** that the CTensor architecture was inspired by the pytorch tensor architecture. Read more here : [pytorch](https://github.com/pytorch/pytorch)
+
+CTensors have multiple constructor options:
+
+1: construct from nested vector
+
+```cpp
+std::vector> data = {{1,2,3},{4,5,6}};
+
+auto CTensor_instance = SplineNetLib::CTensor(data);
+```
+
+this creates a CTensor of shape {2,3}.
+**Note** that new CTensors always have their requires gradient flag set to True.
+
+2: construct from flat initializer list with initializer list of shape:
+
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1.0,2.0,3.0,4.0,5.0,6.0}, {2,3});
+```
+
+this will result in the same CTensor as in the previous constructor
+
+3: construct from flat vector and shape
+
+```cpp
+std::vector shape = {2,3};
+std::vector data = {1,2,3,4,5,6};
+
+auto CTensor_instance = SplineNetLib::CTensor(data, shape);
+```
+
+4: construct from existing CTensor (shallow copy)
+
+```cpp
+auto first_CTensor = SplineNetLib::CTensor({1.0,2.0,3.0,4.0,5.0,6.0}, {2,3});
+
+auto new_CTensor = SplineNetLib::CTensor(first_CTensor);
+```
+
+**Note** this creates a shallow copy; any changes to one will affect the other
+
+4.1: deep copy / clone
+
+If an exact copy of a CTensor that is independent is needed, do:
+
+```cpp
+auto first_CTensor = SplineNetLib::CTensor({1.0,2.0,3.0,4.0,5.0,6.0}, {2,3});
+
+auto new_CTensor = first_CTensor.clone();
+```
+
+this will create a deep copy of "first_CTensor"
+
+### CTensor shape related functions
+
+#### squeeze
+
+squeeze will remove the indexed dimension from the shape. **Note** that the tensor size will remain the same and the size of the adjacent dimension will increase.
+
+syntax:
+```cpp
+Ctensor.squeeze(size_t dim);
+```
+
+example:
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,3},{1,3});
+CTensor_instance.squeeze(0);
+```
+
+this will turn shape (1,3) into (3)
+
+#### unsqueeze
+
+unsqueeze will add a dimension of size 1 at the given index
+
+syntax:
+```cpp
+Ctensor.unsqueeze(size_t dim);
+```
+
+example:
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,3},{3});
+CTensor_instance.unsqueeze(0);
+```
+
+this turns CTensor with shape (3) to CTensor with shape (1,3)
+
+#### expand
+
+expand can increase the size of the selected dimension by a factor n. The data at the selected dimension will be copied and appended n times.
+syntax:
+```cpp
+Ctensor.expand(size_t dim, size_t factor);
+```
+
+example:
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,3},{1,3});
+CTensor_instance.expand(0, 3);
+```
+
+the shape (1,3) becomes (3,3) and the data
+
+((1,2,3)) becomes ↓
+
+((1,2,3),
+ (1,2,3),
+ (1,2,3))
+
+#### permute
+
+swaps around dimension sizes
+
+syntax:
+```cpp
+Ctensor.permute(index_vector);
+```
+example:
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,1,2,1,2,1,2},{1,4,2});
+std::vector index_vector = {0,2,1};
+CTensor_instance.permute(index_vector);
+```
+
+the shape (1,4,2) will become (1,2,4). **Note** that this will not change the actual data vector, as the permutation only affects the projection logic, meaning that indexing a permuted CTensor will give different results than before the permutation although the underlying data is the same.
+
+#### transpose
+
+this transposes the CTensor, meaning it swaps the innermost two dimensions (including the data in the flat vector)
+
+syntax:
+
+```cpp
+Ctensor.transpose();
+```
+
+example:
+
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1.0,2.0,3.0,4.0,5.0,6.0}, {2,3});
+
+CTensor_instance.transpose();
+```
+
+this will swap dim0 and dim1, so shape (2,3) becomes (3,2). The data vector [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] will change to [1.0, 4.0, 2.0, 5.0, 3.0, 6.0] to fit the new shape.
+
+
+**more coming soon**
+
+[<- back to Documentation](../README.md)
\ No newline at end of file
diff --git a/docs/cpp_splines.md b/docs/cpp_splines.md
index b193049..f25a2fb 100644
--- a/docs/cpp_splines.md
+++ b/docs/cpp_splines.md
@@ -1,4 +1,4 @@
-### splines
+### CPP Splines Documentation
 
 The splines are the main computation unit of a layer. They allow for an easily adjustable and visualizable alternative to weight matrices.
 To create a spline call:
diff --git a/docs/py_splines.md b/docs/py_splines.md
index 9805eb1..97a396d 100644
--- a/docs/py_splines.md
+++ b/docs/py_splines.md
@@ -1,5 +1,5 @@
-
-## python Implementation/documentation
+
+## python Spline Implementation/documentation
 
 ### import
diff --git a/include/SplineNetLib/CTensor.hpp b/include/SplineNetLib/CTensor.hpp
index 272f40b..758406c 100644
--- a/include/SplineNetLib/CTensor.hpp
+++ b/include/SplineNetLib/CTensor.hpp
@@ -72,6 +72,7 @@ class CTensor {
         _tensor_data = new DTensor(init, shape);
     }
 
+
     CTensor(const std::vector& data, const std::vector& shape) {
         _tensor_data = new DTensor(data, shape);
     }

From 80db8f7f09f10568f213b07e2ee18144c54ad750 Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Sat, 15 Feb 2025 22:59:33 +0100
Subject: [PATCH 17/19] reconstructing the documentation into multiple
 specific .md files

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 1193d67..1d3336e 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,8 @@
 
 [C++ spline documentation](docs/cpp_splines.md)
 
+[C++ CTensor documentation](docs/cpp_CTensor.md)
+
 [python spline documentation](docs/py_splines.md)
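The transpose semantics documented above are what make the matmul operator `*` line up: after `b.transpose()` a (2, 3) tensor multiplies a (3, 2) one. The same behaviour is visible through the Python bindings; the values below are the ones asserted in `test_CTensor_math_Test`:

```python
import PySplineNetLib

a = PySplineNetLib.CTensor([[1, 2, 3], [4, 5, 6]])  # shape (2, 3)
b = PySplineNetLib.CTensor([[6, 5, 4], [3, 2, 1]])  # shape (2, 3)

b.transpose()      # shape (2, 3) -> (3, 2), data reordered to match
d = a * b          # matmul: (2, 3) x (3, 2) -> (2, 2)
print(d.data())    # [28.0, 10.0, 73.0, 28.0]
print(d.shape())   # [2, 2]
```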
From 7a97c01578c47e63a59eafabd0084b773aa2d1da Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Thu, 20 Feb 2025 11:49:02 +0100
Subject: [PATCH 18/19] fixed bug where reshaping ops were not included in comp
 graph. Added ReShapeFunction of type Function to manage reshape ops in the
 graph. This uses reshape flags see CTensorFunc.hpp

---
 README.md                             |  4 ++
 docs/cpp_CTensor.md                   | 66 +++++++++++++++++++++++++
 include/SplineNetLib/CTensor.hpp      |  2 +
 include/SplineNetLib/CTensorFunc.hpp  | 37 +++++++++++++-
 include/SplineNetLib/CTensorUtils.hpp |  4 ++
 src/CTensor.tpp                       | 70 ++++++++++++++++++++++++++-
 src/CTensorFunc.tpp                   | 56 +++++++++++++++++++--
 src/CTensorUtils.tpp                  | 42 ++++------------
 8 files changed, 242 insertions(+), 39 deletions(-)

diff --git a/README.md b/README.md
index 1d3336e..91ab0ca 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,9 @@
 # About Spline-based-DeepLearning
 
+## bugs
+
+* reshaping a CTensor after performing operations on it may break the gradient calculation!
+
 ## Table of contents
 
 [goals](#goals)
diff --git a/docs/cpp_CTensor.md b/docs/cpp_CTensor.md
index e9cc365..7ab2c38 100644
--- a/docs/cpp_CTensor.md
+++ b/docs/cpp_CTensor.md
@@ -70,6 +70,33 @@ auto new_CTensor = first_CTensor.clone();
 
 this will create a deep copy of "first_CTensor"
 
+### CTensor getter functions
+
+#### data()
+
+this returns the inner data vector from the CTensor. **Note** that this data vector is the flat representation of the CTensor.
+
+example:
+
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,3,4,5,6},{2,3});
+auto data = CTensor_instance.data();
+```
+
+here data will be a vector like {1,2,3,4,5,6}, where 'T' is the datatype of CTensor_instance.
+
+#### shape()
+
+this returns the shape of the CTensor
+
+example:
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,3,4,5,6},{2,3});
+auto shape = CTensor_instance.shape();
+```
+
+this returns a vector = {2,3}.
 
 ### CTensor shape related functions
 
 #### squeeze
diff --git a/include/SplineNetLib/CTensor.hpp b/include/SplineNetLib/CTensor.hpp
index 758406c..6e5b087 100644
--- a/include/SplineNetLib/CTensor.hpp
+++ b/include/SplineNetLib/CTensor.hpp
@@ -120,6 +120,8 @@ class CTensor {
     void permute(const std::vector &permutation_indecies) ; //will swap dimesnions at the permutation indecies //shape (2,3,4) permute(2,0,1) becomes: (4,2,3)
 
+    void reduce(const size_t &dim, const size_t &factor) ;
+
     void transpose() ;
 
     //-----auto_grad-----
diff --git a/include/SplineNetLib/CTensorFunc.hpp b/include/SplineNetLib/CTensorFunc.hpp
index 8b686f7..8f86a3e 100644
--- a/include/SplineNetLib/CTensorFunc.hpp
+++ b/include/SplineNetLib/CTensorFunc.hpp
@@ -16,6 +16,16 @@
 #include "CTensorUtils.hpp"
 
 namespace SplineNetLib {
+
+typedef enum {
+    RESHAPE_SQUEEZE = 1,
+    RESHAPE_UNSQUEEZE = 2,
+    RESHAPE_EXPAND = 3,
+    RESHAPE_REDUCE = 4,
+    RESHAPE_PERMUTE = 5,
+    RESHAPE_TRANSPOSE = 6
+} ReshapeType;
+
 template
 class CTensor;
@@ -28,8 +38,12 @@ class Function {
     //pointers to this functions "parents" (like : a operator b)
     std::shared_ptr> a;
     std::shared_ptr> b;
+    std::vector a_shape;
+    std::vector b_shape;
 
-    Function(std::shared_ptr> A, std::shared_ptr> B) : a(A), b(B) {}
+    Function(std::shared_ptr> A, std::shared_ptr> B) : a(A), b(B),
+    /*nullptr check for A and B to ensure no segfaults happen ->*/a_shape(A ? A->shape() : std::vector {1}),
+    b_shape(B ?
B->shape() : std::vector {1}) {} //virtual desctructor virtual ~Function() = default; @@ -98,6 +112,27 @@ class MatMulFunction : public Function { virtual std::unique_ptr> clone() const override; }; +template +requires Scalar +class ReShapeFunction : public Function { +public : + + ReshapeType operation; + /* + std::vector original_shape; + std::vector new_shape; + */ + + ReShapeFunction(std::shared_ptr> a, ReshapeType _operation) : + Function(a, nullptr),operation(_operation){} + + std::vector fwd() override; + + void backward(std::vector &prop_grad, CTensor *result) override; + + virtual std::unique_ptr> clone() const override; +}; + } //namepace #include "../src/CTensorFunc.tpp" diff --git a/include/SplineNetLib/CTensorUtils.hpp b/include/SplineNetLib/CTensorUtils.hpp index fba2654..ea4f4b3 100644 --- a/include/SplineNetLib/CTensorUtils.hpp +++ b/include/SplineNetLib/CTensorUtils.hpp @@ -26,6 +26,7 @@ #include namespace SplineNetLib { + template std::string vectorToString(const std::vector& vec); @@ -88,6 +89,9 @@ template requires Scalar std::vector permute_vec(const std::vector& A, const std::vector& A_shape, const std::vector& permutation_indices) ; +//swaps last two dimensions as if transposing a ctensor +std::vector transpose_shape(const std::vector& shape) ; + } //namespace #include "../src/CTensorUtils.tpp" diff --git a/src/CTensor.tpp b/src/CTensor.tpp index 834ea71..376ac1f 100644 --- a/src/CTensor.tpp +++ b/src/CTensor.tpp @@ -39,6 +39,12 @@ void CTensor::squeeze(const size_t& dim) { this->_tensor_data->_shape[dim] *= this->_tensor_data->_shape[dim+1]; this->_tensor_data->_shape.erase(this->_tensor_data->_shape.begin() + dim + 1); } + + if (this->requires_grad) { + auto new_fn = std::make_unique>(std::make_shared>(*this), RESHAPE_SQUEEZE); + + this->_tensor_data->_grad_fn.push_back(std::move(new_fn)); + } } template @@ -50,6 +56,12 @@ void CTensor::unsqueeze(const size_t &dim) { } else { (*shape).insert((*shape).begin() + dim, 1); } + + if (this->requires_grad) { + auto new_fn = std::make_unique>(std::make_shared>(*this), RESHAPE_UNSQUEEZE); + + this->_tensor_data->_grad_fn.push_back(std::move(new_fn)); + } } template @@ -90,9 +102,56 @@ void CTensor::expand(const size_t &dim, const size_t &factor) { idx += data_size_per_expansion; } - // Update the shape and number of dimensions + auto new_shape = (*shape); + new_shape[dim] *= factor; + + //create new addfunction with shared ptr to this and other + auto new_fn = std::make_unique>(std::make_shared>(*this), RESHAPE_EXPAND); + + // Update the shape and number of dimensions (*shape)[dim] *= factor; + + this->_tensor_data->_grad_fn.push_back(std::move(new_fn)); + +} + +template +void CTensor::reduce(const size_t &dim, const size_t &factor) { + if (factor <= 1) { + return; // No reduction needed + } + + auto* shape = &(this->_tensor_data->_shape); // Pointer to shape vector + auto* data = &(this->_tensor_data->_data); + size_t n_dims = shape->size(); + + // Ensure valid dimension + if (dim >= n_dims) { + throw std::invalid_argument("Input dim: " + std::to_string(dim) + + " cannot be larger than _n_dims: " + std::to_string(n_dims)); + } + + // Ensure the shape is divisible by factor + if ((*shape)[dim] % factor != 0) { + return; + } + + // Calculate the size of sub-vectors + size_t sub_vector_size = 1; + for (size_t i = dim + 1; i < n_dims; i++) { + sub_vector_size *= (*shape)[i]; + } + size_t idx = 0; + while (idx < data->size()) { + // Remove (factor - 1) repetitions of the sub-vector + for (size_t i = 1; i < factor; i++) { + 
 template <typename T>
@@ -250,7 +309,7 @@ void CTensor<T>::clear_graph() {
 
 //can be improved with an overload: if no arg is passed, use {} so that this function below can take references
 template <typename T>
 void CTensor<T>::backward(std::vector<T> prop_grad) {
-
+    /*
     //go through all parent Functions
     for (auto &fn : this->_tensor_data->_grad_fn) {
         if (fn) {
@@ -263,6 +322,13 @@ void CTensor<T>::backward(std::vector<T> prop_grad) {
         }
     }
     //std::cout<<"debug Ct bwd fn all bwd finish\n";
+    */
+    //traverse the graph in reverse, as this makes more sense for the tree traversal
+    for (int i = this->_tensor_data->_grad_fn.size() - 1; i >= 0; i--){
+        if (this->_tensor_data->_grad_fn[i]){
+            this->_tensor_data->_grad_fn[i]->backward(prop_grad, this);
+        }
+    }
 }
 
 template <typename T>
diff --git a/src/CTensorFunc.tpp b/src/CTensorFunc.tpp
index ebb1b6a..10b5df8 100644
--- a/src/CTensorFunc.tpp
+++ b/src/CTensorFunc.tpp
@@ -252,7 +252,7 @@ void MatMulFunction<T>::backward(std::vector<T> &prop_grad, CTensor<T> *result)
     auto prop_grad_shape = result->shape();
     //std::cout<<"matmul bwd prop shape : "<<vectorToString(prop_grad_shape)<<"\n";
 
     if (prop_grad.size() == 0) {
         for (size_t i = 0; i < result->data().size(); i++) {
             prop_grad.push_back(1);
@@ -270,8 +270,9 @@ void MatMulFunction<T>::backward(std::vector<T> &prop_grad, CTensor<T> *result)
         //create a copy of b and transpose it
         auto b_copy = this->b->clone();
         b_copy.transpose();
+        auto b_shape = transpose_shape(this->b_shape);
 
-        prop_grad_a = matmul(prop_grad, b_copy.data(), prop_grad_shape, b_copy.shape());
+        prop_grad_a = matmul(prop_grad, b_copy.data(), prop_grad_shape, b_shape);
 
         //assign grad
         for (size_t i = 0; i < prop_grad_a.size(); i++) {
@@ -292,8 +293,9 @@ void MatMulFunction<T>::backward(std::vector<T> &prop_grad, CTensor<T> *result)
         //create a copy of a and transpose it
         auto a_copy = this->a->clone();
         a_copy.transpose();
+        auto a_shape = transpose_shape(this->a_shape);
 
         //std::cout<<"b bwd a_copy shape :"<<vectorToString(a_copy.shape())<<"\n";
-        prop_grad_b = matmul(a_copy.data(), prop_grad, a_copy.shape(), prop_grad_shape);
+        prop_grad_b = matmul(a_copy.data(), prop_grad, a_shape, prop_grad_shape);
 
 template <typename T>
 requires Scalar<T>
 std::unique_ptr<Function<T>> MatMulFunction<T>::clone() const {
     return std::make_unique<MatMulFunction<T>>(*this);
 }
 
+template <typename T>
+requires Scalar<T>
+std::vector<T> ReShapeFunction<T>::fwd() {
+    return this->a->data();
+}
+
+
+template <typename T>
+requires Scalar<T>
+void ReShapeFunction<T>::backward(std::vector<T> &prop_grad, CTensor<T> *result){
+    //std::cout<<"RESHAPEFUNCTION CALL\n";
+
+    switch(this->operation) {
+        case RESHAPE_SQUEEZE:
+            if (result != this->a.get()){
+                this->a->backward(prop_grad);
+            }
+            break;
+        case RESHAPE_UNSQUEEZE:
+            if (result != this->a.get()){
+                this->a->backward(prop_grad);
+            }
+            break;
+        case RESHAPE_EXPAND:
+            std::cout<<"\n\nWARNING: This CTensor was expanded in the computational graph, therefore gradients can not be calculated further in this branch\n\n";
+            break;
+
+        case RESHAPE_REDUCE:
+            break;
+        case RESHAPE_PERMUTE:
+
+            break;
+        case RESHAPE_TRANSPOSE:
+            if (result != this->a.get()){
+                this->a->backward(prop_grad);
+            }
+            break;
+        default: //should throw an exception
+            break;
+    }
+}
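+
+// Illustrative sketch (hypothetical, not from the test suite) of how a reshape op lands
+// in the autograd graph and is replayed by backward():
+//   CTensor<double> t({1.0,2.0},{1,2});   // requires_grad defaults to true
+//   t.squeeze(0);    // squeeze() pushes a ReShapeFunction(..., RESHAPE_SQUEEZE) onto t's grad_fn
+//   t.backward();    // grad_fn is walked in reverse; the squeeze node forwards prop_grad to its parent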
+
+template <typename T>
+requires Scalar<T>
+std::unique_ptr<Function<T>> ReShapeFunction<T>::clone() const{
+    return std::make_unique<ReShapeFunction<T>>(*this);
+}
+
 }//namespace
 
 #endif
\ No newline at end of file
diff --git a/src/CTensorUtils.tpp b/src/CTensorUtils.tpp
index 985de1a..ca0b26a 100644
--- a/src/CTensorUtils.tpp
+++ b/src/CTensorUtils.tpp
@@ -102,39 +102,7 @@ inline size_t stride(size_t idx, const std::vector<size_t> &shape) {
     return stride;
 }
 
-// Math functions
-/* see readable version below
-template <typename T>
-requires Scalar<T>
-std::vector<T> matmul(const std::vector<T> &A, const std::vector<T> &B, const std::vector<size_t> &A_shape, const std::vector<size_t> &B_shape) {
-    size_t batch_size = 1;
-    if (B_shape.size() != A_shape.size()) {
-        throw std::invalid_argument("A_shape.size() and B_shape.size() must be equal");
-        return std::vector<T>(1, 0);
-    }
-
-    if (A_shape.size() > 2) {
-        for (size_t i = 0; i < A_shape.size() - 2; i++) {
-            batch_size *= A_shape[i];
-        }
-    }
-
-    size_t M = A_shape[A_shape.size() - 2], K = A_shape[A_shape.size() - 1], N = B_shape[B_shape.size() - 1];
-    std::vector<T> result(batch_size * M * N);
-
-    for (size_t batch_dim = 0; batch_dim < batch_size; batch_dim++) {
-        for (size_t row = 0; row < M; row++) {
-            for (size_t col = 0; col < N; col++) {
-                T sum = 0.0;
-                for (size_t shared = 0; shared < K; shared++) {
-                    sum += A[batch_dim * M * K + row * K + shared] * B[batch_dim * K * N + shared * N + col];
-                }
-                result[batch_dim * M * N + row * N + col] = sum;
-            }
-        }
-    }
-    return result;
-}*/
+//math funcs
 
 template <typename T> // Template function that accepts any scalar type 'T' (e.g., float, double)
 requires Scalar<T> // This constraint ensures that the type 'T' is a scalar (e.g., not a matrix, vector, etc.)
@@ -200,6 +168,14 @@ std::vector<T> permute_vec(const std::vector<T>& A, const std::vector<size_t>& A_shape, const std::vector<size_t>& permutation_indices)
     return B;
 }
 
+//swaps the last two entries of a shape vector (assumes shape.size() >= 2)
+inline std::vector<size_t> transpose_shape(const std::vector<size_t>& shape) {
+    std::vector<size_t> temp = shape;
+    size_t n_dims = temp.size();
+    temp[n_dims - 2] = shape[n_dims - 1];
+    temp[n_dims - 1] = shape[n_dims - 2];
+    return temp;
+}
+
 }//namespace
 
 #endif
\ No newline at end of file

From c4e970bd8441791e0e8bef98f842fa17a75d693b Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Thu, 20 Feb 2025 13:03:21 +0100
Subject: [PATCH 19/19] new ci workflow .yml for the python lib tests

---
 .github/workflows/py_test.yml | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 .github/workflows/py_test.yml

diff --git a/.github/workflows/py_test.yml b/.github/workflows/py_test.yml
new file mode 100644
index 0000000..95b358d
--- /dev/null
+++ b/.github/workflows/py_test.yml
@@ -0,0 +1,34 @@
+name: CI - Python Tests
+
+# Trigger the workflow manually, or on pushes and pull requests that touch the library sources
+on:
+  workflow_dispatch:
+  push:
+    paths:
+      - 'src/**'       # Monitor changes in src directory
+      - 'include/**'   # Monitor changes in include directory
+  pull_request:
+    paths:
+      - 'src/**'
+      - 'include/**'
+
+jobs:
+  python-tests:
+    runs-on: ubuntu-latest  # Use the latest Ubuntu virtual environment
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'  # Specify the Python version you need
+
+      - name: Install project and dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .  # Install the project from the root directory
+
+      - name: Run Python unit tests
+        run: |
+          python -m unittest discover -s tests -p '*_test.py'  # Discover and run all unit tests in the 'tests' folder
\ No newline at end of file