From f6df88c30b5813d0f30000c4590218bb30911e8b Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Fri, 7 Feb 2025 12:16:23 +0100 Subject: [PATCH 01/19] added the ctensor class and changes to cmake --- CMakeLists.txt | 13 ++ examples/example_CTensor.cpp | 13 ++ include/SplineNetLib/CTensor.hpp | 155 +++++++++++++ include/SplineNetLib/CTensorFunc.hpp | 97 +++++++++ include/SplineNetLib/CTensorUtils.hpp | 95 ++++++++ include/SplineNetLib/splines.hpp | 1 + src/CTensor.tpp | 303 ++++++++++++++++++++++++++ src/CTensorFunc.tpp | 300 +++++++++++++++++++++++++ src/CTensorUtils.tpp | 205 +++++++++++++++++ 9 files changed, 1182 insertions(+) create mode 100644 examples/example_CTensor.cpp create mode 100644 include/SplineNetLib/CTensor.hpp create mode 100644 include/SplineNetLib/CTensorFunc.hpp create mode 100644 include/SplineNetLib/CTensorUtils.hpp create mode 100644 src/CTensor.tpp create mode 100644 src/CTensorFunc.tpp create mode 100644 src/CTensorUtils.tpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5424b75..1f9c63b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,6 +35,13 @@ add_library(SplineNetLib src/splines.cpp ) +# Add the new template-based class headers and implementations +target_sources(SplineNetLib PRIVATE + src/CTensor.tpp + src/CTensorFunc.tpp + src/CTensorUtils.tpp +) + # Specify the include directories for the library target target_include_directories(SplineNetLib PUBLIC ${PROJECT_SOURCE_DIR}/include) @@ -85,3 +92,9 @@ write_basic_package_version_file( install(FILES "${CMAKE_CURRENT_BINARY_DIR}/SplineNetLibConfigVersion.cmake" DESTINATION lib/cmake/SplineNetLib) + + + + + + diff --git a/examples/example_CTensor.cpp b/examples/example_CTensor.cpp new file mode 100644 index 0000000..89a5d33 --- /dev/null +++ b/examples/example_CTensor.cpp @@ -0,0 +1,13 @@ +#include "../include/SplineNetLib/layers.hpp" + +using namespace SplineNetLib; + +int main() { + + //this will create a CTensor that holds a data vector and shape vector, all other member variables are uninitialized + auto a = CTensor({1,1,1,2,2,2},{2,3}); + + std::cout<<"created CTensor a with data : "<, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). 
+// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + +#ifndef CTENSOR_HPP +#define CTENSOR_HPP + +#include "CTensorFunc.hpp" + +namespace SplineNetLib { + +template +requires Scalar +class Function; + +template +class DTensor{ +public: + std::vector _data; + std::vector _shape; + std::vector _grad; + std::vector>> _grad_fn; + int _ref_c; + + DTensor(const std::vector& data, const std::vector& shape) : + _data(data), _shape(shape), _ref_c(1) {} + + DTensor(const std::initializer_list& data, const std::initializer_list& shape) : + _data(data), _shape(shape), _ref_c(1) {} + + void add_ref(){ + _ref_c++; + } + + void rmf_ref(){ + _ref_c--; + if (_ref_c == 0){ + delete this; + } + } +}; + + +template +class CTensor { +private: + + + +public: + + DTensor* _tensor_data; + + bool requires_grad = true; + + CTensor(const std::initializer_list& init, const std::initializer_list& shape) { + _tensor_data = new DTensor(init, shape); + } + + CTensor(const std::vector& data, const std::vector& shape) { + _tensor_data = new DTensor(data, shape); + } + + CTensor(const CTensor& other){ + _tensor_data = other._tensor_data; + _tensor_data->_ref_c++; + } + + + ~CTensor(){ + _tensor_data->rmf_ref(); + } + + //-----getters----- + + std::vector data() const { return this->_tensor_data->_data; } + + std::vector shape() const { return this->_tensor_data->_shape; } + + std::vector grad() const { return this->_tensor_data->_grad; } + + std::vector>> grad_fn() const { return this->_tensor_data->grad_fn; } + + void zero_grad(); + + //-----shape-utils----- + + void squeeze(const size_t &dim) ; + + void unsqueeze(const size_t &dim) ; + + void expand(const size_t &dim, const size_t &factor) ; + + void permute(const std::vector &permutation_indecies) ; + + void transpose() ; + + //-----auto_grad----- + + void clear_history() ; + + void clear_graph() ; + //maybe add overload o this so that f no arg was passed propagated grad is set to {}, than this function below could use all by ref + void backward(std::vector prop_grad = {}) ; + + + //-----operator----- + + auto operator[](size_t idx) ; + + auto operator+(CTensor &other) ; + + auto operator-(CTensor &other) ; + + auto operator*(CTensor &other) ; + + //CTensor& operator=(const CTensor &other) noexcept; + + //CTensor& operator=(CTensor &&other) ; + + +}; +/* +template +CTensor zeros(std::vector shape) ; + +template +CTensor ones(std::vector shape) ; + +template +CTensor random(std::vector shape, T min, T max) ; + +template +CTensor Tensor(U data) ; + +template +CTensor Tensor(U data) ; + +template +CTensor Tensor(std::vector data, std::vector shape) ; +*/ +} //namespace + +#include "../src/CTensor.tpp" + + +#endif \ No newline at end of file diff --git a/include/SplineNetLib/CTensorFunc.hpp b/include/SplineNetLib/CTensorFunc.hpp new file mode 100644 index 0000000..b3fb503 --- /dev/null +++ b/include/SplineNetLib/CTensorFunc.hpp @@ -0,0 +1,97 @@ +// Copyright (c) <2025>, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). 
+// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + +#ifndef CTENSORFUNC_HPP +#define CTENSORFUNC_HPP + +#include "CTensorUtils.hpp" + +namespace SplineNetLib { + +template +class CTensor; + +//base function class for specialization +template +requires Scalar +class Function { +public: + //pointers to this functions "parents" (like : a operator b) + std::shared_ptr> a; + std::shared_ptr> b; + + Function(std::shared_ptr> A, std::shared_ptr> B) : a(A), b(B) {} + + //virtual desctructor + virtual ~Function() = default; + + virtual std::vector fwd() = 0; + + virtual void backward(std::vector &prop_grad, CTensor *result) = 0; + + static std::unordered_set*> global_chain; + + void clear_graph_f(); +}; + +template +requires Scalar +std::unordered_set*> Function::global_chain; + +//addition class for CTensor::operator+ +template +requires Scalar +class AddFunction : public Function { +public: + + //construct base class + AddFunction(std::shared_ptr> a, std::shared_ptr> b) : Function(a, b) {} + + std::vector fwd() override ; + + void backward(std::vector &prop_grad, CTensor *result) override; +}; + +//subtractor function class for CTensor::operator- +template +requires Scalar +class SubFunction : public Function { +public: + + //construct base class + SubFunction(std::shared_ptr> a, std::shared_ptr> b) : Function(a, b) {} + + std::vector fwd() override; + + void backward(std::vector &prop_grad, CTensor *result) override; + +}; + +//matrix multiplication function class for CTensor::operator* +template +requires Scalar +class MatMulFunction : public Function { +public: + + //construct base class + MatMulFunction(std::shared_ptr> a, std::shared_ptr> b) : Function(a, b) {} + + std::vector fwd() override; + + void backward(std::vector &prop_grad, CTensor *result) override; +}; + +} //namepace + +#include "../src/CTensorFunc.tpp" + +#endif \ No newline at end of file diff --git a/include/SplineNetLib/CTensorUtils.hpp b/include/SplineNetLib/CTensorUtils.hpp new file mode 100644 index 0000000..fba2654 --- /dev/null +++ b/include/SplineNetLib/CTensorUtils.hpp @@ -0,0 +1,95 @@ +// Copyright (c) <2025>, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). 
+// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + +#ifndef CTENSORUTILS_HPP +#define CTENSORUTILS_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace SplineNetLib { + +template +std::string vectorToString(const std::vector& vec); + +template +concept Container = requires(T t) { + typename T::value_type; // Requires a nested `value_type` (if T::value_type fails T is not a Container) + typename T::iterator; // Requires a nested `iterator` + typename T::const_iterator; // Also requires a nested `const_iterator` for const containers + { t.begin() } -> std::input_iterator; // Requires a `begin()` function that has return type std::input_iterator + { t.end() } -> std::input_iterator; // Requires an `end()` function that has return type std::input_iterator + { t.size() } -> std::convertible_to; //also requires a `size()` function thatvhas return type std::convertible_to +}; + +template +concept Scalar = std::is_arithmetic_v; // Requires T to be is_arithmetic_v + +// Function to generate a std::vector with random values +template +std::vector randomVector(size_t size, T min, T max) ; + + +//base case for recursive n_dims check +template +int get_depth(const T &scalar) ; + +//Recursive case for the n_dims check will return the number of dimensions od the input +template +int get_depth (const T &vec) ; + +//base Recursive case for the get_shape func will return the shape +template +std::vector get_shape(const T &scalar, std::vector Shape = {}) ; + +//Recursive function to get shape of container (assumes uniform dims) pushes back the size of the container at current recursion depth +template +std::vector get_shape(const T &vec, std::vector Shape = {}) ; + +//base case if input is scalar type (will in place push back to the result) +template +void Flatten(const T &in_scalar, std::vector &result) ; + +//Recursive case will move down one dim into the input and recursively call itself for all "values" in input +template +void Flatten(const T &in_vector, std::vector &result) ; + +// Flatten controll function will create the result variable and initialize the recursion +template +std::vector Flatten(const T& in_vector) ; + +// calculate the stride length to get to next index in dim forvthe projected vector +size_t stride(size_t idx, const std::vector &shape) ; + +//math ------------------- + +template +std::vector matmul(const std::vector &A, const std::vector &B, const std::vector &A_shape, const std::vector &B_shape) ; + +template +requires Scalar +std::vector permute_vec(const std::vector& A, const std::vector& A_shape, const std::vector& permutation_indices) ; + +} //namespace + +#include "../src/CTensorUtils.tpp" + +#endif \ No newline at end of file diff --git a/include/SplineNetLib/splines.hpp b/include/SplineNetLib/splines.hpp index 76a315c..c8b7577 100644 --- a/include/SplineNetLib/splines.hpp +++ b/include/SplineNetLib/splines.hpp @@ -17,6 +17,7 @@ #include #include #include +#include "CTensor.hpp" /* #include #include diff --git a/src/CTensor.tpp b/src/CTensor.tpp new file mode 100644 index 0000000..1fe0ce2 --- /dev/null +++ b/src/CTensor.tpp @@ -0,0 +1,303 @@ +// Copyright (c) <2025>, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). 
+// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + + +#ifndef CTENSOR_TPP +#define CTENSOR_TPP + + +#include "../include/SplineNetLib/CTensor.hpp" + +namespace SplineNetLib { + +template +void CTensor::zero_grad(){ + this->_tensor_data->_grad = std::vector(this->_tensor_data->_data.size(),static_cast(0)); +} + + +template +void CTensor::squeeze(const size_t& dim) { + auto n_dims = this->_tensor_data->_shape.size(); + if (n_dims == 1) { + throw std::invalid_argument("CTensor with 1 Dim can not be squeezed to be 0D\n"); + } else if (dim >= n_dims) { + throw std::invalid_argument("target Dim: "+std::to_string(dim)+"is out of range of CTensor with n_dims: "+std::to_string(n_dims)+"\n"); + } else if (dim == n_dims-1){ + this->_tensor_data->_shape[dim-1] *= this->_tensor_data->_shape[dim]; + this->_tensor_data->_shape.pop_back(); + } else { + this->_tensor_data->_shape[dim] *= this->_tensor_data->_shape[dim+1]; + this->_tensor_data->_shape.erase(this->_tensor_data->_shape.begin() + dim + 1); + } +} + +template +void CTensor::unsqueeze(const size_t &dim) { + auto n_dims = this->_tensor_data->_shape.size(); + auto* shape = &(this->_tensor_data->_shape);//make a temp ptr to the shape vector for easier syntax + if (dim > n_dims) { + (*shape).push_back(1); + } else { + (*shape).insert((*shape).begin() + dim, 1); + } +} + +template +void CTensor::expand(const size_t &dim, const size_t &factor) { + if (factor <= 1) { + return; // No expansion needed + } + + auto* shape = &(this->_tensor_data->_shape);//make a temp ptr to the shape vector for easier syntax + auto* data = &(this->_tensor_data->_data); + auto n_dims = (*shape).size(); + + + // Check if the specified dimension is valid + if (dim >= n_dims) { + throw std::invalid_argument("Input dim: " + std::to_string(dim) + " cannot be larger than _n_dims: " + std::to_string(n_dims)); + } + + // Calculate the size of the sub-vectors to repeat + size_t sub_vector_size = 1; + for (size_t i = dim + 1; i < n_dims; i++) { + sub_vector_size *= (*shape)[i]; + } + + size_t data_size_per_expansion = (*shape)[dim] * sub_vector_size; + + // Repeat the data by the specified factor + size_t idx = 0; + while (idx < (*data).size()) { + std::vector sub_vector((*data).begin() + idx, (*data).begin() + idx + data_size_per_expansion); + + // Insert the sub-vector factor times + for (size_t i = 1; i < factor; i++) { + (*data).insert((*data).begin() + idx, sub_vector.begin(), sub_vector.end()); + idx += data_size_per_expansion; + } + + idx += data_size_per_expansion; + } + + // Update the shape and number of dimensions + (*shape)[dim] *= factor; + +} + +template +void CTensor::permute(const std::vector &permutation_indecies) { + //renamed global func permute to permute_vec so that func czll in class is nolonger ::permute now permute_vec + this->_tensor_data->_data = permute_vec(this->_tensor_data->_data, this->_tensor_data->_shape, permutation_indecies); + + auto shape_copy = this->shape(); + for (size_t i = 0; i < permutation_indecies.size(); i++) { + this->_tensor_data->_shape[i] = shape_copy[permutation_indecies[i]]; + } +} + +template +void CTensor::transpose() { + if (this->_tensor_data->_shape.size()>=2) { + std::vector transpose_idx; + for (size_t i = 0; i < this->_tensor_data->_shape.size()-2; i++) { + transpose_idx.push_back(i); + } + + transpose_idx.push_back(this->_tensor_data->_shape.size() - 1); + 
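+        //together with the previous push_back this swaps the last two axes:
+        //e.g. for a shape (2,3,4) tensor transpose_idx becomes {0,2,1} and the shape after permute is (2,4,3)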
transpose_idx.push_back(this->_tensor_data->_shape.size() - 2); + + this->permute(transpose_idx); + } +} + + + +//-----operator-----/ +template +auto CTensor::operator[](size_t idx){ + std::vector Shape = this->shape(); + //check if index should exist in multi dim space + if (idx >= Shape[0]) { + throw std::invalid_argument("index ["+std::to_string(idx)+"] is out of range with dim of size : "+std::to_string(Shape[0])+"\n"); + } + //if vector is 1D to begin with + if (Shape.size() == 1) { + //create sub vector with scalar data at data[idx] + std::vectordata())::value_type> sub_vector = {this->data()[idx]}; + //std::cout<<"operator[] scalar case debug data[idx]="<data() in range constructor (could likely also be used in decltype) + auto data = this->data(); + //creates a vector of same type as stored in CTensor using range constructor from flat_idx to flat_idx + size_sub_vector + std::vectordata())::value_type> sub_vector(data.begin() + flat_idx, data.begin() + flat_idx + size_sub_vector); + + + auto new_CT = CTensor(sub_vector, Shape); + return new_CT; +} + +template +auto CTensor::operator+(CTensor& other){ + //create new addfunction with shared ptr to this and other + auto new_fn = std::make_unique>(std::make_shared>(*this), + std::make_shared>(other)); + auto res_vec = new_fn->fwd(); //add this data and other data + auto result = CTensor(res_vec, this->shape());//create the result CTensor + if (this->requires_grad || other.requires_grad) { + result.requires_grad = true; + + result._tensor_data->_grad_fn.push_back(std::move(new_fn)); + } else { + result.requires_grad = false; + } + return result; +} + + +template +auto CTensor::operator-(CTensor &other) { + //create new SubFunction with shared ptr to this and other + auto new_fn = std::make_unique>(std::make_shared>(*this), + std::make_shared>(other)); + auto res_vec = new_fn->fwd(); + auto result = CTensor(res_vec, this->shape()); + if (this->requires_grad || other.requires_grad) { + result.requires_grad = true; + + result._tensor_data->_grad_fn.push_back(std::move(new_fn)); + } else { + result.requires_grad = false; + } + return result; +} + +template +auto CTensor::operator* (CTensor &other) { + //create the parent function for the result using parents this and other + //this will make a shared ptr of the base class. 
this works since the functions in tje derived classes are all overrides + //this is doen so that all grad fns of a CTensor can be stored in the same std::vector>> _grad_fn + auto new_fn = std::make_unique>(std::make_shared>(*this), + std::make_shared>(other)); + //use new_fn.forward() to perfo5m the addition + auto res_vec = new_fn->fwd(); + + std::vector result_shape; + auto this_shape = this->shape(); + auto other_shape = other.shape(); + for (size_t i = 0; i < this_shape.size() -1; i++){ + result_shape.push_back(this_shape[i]); + } + result_shape.push_back(other_shape[other_shape.size()-1]); + + auto result = CTensor(res_vec, result_shape); + //assign parent function to the result._grad_fn + if (this->requires_grad || other.requires_grad) { + result.requires_grad = true; + + result._tensor_data->_grad_fn.push_back(std::move(new_fn)); + } else { + result.requires_grad = false; + } + + return result; +} + + + + + +template +void CTensor::clear_history() { + this->_tensor_data->_grad_fn.clear(); + //this should be safe since Function uses pointers to Ctensor and the Tensor will survive the _grad_fn clear +} + +template +void CTensor::clear_graph() { + //recursive call to traverse grad graph + for (auto &fn : this->_tensor_data->_grad_fn) { + + fn->clear_graph_f(); + } + //clear this CTensor history when sub graph is cleared + this->clear_history(); +} + +//can be improved with overload if no arg is passe to use {} so that this function below can use refernces +template +void CTensor::backward(std::vector prop_grad) { + + //go through all parent Functions + for (auto &fn : this->_tensor_data->_grad_fn) { + if (fn) { + //std::cout<backward(prop_grad, this); + //std::cout<<"debug Ct bwd fn bwd finish\n"; + } + } + //std::cout<<"debug Ct bwd fn all bwd finish\n"; +} +/* untestee +template +CTensor zeros(std::vector shape) { + std::vector data(stride(-1,shape),T(0)); + return CTensor(data, shape); +} + +template +CTensor ones(std::vector shape) { + std::vector data(stride(-1,shape),T(1)); + return CTensor(data, shape); +} + +template +CTensor random(std::vector shape, T min, T max) { + return CTensor(randomVector(stride(-1,shape),min,max),shape); +} + +template +CTensor Tensor(U data) { + return CTensor(data); +} + +template +CTensor Tensor(U data) { + return CTensor(data); +} + +template +CTensor Tensor(std::vector data, std::vector shape) { + return CTensor(data,shape); +} +*/ + +} //namespace + +#endif \ No newline at end of file diff --git a/src/CTensorFunc.tpp b/src/CTensorFunc.tpp new file mode 100644 index 0000000..cf7c506 --- /dev/null +++ b/src/CTensorFunc.tpp @@ -0,0 +1,300 @@ +// Copyright (c) <2025>, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). +// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + +#ifndef CTENSORFUNC_TPP +#define CTENSORFUNC_TPP + + +#include "../include/SplineNetLib/CTensorFunc.hpp" + +namespace SplineNetLib { + +template +requires Scalar +void Function::clear_graph_f() { + a->clear_graph(); + b->clear_graph(); +} + +template +requires Scalar +std::vector AddFunction::fwd() { + + auto* a_data = &(this->a->_tensor_data->_data); + auto* b_data = &(this->b->_tensor_data->_data); + + T l; + T r; + + std::vector res_vec; + for (size_t i = 0; i < (*a_data).size() || i < (*b_data).size(); i++){ + l = (i < (*a_data).size()) ? 
(*a_data)[i] : 0 ; + r = (i < (*b_data).size()) ? (*b_data)[i] : 0 ; + res_vec.push_back(l + r); + } + return res_vec; +} + + + +template +requires Scalar +void AddFunction::backward(std::vector &prop_grad, CTensor *result) { + //std::cout<<"debug add bwd call\n"; + //check if func already exists in the recursive chain + if (Function::global_chain.find(this) != Function::global_chain.end()) { + std::cout<<"cyle detected in grad backward, ensure no incorrect reassignments to to Ctensors that were previously used in the computation graph\n"; + return; + } + //std::cout<<"debug add bwd cycle check\n"; + //insert this func into the chain for cycle detection + Function::global_chain.insert(this); + //std::cout<<"debug add bwd chain insert\n"; + if (prop_grad.empty()){ + for (size_t i=0; i < this->a->data().size(); i++) { + prop_grad.push_back(1); + //std::cout<<"debug add bwd empty grad set to 1s \n"; + } + } + //std::cout<<"debug add bwd grad add\n"; + //ensure self dependend gradients arent added twice + if (result != this->a.get()) { + std::cout<<"debug add bwd this->a gradient propagation initialized\n"; + //std::cout<<"debug add bwd this a grad size:"<a->grad().size()<<"prop_grad size: "<a->requires_grad == true) { + if (this->a->grad().empty()){ + std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + this->a->zero_grad(); + } + std::cout<<"working on grad of a at "<a<<" "<a->grad())<<" "<a->_tensor_data->_grad[i] += prop_grad[i]; + //std::cout<<"debug add bwd accumulation step\n"; + } + } + std::cout<<"debug add bwd this a grad accumulated\n"; + this->a->backward(prop_grad); + std::cout<<"debug add bwd this a recursion finished\n"; + } + //ensure self dependend gradients arent added twice + if (result != this->b.get()) { + std::cout<<"debug add bwd this->b gradient propagation initialized\n"; + //std::cout<<"debug add bwd this b grad size:"<b->grad().size()<<"prop_grad size: "<b->requires_grad == true) { + if (this->b->grad().empty()){ + std::cout<<"b grqd empty "<b->grad().size()<<"\n"; + this->b->zero_grad(); + } + std::cout<<"working on grad of b at "<b<<" "<b->grad())<<" "<b->_tensor_data->_grad[i] += prop_grad[i]; + //std::cout<<"debug add bwd accumulation step\n"; + } + } + //std::cout<<"debug add bwd this b grad accumulated\n"; + this->b->backward(prop_grad); + //std::cout<<"debug add bwd this b recursion finished\n"; + } + //std::cout<<"debug add bwd recursive propagation\n"; + //remove this func from the chain if all its recursive processes finished + Function::global_chain.erase(this); + //std::cout<<"debug add bwd chain erase\n"; +} + + +template +requires Scalar +std::vector SubFunction::fwd() { + + auto* a_data = &(this->a->_tensor_data->_data); + auto* b_data = &(this->b->_tensor_data->_data); + + T l; + T r; + + std::vector res_vec; + for (size_t i = 0; i < (*a_data).size() || i < (*b_data).size(); i++){ + l = (i < (*a_data).size()) ? (*a_data)[i] : 0 ; + r = (i < (*b_data).size()) ? 
(*b_data)[i] : 0 ; + res_vec.push_back(l - r); + } + return res_vec; +} + + +template +requires Scalar +void SubFunction::backward(std::vector &prop_grad, CTensor *result) { + + //check if func already exists in the recursive chain + if (Function::global_chain.find(this) != Function::global_chain.end()) { + std::cout<<"cyle detected in Ctensor.backward(), ensure no incorrect reassignments to to Ctensors that were previously used in the computation graph\n"; + return; + } + + //insert this func into the chain for cycle detection + Function::global_chain.insert(this); + + if (prop_grad.empty()){ + for (size_t i=0; i < this->a->data().size(); i++) { + prop_grad.push_back(1); + } + } + + //ensure self dependend gradients arent added twice + if (result != this->a.get()) { + if (this->a->requires_grad == true) { + if (this->a->grad().empty()){ + std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + this->a->zero_grad(); + } + for (size_t i = 0; i < prop_grad.size(); i++) { + + this->a->_tensor_data->_grad[i] += prop_grad[i]; + + } + } + this->a->backward(prop_grad); + } + //ensure self dependend gradients arent added twice + if (result != this->b.get()) { + if (this->b->requires_grad == true) { + if (this->b->grad().empty()){ + std::cout<<"b grqd empty "<b->grad().size()<<"\n"; + this->b->zero_grad(); + } + for (size_t i = 0; i < prop_grad.size(); i++) { + this->b->_tensor_data->_grad[i] -= prop_grad[i]; + } + } + this->b->backward(prop_grad); + } + //remove this func from the chain if all its recursive processes finished + Function::global_chain.erase(this); +} + + +template +requires Scalar +std::vector MatMulFunction::fwd() { + + std::vector a_shape = this->a->shape(); + std::vector b_shape = this->b->shape(); + + size_t a_n_dims = a_shape.size(); + size_t b_n_dims = b_shape.size(); + + auto a_copy = *(this->a); + auto b_copy = *(this->b); + + if (a_n_dims != b_n_dims) { + throw std::invalid_argument("operator (*) expects both opperants to have the same num of dimensions but got:"+std::to_string(a_n_dims)+"and "+std::to_string(b_n_dims)+",please ensure opperants dims match by using squeeze or unsqueeze beforehand\n"); + } + if (a_n_dims > 2) { + //Create sub vectors for the batch dimensions + std::vector a_batch_shape; + std::vector b_batch_shape; + //get only the batch dimension shapes + for (size_t i = 0; i < a_shape.size()-2; i++ ){ + a_batch_shape.push_back(a_shape[i]); + b_batch_shape.push_back(b_shape[i]); + } + for (size_t i = 0; i < a_batch_shape.size(); i++) { + //expand dims so that batch dimensions are the same + if (a_batch_shape[i] != b_batch_shape[i]) { + a_copy.expand(i,b_batch_shape[i]); + b_copy.expand(i,a_batch_shape[i]); + } + } + } + std::vector result_vector = matmul(a_copy.data(), b_copy.data(), a_copy.shape(), b_copy.shape()); + return result_vector; + +} + +template +requires Scalar +void MatMulFunction::backward(std::vector &prop_grad, CTensor *result) { + + //check if func already exists in the recursive chain + if (Function::global_chain.find(this) != Function::global_chain.end()) { + std::cout<<"cyle detected in Ctensor.backward(), ensure no incorrect reassignments to to Ctensors that were previously used in the computation graph\n"; + return; + } + + //insert this func into the chain for cycle detection + Function::global_chain.insert(this); + + + auto prop_grad_shape = result->shape(); + std::cout<<"matmul bwd prop shape : "<data().size(); i++) { + prop_grad.push_back(1); + } + } + + //ensure self dependend gradients arent added twice + if (result != 
this->a.get()) { + auto prop_grad_a = this->a->grad(); //needs to be deeply checked + if (this->a->requires_grad == true) { + if (this->a->_tensor_data->_grad.empty()){ + std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + this->a->zero_grad(); + } + //create a copy of b and transpose it + auto b_copy = *(this->b); + b_copy.transpose(); + + prop_grad_a = matmul(prop_grad, b_copy.data(), prop_grad_shape, b_copy.shape()); + + //assign grad + for (size_t i = 0; i < prop_grad_a.size(); i++) { + this->a->_tensor_data->_grad[i] += prop_grad_a[i]; + } + } + this->a->backward(prop_grad_a); + } + + //ensure self dependend gradients arent added twice + if (result != this->b.get()) { + auto prop_grad_b = this->b->grad(); + if (this->b->requires_grad == true) { + if (this->b->_tensor_data->_grad.empty()){ + std::cout<<"b grad empty "<b->grad().size()<<"\n"; + this->b->zero_grad(); + } + //create a copy of b and transpose it + auto a_copy = *(this->a); + a_copy.transpose(); + std::cout<<"b bwd a_copy shape :"<b->_tensor_data->_grad[i] += prop_grad_b[i]; + } + } + this->b->backward(prop_grad_b); + } + + //remove this func from the chain if all its recursive processes finished + Function::global_chain.erase(this); +} + +}//namespace + +#endif \ No newline at end of file diff --git a/src/CTensorUtils.tpp b/src/CTensorUtils.tpp new file mode 100644 index 0000000..985de1a --- /dev/null +++ b/src/CTensorUtils.tpp @@ -0,0 +1,205 @@ +// Copyright (c) <2025>, +// +// This file is part of the PySplineNetLib project, which is licensed under the +// Mozilla Public License, Version 2.0 (MPL-2.0). +// +// SPDX-License-Identifier: MPL-2.0 +// For the full text of the licenses, see: +// - Mozilla Public License 2.0: https://opensource.org/licenses/MPL-2.0 + + + + +#ifndef CTENSORUTILS_TPP +#define CTENSORUTILS_TPP + +#include "../include/SplineNetLib/CTensorUtils.hpp" + +namespace SplineNetLib { + +template +std::vector randomVector(size_t size, T min, T max) { + // Random number generator + std::random_device rd; + std::mt19937 gen(rd()); + + // Distribution depending on type T + typename std::conditional::value, + std::uniform_int_distribution, + std::uniform_real_distribution>::type dist(min, max); + + std::vector vec(size); + for (auto& v : vec) { + v = dist(gen); + } + return vec; +} + + +template +int get_depth(const T &scalar) { + return 0; +} + +template +int get_depth(const T &vec) { + int max_depth = 1; + for (const auto &element : vec) { + max_depth = std::max(max_depth, 1 + get_depth(element)); + } + return max_depth; +} + +template +std::string vectorToString(const std::vector& vec) { + std::ostringstream oss; + oss << "("; + for (size_t i = 0; i < vec.size(); ++i) { + oss << vec[i]; + if (i < vec.size() - 1) { + oss << ", "; + } + } + oss << ")"; + return oss.str(); +} + +template +std::vector get_shape(const T &scalar, std::vector Shape) { + return Shape; +} + +template +std::vector get_shape(const T &vec, std::vector Shape) { + Shape.push_back(vec.size()); + return get_shape(vec[0], Shape); +} + +template +void Flatten(const T &in_scalar, std::vector &result) { + result.push_back(in_scalar); +} + +template +void Flatten(const T &in_vector, std::vector &result) { + for (const auto &vec : in_vector) { + Flatten(vec, result); + } +} + +template +std::vector Flatten(const T& in_vector) { + std::vector result; + Flatten(in_vector, result); + return result; +} + +inline size_t stride(size_t idx, const std::vector &shape) { + size_t stride = 1; + for (size_t i = idx + 1; i < shape.size(); i++) { + stride *= 
shape[i]; + } + return stride; +} + +// Math functions +/* see readable version below +template +requires Scalar +std::vector matmul(const std::vector &A, const std::vector &B, const std::vector &A_shape, const std::vector &B_shape) { + size_t batch_size = 1; + if (B_shape.size() != A_shape.size()) { + throw std::invalid_argument("A_shape.size() and B_shape.size() must be equal"); + return std::vector(1, 0); + } + + if (A_shape.size() > 2) { + for (size_t i = 0; i < A_shape.size() - 2; i++) { + batch_size *= A_shape[i]; + } + } + + size_t M = A_shape[A_shape.size() - 2], K = A_shape[A_shape.size() - 1], N = B_shape[B_shape.size() - 1]; + std::vector result(batch_size * M * N); + + for (size_t batch_dim = 0; batch_dim < batch_size; batch_dim++) { + for (size_t row = 0; row < M; row++) { + for (size_t col = 0; col < N; col++) { + T sum = 0.0; + for (size_t shared = 0; shared < K; shared++) { + sum += A[batch_dim * M * K + row * K + shared] * B[batch_dim * K * N + shared * N + col]; + } + result[batch_dim * M * N + row * N + col] = sum; + } + } + } + return result; +}*/ + +template // Template function that accepts any scalar type 'T' (e.g., float, double) +requires Scalar // This constraint ensures that the type 'T' is a scalar (e.g., not a matrix, vector, etc.) +std::vector matmul(const std::vector &A, const std::vector &B, const std::vector &A_shape, const std::vector &B_shape) { + size_t batch_size = 1; // Variable to store the number of batches (default to 1) + //std::cout<<"debug : matmul : a shape = "<(1, 0); // This return statement is unreachable due to the exception, but just in case. + } + + + // If A has more than 2 dimensions (e.g., batching is involved), calculate the batch size + if (A_shape.size() > 2) { + for (size_t i = 0; i < A_shape.size() - 2; i++) { + batch_size *= A_shape[i]; // Multiply the sizes of the leading dimensions (batch dimensions) + } + } + + // Get the dimensions for matrix multiplication + size_t M = A_shape[A_shape.size() - 2]; // Rows of A + size_t K = A_shape[A_shape.size() - 1]; // Columns of A and rows of B + size_t N = B_shape[B_shape.size() - 1]; // Columns of B + + // Initialize the result vector with a size to hold all results (batch_size * M * N) + std::vector result(batch_size * M * N); + + // Perform matrix multiplication for each batch + for (size_t batch_dim = 0; batch_dim < batch_size; batch_dim++) { + for (size_t row = 0; row < M; row++) { // Iterate over each row of A + for (size_t col = 0; col < N; col++) { // Iterate over each column of B + T sum = 0.0; // Initialize the sum for the current element in the result matrix + for (size_t shared = 0; shared < K; shared++) { // Iterate over the shared dimension (columns of A, rows of B) + // Perform the dot product between the row of A and the column of B + sum += A[batch_dim * M * K + row * K + shared] * B[batch_dim * K * N + shared * N + col]; + } + // Store the computed value in the result vector at the appropriate position + result[batch_dim * M * N + row * N + col] = sum; + } + } + } + return result; // Return the final result of the matrix multiplication +} + +template +requires Scalar +std::vector permute_vec(const std::vector& A, const std::vector& A_shape, const std::vector& permutation_indices) { + std::vector B(A.size(), 0); + std::vector B_shape; + + for (const auto& idx : permutation_indices) { + B_shape.push_back(A_shape[idx]); + } + + for (size_t i = 0; i < A.size(); i++) { + size_t idx = 0; + for (size_t k = 0; k < A_shape.size(); k++) { + idx += ((i / 
stride(permutation_indices[k], A_shape)) % B_shape[k]) * stride(k, B_shape); + } + B[idx] = A[i]; + } + return B; +} + +}//namespace + +#endif \ No newline at end of file From 64e574cd13fe27e1a4728cceb1630ebe9fe45748 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Fri, 7 Feb 2025 12:47:32 +0100 Subject: [PATCH 02/19] changed cpp version from 17 to 20 to use 'concepts'(cmake now works Note that a lot of warnings are created when running cmake) --- CMakeLists.txt | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f9c63b..ee2e5cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,20 +4,9 @@ cmake_minimum_required(VERSION 3.10) project(SplineNetLib VERSION 1.0) # Set the C++ standard -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED True) -#include fetch content for catch2 -include(FetchContent) - -# fetch catch2 and make usable -FetchContent_Declare( - Catch2 - GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.4.0 -) -FetchContent_MakeAvailable(Catch2) - # Optionally enable warnings for all compilers if(MSVC) add_compile_options(/W4) @@ -48,6 +37,16 @@ target_include_directories(SplineNetLib PUBLIC ${PROJECT_SOURCE_DIR}/include) option(ENABLE_TESTS "allow catch2 install and tests to run" OFF) if(ENABLE_TESTS) + #include fetch content for catch2 + include(FetchContent) + + # fetch catch2 and make usable + FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG v3.4.0 + ) + FetchContent_MakeAvailable(Catch2) #enable testing enable_testing() @@ -74,6 +73,12 @@ add_executable(SplineNetExample examples/example_network.cpp) # Link the example with the library target_link_libraries(SplineNetExample PRIVATE SplineNetLib) +# Add an example or test executable +add_executable(SplineNetExampleTensor examples/example_CTensor.cpp) + +# Link the example with the library +target_link_libraries(SplineNetExampleTensor PRIVATE SplineNetLib) + # Optional: Install the library and headers install(TARGETS SplineNetLib DESTINATION lib) install(DIRECTORY include/SplineNetLib DESTINATION include) From 8f43990b5d75b52134f39ae2413304a73dc5dbae Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Fri, 7 Feb 2025 13:24:04 +0100 Subject: [PATCH 03/19] added new CTensor constructor use nested vectors as init arg. 
call like : CTensor(arg) --- examples/example_CTensor.cpp | 10 ++++++++++ include/SplineNetLib/CTensor.hpp | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/examples/example_CTensor.cpp b/examples/example_CTensor.cpp index 89a5d33..aa87409 100644 --- a/examples/example_CTensor.cpp +++ b/examples/example_CTensor.cpp @@ -9,5 +9,15 @@ int main() { std::cout<<"created CTensor a with data : "<> data_b({{1,1,1},{2,2,2}}); + + auto b = CTensor(data_b); + + std::cout<<"created CTensor b with data : "< + CTensor(const U& data) { + _tensor_data = new DTensor(Flatten(data), get_shape(data)); + } + CTensor(const CTensor& other){ _tensor_data = other._tensor_data; _tensor_data->_ref_c++; From 3154e6452c33e5cb793c524551046fe5fff284a6 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Tue, 11 Feb 2025 12:15:15 +0100 Subject: [PATCH 04/19] created pybinds for Ctensor int and double also fixed some bugs and hid some debug outputs --- PySplineNetLib.egg-info/PKG-INFO | 5 + PySplineNetLib.egg-info/SOURCES.txt | 8 + PySplineNetLib.egg-info/dependency_links.txt | 1 + PySplineNetLib.egg-info/requires.txt | 1 + PySplineNetLib.egg-info/top_level.txt | 1 + include/SplineNetLib/CTensor.hpp | 14 +- setup.py | 1 + src/CTensorFunc.tpp | 28 ++-- src/SplineNetLib_py.cpp | 152 +++++++++++++------ 9 files changed, 143 insertions(+), 68 deletions(-) create mode 100644 PySplineNetLib.egg-info/PKG-INFO create mode 100644 PySplineNetLib.egg-info/SOURCES.txt create mode 100644 PySplineNetLib.egg-info/dependency_links.txt create mode 100644 PySplineNetLib.egg-info/requires.txt create mode 100644 PySplineNetLib.egg-info/top_level.txt diff --git a/PySplineNetLib.egg-info/PKG-INFO b/PySplineNetLib.egg-info/PKG-INFO new file mode 100644 index 0000000..ef7e153 --- /dev/null +++ b/PySplineNetLib.egg-info/PKG-INFO @@ -0,0 +1,5 @@ +Metadata-Version: 2.1 +Name: PySplineNetLib +Version: 0.1 +License-File: LICENSE +Requires-Dist: pybind11>=2.6.0 diff --git a/PySplineNetLib.egg-info/SOURCES.txt b/PySplineNetLib.egg-info/SOURCES.txt new file mode 100644 index 0000000..07ef532 --- /dev/null +++ b/PySplineNetLib.egg-info/SOURCES.txt @@ -0,0 +1,8 @@ +LICENSE +setup.py +PySplineNetLib.egg-info/PKG-INFO +PySplineNetLib.egg-info/SOURCES.txt +PySplineNetLib.egg-info/dependency_links.txt +PySplineNetLib.egg-info/requires.txt +PySplineNetLib.egg-info/top_level.txt +src/SplineNetLib_py.cpp \ No newline at end of file diff --git a/PySplineNetLib.egg-info/dependency_links.txt b/PySplineNetLib.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/PySplineNetLib.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/PySplineNetLib.egg-info/requires.txt b/PySplineNetLib.egg-info/requires.txt new file mode 100644 index 0000000..d89789f --- /dev/null +++ b/PySplineNetLib.egg-info/requires.txt @@ -0,0 +1 @@ +pybind11>=2.6.0 diff --git a/PySplineNetLib.egg-info/top_level.txt b/PySplineNetLib.egg-info/top_level.txt new file mode 100644 index 0000000..4457d5c --- /dev/null +++ b/PySplineNetLib.egg-info/top_level.txt @@ -0,0 +1 @@ +PySplineNetLib diff --git a/include/SplineNetLib/CTensor.hpp b/include/SplineNetLib/CTensor.hpp index 9b60103..862b915 100644 --- a/include/SplineNetLib/CTensor.hpp +++ b/include/SplineNetLib/CTensor.hpp @@ -98,20 +98,22 @@ class CTensor { //-----shape-utils----- - void squeeze(const size_t &dim) ; + void squeeze(const size_t &dim) ;//squeezes / removes the input dim and changes the internal projection shape - void unsqueeze(const size_t &dim) ; + void unsqueeze(const size_t &dim) ; //adds 
a new dim at the input dim - void expand(const size_t &dim, const size_t &factor) ; + void expand(const size_t &dim, const size_t &factor) ; //expands the dimension by factor so that shape 3,2 expanded(1,3) + //becomes: (3,6) (will duplicate values at the dimension to match new projected shape) - void permute(const std::vector &permutation_indecies) ; + void permute(const std::vector &permutation_indecies) ; //will swap dimesnions at the permutation indecies + //shape (2,3,4) permute(2,0,1) becomes: (4,2,3) void transpose() ; //-----auto_grad----- - + //delete all grad fns of this void clear_history() ; - + //recursive delete of grad fns for all tensors in the graph with this as root void clear_graph() ; //maybe add overload o this so that f no arg was passed propagated grad is set to {}, than this function below could use all by ref void backward(std::vector prop_grad = {}) ; diff --git a/setup.py b/setup.py index af120ca..3ca8869 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ def build_python_extension(): libraries=["SplineNetLib"], # Link with your precompiled library library_dirs=[get_library_path()], # Directory containing the precompiled library language="c++", # Ensure it's compiled as C++ + extra_compile_args=["-std=c++20"], ) ], install_requires=[ diff --git a/src/CTensorFunc.tpp b/src/CTensorFunc.tpp index cf7c506..4640897 100644 --- a/src/CTensorFunc.tpp +++ b/src/CTensorFunc.tpp @@ -68,34 +68,34 @@ void AddFunction::backward(std::vector &prop_grad, CTensor *result) { //std::cout<<"debug add bwd grad add\n"; //ensure self dependend gradients arent added twice if (result != this->a.get()) { - std::cout<<"debug add bwd this->a gradient propagation initialized\n"; + //std::cout<<"debug add bwd this->a gradient propagation initialized\n"; //std::cout<<"debug add bwd this a grad size:"<a->grad().size()<<"prop_grad size: "<a->requires_grad == true) { if (this->a->grad().empty()){ - std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + //std::cout<<"a grqd empty "<a->grad().size()<<"\n"; this->a->zero_grad(); } - std::cout<<"working on grad of a at "<a<<" "<a->grad())<<" "<a<<" "<a->grad())<<" "<a->_tensor_data->_grad[i] += prop_grad[i]; //std::cout<<"debug add bwd accumulation step\n"; } } - std::cout<<"debug add bwd this a grad accumulated\n"; + //std::cout<<"debug add bwd this a grad accumulated\n"; this->a->backward(prop_grad); - std::cout<<"debug add bwd this a recursion finished\n"; + //std::cout<<"debug add bwd this a recursion finished\n"; } //ensure self dependend gradients arent added twice if (result != this->b.get()) { - std::cout<<"debug add bwd this->b gradient propagation initialized\n"; + //std::cout<<"debug add bwd this->b gradient propagation initialized\n"; //std::cout<<"debug add bwd this b grad size:"<b->grad().size()<<"prop_grad size: "<b->requires_grad == true) { if (this->b->grad().empty()){ - std::cout<<"b grqd empty "<b->grad().size()<<"\n"; + //std::cout<<"b grqd empty "<b->grad().size()<<"\n"; this->b->zero_grad(); } - std::cout<<"working on grad of b at "<b<<" "<b->grad())<<" "<b<<" "<b->grad())<<" "<b->_tensor_data->_grad[i] += prop_grad[i]; @@ -156,7 +156,7 @@ void SubFunction::backward(std::vector &prop_grad, CTensor *result) { if (result != this->a.get()) { if (this->a->requires_grad == true) { if (this->a->grad().empty()){ - std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + //std::cout<<"a grqd empty "<a->grad().size()<<"\n"; this->a->zero_grad(); } for (size_t i = 0; i < prop_grad.size(); i++) { @@ -171,7 +171,7 @@ void 
SubFunction::backward(std::vector &prop_grad, CTensor *result) { if (result != this->b.get()) { if (this->b->requires_grad == true) { if (this->b->grad().empty()){ - std::cout<<"b grqd empty "<b->grad().size()<<"\n"; + //std::cout<<"b grqd empty "<b->grad().size()<<"\n"; this->b->zero_grad(); } for (size_t i = 0; i < prop_grad.size(); i++) { @@ -238,7 +238,7 @@ void MatMulFunction::backward(std::vector &prop_grad, CTensor *result) auto prop_grad_shape = result->shape(); - std::cout<<"matmul bwd prop shape : "<::backward(std::vector &prop_grad, CTensor *result) auto prop_grad_a = this->a->grad(); //needs to be deeply checked if (this->a->requires_grad == true) { if (this->a->_tensor_data->_grad.empty()){ - std::cout<<"a grqd empty "<a->grad().size()<<"\n"; + //std::cout<<"a grqd empty "<a->grad().size()<<"\n"; this->a->zero_grad(); } //create a copy of b and transpose it @@ -274,13 +274,13 @@ void MatMulFunction::backward(std::vector &prop_grad, CTensor *result) auto prop_grad_b = this->b->grad(); if (this->b->requires_grad == true) { if (this->b->_tensor_data->_grad.empty()){ - std::cout<<"b grad empty "<b->grad().size()<<"\n"; + //std::cout<<"b grad empty "<b->grad().size()<<"\n"; this->b->zero_grad(); } //create a copy of b and transpose it auto a_copy = *(this->a); a_copy.transpose(); - std::cout<<"b bwd a_copy shape :"< #include #include // To handle STL types like std::string, std::vector +#include #include "SplineNetLib/SplineNet.hpp" // Header for the library + namespace py = pybind11; +// Function to handle nested Python lists and convert them to std::vector +template +void flatten_pylist(const py::handle &obj, std::vector &result) { + if (py::isinstance(obj)) { + for (const auto &item : obj.cast()) { + flatten_pylist(item, result); + } + } else { + result.push_back(obj.cast()); + } +} + +// Wrapper function to create a new vector +template +std::vector convert_pylist_to_vector(const py::list &py_list) { + std::vector result; + flatten_pylist(py_list, result); + return result; +} + +void get_shape_recursive(const py::list& py_list, std::vector& shape) { + // Base case: when the list is empty, do nothing + if (py_list.size() == 0) return; + + // Push the size of the current level + shape.push_back(py_list.size()); + + // Check if the first element is a list (nested) + if (py::isinstance(py_list[0])) { + // Recursively call get_shape_recursive for nested lists + get_shape_recursive(py::cast(py_list[0]), shape); + } +} + +std::vector get_shape(const py::list& py_list) { + std::vector shape; + // Use the recursive get_shape implementation for vectors + get_shape_recursive(py_list, shape); + return shape; +} + + PYBIND11_MODULE(PySplineNetLib, m) { py::class_(m, "spline") .def(py::init>&, const std::vector < std::vector < double>>& >()) // Bind constructor @@ -40,56 +83,69 @@ PYBIND11_MODULE(PySplineNetLib, m) { .def("backward",py::overload_cast,std::vector , bool>(&SplineNetLib::layer::backward),"[double] ([double] x,[double]d_y,bool normalize), takes input x, loss gradient d_y and bool apply_grad,returns propageted loss (applies grad to all splines if True)") .def("backward",py::overload_cast> &,std::vector> >(&SplineNetLib::layer::backward),"backward but for batches (will always apply gradients)") .def("get_splines",&SplineNetLib::layer::get_splines,"[[SplineNetLib::spline]] (None), returns all splines in the layer"); -} + //int tensor + py::class_>(m, "CTensor") -/*to be checked -#include -#include // To handle STL types like std::string, std::vector -#include 
"SplineNetLib/SplineNet.hpp" // Header for the library + .def(py::init&, const std::initializer_list&>()) + .def(py::init&, const std::vector&>()) + .def(py::init&>()) + .def(py::init([](const py::list &py_list) {//only for py module to turn nested lists and turn them to nested vector + auto nested_vector = convert_pylist_to_vector(py_list); + std::vector shape = get_shape(py_list); + return new SplineNetLib::CTensor(nested_vector,shape); + })) + .def("data",&SplineNetLib::CTensor::data,"std::vector, (None), returns the stored data vector as a copy") + .def("shape",&SplineNetLib::CTensor::shape,"std::vector, (None), returns the shape of the tensor like (dim0, dim1, ..., dimN)") + .def("grad",&SplineNetLib::CTensor::grad, "std::vector, (None), returns the grad as flat 1D projected vector (internally using tensor.shape)") + .def("zero_grad",&SplineNetLib::CTensor::zero_grad, "None, (None), sets the gradient of this tensor to 0" ) + .def("squeeze",&SplineNetLib::CTensor::squeeze, "None, (size_t dim), removes the dim and projects the data to the new shape") + .def("unsqueeze",&SplineNetLib::CTensor::unsqueeze, "None, (size_t dim), adds new dim at input dim index") + .def("expand",&SplineNetLib::CTensor::expand, "None, (size_t dim, size_t factor), expands the dimesnion at dim by factor -> shape: (2,2) expand(0,3) becomes: shape(6,2), (note this WILL affect the data)") + .def("permute",&SplineNetLib::CTensor::permute, "None, (std::vector), swaps dimesnions at input indecies -> shape(2,1,3) permute([2,0,1] becomes: shape(3,2,1))") + .def("transpose",&SplineNetLib::CTensor::transpose, "None, (None), transposes the tensor (swaps the innermost two dimesnions)") + .def("clear_history",&SplineNetLib::CTensor::clear_history, "None, (None), clears all grad fns from the tensor (gradient propagatuon WILL NOT work after this so use carefully)") + .def("clear_graph",&SplineNetLib::CTensor::clear_graph,"None, (None), clears full computational graph for all tensors conected to this one") + //.def("backward",&SplineNetLib::CTensor::backward, "None, (None), backwards pass through this and connected graph") + .def("backward", &SplineNetLib::CTensor::backward, + py::arg("prop_grad") = std::vector(), "Backward pass, takes an optional gradient vector (defaults to empty).") + .def("__mul__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& other) {return self * other;}) + .def("__add__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& other) {return self + other; }) + .def("__sub__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& other) {return self - other; }) -namespace py = pybind11; + .def("__getitem__", [](SplineNetLib::CTensor& self, size_t idx)->SplineNetLib::CTensor { return self[idx]; }); + + py::class_>(m, "CTensor") -PYBIND11_MODULE(mylibrary, m) { - // Binding the spline class - py::class_(m, "spline") - .def(py::init>&, const std::vector>&>(), // Constructor - "Constructs the spline with control points and parameters") - .def("interpolation", &SplineNetLib::spline::interpolation, - "None -> Interpolates the spline based on its points") - .def("forward", &SplineNetLib::spline::forward, - "double (double x) -> Evaluates the spline at x (if x is in bounds)") - .def("backward", &SplineNetLib::spline::backward, - "double (double in, double d_y, double out) -> Uses previous input, loss gradient, and last output for gradient descent") - .def("apply_grad", &SplineNetLib::spline::apply_grad, - "None (double lr) -> Applies gradient from backward * learning rate (lr)") - .def("get_points", 
&SplineNetLib::spline::get_points, - "[[double]] -> Returns spline points like [[x0, y0], ..., [xn, yn]]") - .def("get_params", &SplineNetLib::spline::get_params, - "[[double]] -> Returns spline parameters/coefficients like [[a0, b0, c0, d0], ..., [an, bn, cn, dn]]"); - - // Binding the layer class - py::class_(m, "layer") - .def(py::init(), // Constructor with size and learning rate - "Constructs a layer with the specified number of splines and learning rate") - .def(py::init>>>, - std::vector>>>>(), // Constructor for nested vector input - "Constructs a layer with nested vector inputs for spline initialization") - .def("interpolate_splines", &SplineNetLib::layer::interpolate_splines, - "None -> Calls interpolation on all splines in the layer") - - // Overloaded 'forward' methods - .def("forward", py::overload_cast, bool>(&SplineNetLib::layer::forward), - "[double] (x, bool normalize) -> Forward call for single input sample, applies normalization if needed") - .def("forward", py::overload_cast>&, bool>(&SplineNetLib::layer::forward), - "[[double]] (x, bool normalize) -> Forward call for batch inputs, applies normalization if needed") + .def(py::init&, const std::initializer_list&>()) + .def(py::init&, const std::vector&>()) + .def(py::init&>()) + .def(py::init([](const py::list &py_list) {//only for py module to turn nested lists and turn them to nested vector + auto nested_vector = convert_pylist_to_vector(py_list); + std::vector shape = get_shape(py_list); + return new SplineNetLib::CTensor(nested_vector,shape); + })) + .def("data",&SplineNetLib::CTensor::data,"std::vector, (None), returns the stored data vector as a copy") + .def("shape",&SplineNetLib::CTensor::shape,"std::vector, (None), returns the shape of the tensor like (dim0, dim1, ..., dimN)") + .def("grad",&SplineNetLib::CTensor::grad, "std::vector, (None), returns the grad as flat 1D projected vector (internally using tensor.shape)") + .def("zero_grad",&SplineNetLib::CTensor::zero_grad, "None, (None), sets the gradient of this tensor to 0" ) + .def("squeeze",&SplineNetLib::CTensor::squeeze, "None, (size_t dim), removes the dim and projects the data to the new shape") + .def("unsqueeze",&SplineNetLib::CTensor::unsqueeze, "None, (size_t dim), adds new dim at input dim index") + .def("expand",&SplineNetLib::CTensor::expand, "None, (size_t dim, size_t factor), expands the dimesnion at dim by factor -> shape: (2,2) expand(0,3) becomes: shape(6,2), (note this WILL affect the data)") + .def("permute",&SplineNetLib::CTensor::permute, "None, (std::vector), swaps dimesnions at input indecies -> shape(2,1,3) permute([2,0,1] becomes: shape(3,2,1))") + .def("transpose",&SplineNetLib::CTensor::transpose, "None, (None), transposes the tensor (swaps the innermost two dimesnions)") + .def("clear_history",&SplineNetLib::CTensor::clear_history, "None, (None), clears all grad fns from the tensor (gradient propagatuon WILL NOT work after this so use carefully)") + .def("clear_graph",&SplineNetLib::CTensor::clear_graph,"None, (None), clears full computational graph for all tensors conected to this one") + //.def("backward",&SplineNetLib::CTensor::backward, "None, (None), backwards pass through this and connected graph") + .def("backward", &SplineNetLib::CTensor::backward, + py::arg("prop_grad") = std::vector(), "None, (None), backwards pass through this and connected graph") + .def("__mul__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& other) {return self * other;}) + .def("__add__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& 
other) {return self + other; }) + .def("__sub__", [](SplineNetLib::CTensor& self, SplineNetLib::CTensor& other) {return self - other; }) + + .def("__getitem__", [](SplineNetLib::CTensor& self, size_t idx)->SplineNetLib::CTensor { return self[idx]; }); - // Overloaded 'backward' methods - .def("backward", py::overload_cast, std::vector, bool>(&SplineNetLib::layer::backward), - "[double] (x, d_y, bool normalize) -> Backward propagation for single input sample, applies grad if normalize is True") - .def("backward", py::overload_cast>&, std::vector>>(&SplineNetLib::layer::backward), - "[[double]] (x, d_y) -> Backward propagation for batch inputs, always applies gradients") - .def("get_splines", &SplineNetLib::layer::get_splines, - "[[SplineNetLib::spline]] -> Returns all splines in the layer"); } -*/ \ No newline at end of file + + + From a9781b14a7863f736336ad013ac7218132e73948 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Wed, 12 Feb 2025 21:57:54 +0100 Subject: [PATCH 05/19] fixed error in the setup.py file (sys wasnt included) --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 3ca8869..f6f7032 100644 --- a/setup.py +++ b/setup.py @@ -22,6 +22,7 @@ import os import subprocess import pybind11 +import sys def build_cpp_library(): #make sure cmake is installed From 01db9bcc985330c0bc8e7f650fedeee71f06140c Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Thu, 13 Feb 2025 11:35:08 +0100 Subject: [PATCH 06/19] testing .md changes --- docs/README.md | 20 ++++++++++++++------ src/SplineNetLib_py.cpp | 5 +++-- tests/unit_tests/py_spline_tests.py | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 tests/unit_tests/py_spline_tests.py diff --git a/docs/README.md b/docs/README.md index 3dbbb03..386b59e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -206,6 +206,12 @@ parameters : list = [[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0]] spline_instance = PySplineNetLib.spline(points,parameters) ``` +or alternatively do: + +```python +spline_instance = PySplineNetLib.spline([[0.0,0.0],[0.5,0.25],[1.0,1.0]],[[0.0]*4]*2) +``` + - spline interpolation: to properly init a spline call .interpolation() @@ -234,7 +240,9 @@ to find the splines gradient based on a give loss grad at spline point (x,y) cal d_y : float = spline_instance.backward(x, d_y, y) ``` x : float = point that was last evaluated -y : float = spline prediction at x + +y : float = actual target + d_y : float = gradient of loss with (x,target) with respect to spline (x,y) (=> loss.backward() or d_y of next layer) **Note :** @@ -252,7 +260,12 @@ lr : float = learning rate (controls how strong the gradient affects the splines ### layer documentation comming soon +layers combine multiple splines to map an input vector of size m to an output vector of size n by evaluating splines at the input values and combining these outputs into the output. To achieve this the layer uses an m x n spline matrix where for every inputi there exist m splines. +For example, given a layer with input size 3 and output size 2 a 3 by 2 matrix of splines is created. Now inputi is given to the spline vectori so that all splinesj get evaluated. the results of splinei,j is added to outputj. + +mathematically the output is defined like this: + ## install for c++ @@ -307,11 +320,6 @@ g++ -std=c++17 -I/path_to_include -L/path_to_lib -lSplineNetLib main.cpp -o Your ```txt git clone https://github.com/K-T0BIAS/Spline-based-DeepLearning.git cd Spline-based-DeepLearning -mkdir -p build -cd build -cmake .. -make -cd .. 
pip install . ``` diff --git a/src/SplineNetLib_py.cpp b/src/SplineNetLib_py.cpp index a3c45f2..674e15a 100644 --- a/src/SplineNetLib_py.cpp +++ b/src/SplineNetLib_py.cpp @@ -74,8 +74,9 @@ PYBIND11_MODULE(PySplineNetLib, m) { .def("apply_grad",&SplineNetLib::spline::apply_grad,"None (double lr),apply grad from backward * lr") .def("get_points",&SplineNetLib::spline::get_points,"[[double]] (None),return spline points like [[x0,y0],...,[xn,yn]]") .def("get_params",&SplineNetLib::spline::get_params,"[[double]] (None),return spline parameters/coefficients like [[a0,b0,c0,d0],...,[an,bn,cn,dn]]"); + py::class_(m, "layer") - .def(py::init()) + .def(py::init())//in size, out size, detail (num of parameters -2), max (maximum input value that spline processes) .def(py::init>>>, std::vector>>> >()) .def("interpolate_splines",&SplineNetLib::layer::interpolate_splines,"None (None), calls interpolation on all splines in the layer") .def("forward",py::overload_cast, bool>(&SplineNetLib::layer::forward),"[double] ([double] x, bool normalize), forward call for single input sample") @@ -84,7 +85,7 @@ PYBIND11_MODULE(PySplineNetLib, m) { .def("backward",py::overload_cast> &,std::vector> >(&SplineNetLib::layer::backward),"backward but for batches (will always apply gradients)") .def("get_splines",&SplineNetLib::layer::get_splines,"[[SplineNetLib::spline]] (None), returns all splines in the layer"); //int tensor - py::class_>(m, "CTensor") + py::class_>(m, "IntCTensor") .def(py::init&, const std::initializer_list&>()) .def(py::init&, const std::vector&>()) diff --git a/tests/unit_tests/py_spline_tests.py b/tests/unit_tests/py_spline_tests.py new file mode 100644 index 0000000..0811244 --- /dev/null +++ b/tests/unit_tests/py_spline_tests.py @@ -0,0 +1,18 @@ +import PySplineNetLib +import unittest + +class Spline_Test(unittest.TestCase): + + def Spline_init_Test(self): + A = PySplineNetLib.spline([[0,0],[0.5,1],[1,2]],[[0,0,0,0],[0,0,0,0]]) + A.interpolation() + a : float = A.forward(0.25) + self.assertAlmostEqual(0.5, a, delta = 0.000001) + a_y : float = A.backward(0.25, 0, 1) + #returns A.forward(0.25)=0.5 - y = 0 + d_y = 0 -> 0.5 - 1 = -0.5 + self.assertAlmostEqual(-0.5, a_y, delta = 0.000001) + A.apply_grad(1) #applies the gradient with factor 1.0 (moves y_i at x_i > 0.25 by -1 *grad {same as sign(grad)}) + A.interpolation() #fimds new params for the new spline + self.assertAlmostEqual([0.0, 3.5, 0.0, -2.0],[1.5, 2.0, -3.0, 2.0], A.get_params()) + self.assertAlmostEqual([[0.0, 0.0], [0.5, 1.5], [1.0, 2.0]], A.get_points()) + \ No newline at end of file From a4e676349d52dedb746b8952c1b1a4ea7b6fa237 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Thu, 13 Feb 2025 11:38:14 +0100 Subject: [PATCH 07/19] testing .md changes --- docs/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/README.md b/docs/README.md index 386b59e..977cc6f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -265,7 +265,9 @@ For example, given a layer with input size 3 and output size 2 a 3 by 2 matrix o mathematically the output is defined like this: - +$$ +y_j = \sum_{i=1}^{m} S_{i,j}(x_i), \quad \forall j \in \{1, \dots, n\} +$$ ## install for c++ From 61ba244716f2ff08172988c2c584a2484014a312 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Thu, 13 Feb 2025 12:20:49 +0100 Subject: [PATCH 08/19] layer documentation in the python part added --- docs/README.md | 66 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/docs/README.md b/docs/README.md 
index 977cc6f..1f9e623 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -124,7 +124,7 @@ vector> pred = layer_instance.forward(X, normalize);
 
 **assuming namespace std**
 ```cpp
-vector loss_gradient = layer_instance(X,d_y);
+vector loss_gradient = layer_instance.backward(X,d_y);
 ```
 
 * vector X = input (either from previous layer or from dataset)
@@ -133,7 +133,7 @@ vector loss_gradient = layer_instance(X,d_y);
 
 - batched backward pass:
 ```cpp
-vector> loss_gradient = layer_instance(X, d_y);
+vector> loss_gradient = layer_instance.backward(X, d_y);
 ```
 
 * vector> X = batched input (either from previous layer or from dataset)
@@ -258,17 +258,71 @@ spline_instance.apply_grad(lr)
 
 lr : float = learning rate (controls how strong the gradient affects the splines points)
 
-### layer documentation comming soon
+## layer
 
-layers combine multiple splines to map an input vector of size m to an output vector of size n by evaluating splines at the input values and combining these outputs into the output. To achieve this the layer uses an m x n spline matrix where for every inputi there exist m splines.
-For example, given a layer with input size 3 and output size 2 a 3 by 2 matrix of splines is created. Now inputi is given to the spline vectori so that all splinesj get evaluated. the results of splinei,j is added to outputj.
+layers combine multiple splines to map an input vector of size m to an output vector of size n by evaluating splines at the input values and combining these outputs into the output vector. To achieve this the layer uses an m x n spline matrix where for every input $x_i$ there exist n splines.
 
-mathematically the output is defined like this:
+mathematically the output $y$ is defined like this:
 
 $$
 y_j = \sum_{i=1}^{m} S_{i,j}(x_i), \quad \forall j \in \{1, \dots, n\}
 $$
 
+for example given input size 3 and output size 2, output $y_1$ is the sum of the splines $S_{i,1}(x_i)$ with $i$ from 1 to 3 (the input size)
+
+To create a new layer do:
+
+```python
+layer_instance = PySplineNetLib.layer(input_size, output_size, detail, max)
+```
+
+where:
+
+input_size : int = the size of the input vector
+output_size : int = the expected size of the output vector
+detail : int = the number of control points for each spline (NOTE that the spline has detail + 2 points, so to get 10 points detail should be 8)
+max : float = the maximum value that any spline in the layer can evaluate (recommended 1.0 combined with activations that map input and output to range(0,1))
+
+alternatively you can create a layer with start values for points and parameters like this:
+
+```python
+layer_instance = PySplineNetLib.layer(points, parameters)
+```
+
+with:
+points : list = nested list of points like : (input_size, output_size, detail +2, 2 = x,y)
+parameters : list = nested list of parameters like : (input_size, output_size, detail +1, 4)
+
+to fully init the layer call:
+
+```python
+layer_instance.interpolate_splines()
+```
+
+### forward pass
+
+```python
+pred = layer_instance.forward(X)
+```
+
+where:
+
+X : list = single input vector or batched input vector
+pred : list = prediction vector (also with batch dimension if the input was batched)
+
+### backward pass
+
+```python
+d_y = layer_instance.backward(X, d_y)
+```
+
+where:
+
+X is the last input this layer received
+d_y is the propagated gradient from the next layer (or the loss function)
+
+Note that backward will apply the gradient to all splines in the layer automatically
+
 ## install for c++
 
 ```txt
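To make the new layer section concrete, here is a minimal single-sample training step against the bindings registered in `src/SplineNetLib_py.cpp`. This is a sketch, not part of the patch series: the sizes, input and target are made-up illustration values, and the trailing bool arguments follow the normalize/apply-gradient flags described in the binding docstrings (the exact overloads may differ between versions):

```python
import PySplineNetLib

layer = PySplineNetLib.layer(3, 2, 2, 1.0)  # 3 inputs -> 2 outputs, detail 2, max x = 1.0
layer.interpolate_splines()                 # initialize all spline parameters before use

x = [0.1, 0.5, 0.9]                              # one sample, values inside [0, max]
pred = layer.forward(x, False)                   # False = no output normalization
d_y = [p - t for p, t in zip(pred, [0.0, 1.0])]  # simple error signal against a made-up target
layer.backward(x, d_y, True)                     # True applies the stored gradients
```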
From 7a664fa1bdcb29ac14c6b403ae2ac1b4cd001217 Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Thu, 13 Feb 2025 18:17:35 +0100
Subject: [PATCH 09/19] fixed bug where the CTensor would change after matmul
 backward. added clone member function in CTensor, DTensor and all Function
 classes for this

---
 include/SplineNetLib/CTensor.hpp     | 13 +++++++++-
 include/SplineNetLib/CTensorFunc.hpp |  8 ++++++
 src/CTensor.tpp                      |  6 +++++
 src/CTensorFunc.tpp                  | 26 ++++++++++++++++---
 tests/unit_tests/py_spline_tests.py  | 38 +++++++++++++++++++++++++++-
 5 files changed, 85 insertions(+), 6 deletions(-)

diff --git a/include/SplineNetLib/CTensor.hpp b/include/SplineNetLib/CTensor.hpp
index 862b915..272f40b 100644
--- a/include/SplineNetLib/CTensor.hpp
+++ b/include/SplineNetLib/CTensor.hpp
@@ -36,6 +36,13 @@ class DTensor{
     DTensor(const std::initializer_list& data, const std::initializer_list& shape) :
         _data(data), _shape(shape), _ref_c(1) {}
 
+    DTensor(const DTensor& other) : _data(other._data), _shape(other._shape), _grad(other._grad), _ref_c(1) {
+        // Deep copy unique_ptrs to grad fns by calling clone()
+        for (const auto& fn : other._grad_fn) {
+            _grad_fn.push_back(fn ? fn->clone() : nullptr);
+        }
+    }
+
     void add_ref(){
         _ref_c++;
     }
@@ -53,7 +60,7 @@ template
 class CTensor {
 private:
 
-
+    CTensor(DTensor* _t_data) : _tensor_data(_t_data){}
 
 public:
@@ -80,6 +87,8 @@ class CTensor {
     }
 
+
+
     ~CTensor(){
         _tensor_data->rmf_ref();
     }
@@ -96,6 +105,8 @@ class CTensor {
 
     void zero_grad();
 
+    CTensor clone();
+
     //-----shape-utils-----
 
     void squeeze(const size_t &dim) ;//squeezes / removes the input dim and changes the internal projection shape
diff --git a/include/SplineNetLib/CTensorFunc.hpp b/include/SplineNetLib/CTensorFunc.hpp
index b3fb503..8b686f7 100644
--- a/include/SplineNetLib/CTensorFunc.hpp
+++ b/include/SplineNetLib/CTensorFunc.hpp
@@ -38,6 +38,8 @@ class Function {
 
     virtual void backward(std::vector &prop_grad, CTensor *result) = 0;
 
+    virtual std::unique_ptr> clone() const = 0;
+
     static std::unordered_set*> global_chain;
 
     void clear_graph_f();
@@ -59,6 +61,8 @@ class AddFunction : public Function {
     std::vector fwd() override ;
 
     void backward(std::vector &prop_grad, CTensor *result) override;
+
+    virtual std::unique_ptr> clone() const override;
 };
 
 //subtractor function class for CTensor::operator-
@@ -74,6 +78,8 @@ class SubFunction : public Function {
 
     void backward(std::vector &prop_grad, CTensor *result) override;
 
+    virtual std::unique_ptr> clone() const override;
+
 };
 
 //matrix multiplication function class for CTensor::operator*
@@ -88,6 +94,8 @@ class MatMulFunction : public Function {
     std::vector fwd() override;
 
     void backward(std::vector &prop_grad, CTensor *result) override;
+
+    virtual std::unique_ptr> clone() const override;
 };
 
 } //namepace
diff --git a/src/CTensor.tpp b/src/CTensor.tpp
index 1fe0ce2..834ea71 100644
--- a/src/CTensor.tpp
+++ b/src/CTensor.tpp
@@ -264,6 +264,12 @@ void CTensor::backward(std::vector prop_grad) {
     }
     //std::cout<<"debug Ct bwd fn all bwd finish\n";
 }
+
+template
+CTensor CTensor::clone() {
+    CTensor Cloned_CTensor(new DTensor(*_tensor_data));
+    return Cloned_CTensor;
+}
 /* untestee
 template
 CTensor zeros(std::vector shape) {
diff --git a/src/CTensorFunc.tpp b/src/CTensorFunc.tpp
index 4640897..ebb1b6a 100644
--- a/src/CTensorFunc.tpp
+++ b/src/CTensorFunc.tpp
@@ -112,6 +112,12 @@ void AddFunction::backward(std::vector &prop_grad, CTensor *result) {
     //std::cout<<"debug add bwd chain erase\n";
 }
 
+template
+requires Scalar
+std::unique_ptr> AddFunction::clone() const {
+    return std::make_unique>(*this);
+}
+
 
 template
 requires Scalar
@@ -185,6 +191,12 @@ void SubFunction::backward(std::vector &prop_grad, CTensor
*result) { } +template +requires Scalar +std::unique_ptr> SubFunction::clone() const { + return std::make_unique>(*this); +} + template requires Scalar std::vector MatMulFunction::fwd() { @@ -195,8 +207,8 @@ std::vector MatMulFunction::fwd() { size_t a_n_dims = a_shape.size(); size_t b_n_dims = b_shape.size(); - auto a_copy = *(this->a); - auto b_copy = *(this->b); + auto a_copy = this->a->clone(); + auto b_copy = this->b->clone(); if (a_n_dims != b_n_dims) { throw std::invalid_argument("operator (*) expects both opperants to have the same num of dimensions but got:"+std::to_string(a_n_dims)+"and "+std::to_string(b_n_dims)+",please ensure opperants dims match by using squeeze or unsqueeze beforehand\n"); @@ -256,7 +268,7 @@ void MatMulFunction::backward(std::vector &prop_grad, CTensor *result) this->a->zero_grad(); } //create a copy of b and transpose it - auto b_copy = *(this->b); + auto b_copy = this->b->clone(); b_copy.transpose(); prop_grad_a = matmul(prop_grad, b_copy.data(), prop_grad_shape, b_copy.shape()); @@ -278,7 +290,7 @@ void MatMulFunction::backward(std::vector &prop_grad, CTensor *result) this->b->zero_grad(); } //create a copy of b and transpose it - auto a_copy = *(this->a); + auto a_copy = this->a->clone(); a_copy.transpose(); //std::cout<<"b bwd a_copy shape :"<::backward(std::vector &prop_grad, CTensor *result) Function::global_chain.erase(this); } +template +requires Scalar +std::unique_ptr> MatMulFunction::clone() const { + return std::make_unique>(*this); +} + }//namespace #endif \ No newline at end of file diff --git a/tests/unit_tests/py_spline_tests.py b/tests/unit_tests/py_spline_tests.py index 0811244..b324de9 100644 --- a/tests/unit_tests/py_spline_tests.py +++ b/tests/unit_tests/py_spline_tests.py @@ -15,4 +15,40 @@ def Spline_init_Test(self): A.interpolation() #fimds new params for the new spline self.assertAlmostEqual([0.0, 3.5, 0.0, -2.0],[1.5, 2.0, -3.0, 2.0], A.get_params()) self.assertAlmostEqual([[0.0, 0.0], [0.5, 1.5], [1.0, 2.0]], A.get_points()) - \ No newline at end of file + +class CTensor_Test(unittest.TestCase): + + def CTensor_init_Test(self): + a = PySplineNetLib.CTensor([[1,2,3],[4,5,6]]) + self.assertAlmostEqual([1,2,3,4,5,6], a.data()) + self.assertAlmostEqual([2,3], a.data()) + b = PySplineNetLib.CTensor([6,5,4,3,2,1],[3,2]) + self.assertAlmostEqual([6,5,4,3,2,1], b.data()) + self.assertAlmostEqual([3,2], b.shape()) + c = PySplineNetLib.CTensor(a) + self.assertAlmostEqual([1,2,3,4,5,6], c.data()) + self.assertAlmostEqual([2,3], c.shape()) + + def CTensor_math_Test(self): + a = PySplineNetLib.CTensor([[1,2,3],[4,5,6]]) + b = PySplineNetLib.CTensor([[6,5,4],[3,2,1]]) + + c = a + b; + self.assertAlmostEqual([7,7,7,7,7,7], c.data()) + self.assertAlmostEqual([2,3], c.shape()) + + b.transpose() + d = a * b; + self.assertAlmostEqual([28.0, 10.0, 73.0, 28.0], d.data()) + self.assertAlmostEqual([2,2], d.shape()) + + b.transpose() + e = a - b; + self.assertAlmostEqual([-5.0, -3.0, -1.0, 1.0, 3.0, 5.0], e.data()) + self.assertAlmostEqual([2,2], e.shape()) + + def Ctensor_grad_Test(self): + a = PySplineNetLib.CTensor([[2,2,2],[2,2,2]]) + b = PySplineNetLib.CTensor([[1,2],[3,4],[5,6]]) + c = PySplineNetLib.CTensor([[0.5,0.5],[0.5,0.5]]) + d = a * b + c \ No newline at end of file From e3f50b4b7f0d9947db10e4158da1bfd65fa9bdc3 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Sat, 15 Feb 2025 13:53:58 +0100 Subject: [PATCH 10/19] added minimal toml file for pip install --- PySplineNetLib.egg-info/PKG-INFO | 3 ++- PySplineNetLib.egg-info/SOURCES.txt | 1 + 
pyproject.toml | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 pyproject.toml diff --git a/PySplineNetLib.egg-info/PKG-INFO b/PySplineNetLib.egg-info/PKG-INFO index ef7e153..14c402b 100644 --- a/PySplineNetLib.egg-info/PKG-INFO +++ b/PySplineNetLib.egg-info/PKG-INFO @@ -1,5 +1,6 @@ -Metadata-Version: 2.1 +Metadata-Version: 2.2 Name: PySplineNetLib Version: 0.1 License-File: LICENSE Requires-Dist: pybind11>=2.6.0 +Dynamic: requires-dist diff --git a/PySplineNetLib.egg-info/SOURCES.txt b/PySplineNetLib.egg-info/SOURCES.txt index 07ef532..ddcf9e8 100644 --- a/PySplineNetLib.egg-info/SOURCES.txt +++ b/PySplineNetLib.egg-info/SOURCES.txt @@ -1,4 +1,5 @@ LICENSE +pyproject.toml setup.py PySplineNetLib.egg-info/PKG-INFO PySplineNetLib.egg-info/SOURCES.txt diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9c5a55d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=42", "wheel", "pybind11"] +build-backend = "setuptools.build_meta" \ No newline at end of file From 684249db030a82b732b31026aae58285db0ad4f3 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Sat, 15 Feb 2025 14:13:36 +0100 Subject: [PATCH 11/19] minor adjustments to gitignore --- .gitignore | 1 + PySplineNetLib.egg-info/PKG-INFO | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index de7e739..fa6e840 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ CTestTestfile.cmake *.user *.swp *.tmp +*.egg-info # IDE specific files (if you're using any IDE) # Uncomment for Visual Studio Code diff --git a/PySplineNetLib.egg-info/PKG-INFO b/PySplineNetLib.egg-info/PKG-INFO index 14c402b..ef7e153 100644 --- a/PySplineNetLib.egg-info/PKG-INFO +++ b/PySplineNetLib.egg-info/PKG-INFO @@ -1,6 +1,5 @@ -Metadata-Version: 2.2 +Metadata-Version: 2.1 Name: PySplineNetLib Version: 0.1 License-File: LICENSE Requires-Dist: pybind11>=2.6.0 -Dynamic: requires-dist From e90cba69c1139cad6cb8b45a093cdcee0d4c2f85 Mon Sep 17 00:00:00 2001 From: K-T0BIAS Date: Sat, 15 Feb 2025 15:00:51 +0100 Subject: [PATCH 12/19] more tests for the py version --- tests/unit_tests/py_spline_tests.py | 47 +++++++++++++++++------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/tests/unit_tests/py_spline_tests.py b/tests/unit_tests/py_spline_tests.py index b324de9..508454c 100644 --- a/tests/unit_tests/py_spline_tests.py +++ b/tests/unit_tests/py_spline_tests.py @@ -3,7 +3,7 @@ class Spline_Test(unittest.TestCase): - def Spline_init_Test(self): + def test_Spline_init_Test(self): A = PySplineNetLib.spline([[0,0],[0.5,1],[1,2]],[[0,0,0,0],[0,0,0,0]]) A.interpolation() a : float = A.forward(0.25) @@ -13,42 +13,51 @@ def Spline_init_Test(self): self.assertAlmostEqual(-0.5, a_y, delta = 0.000001) A.apply_grad(1) #applies the gradient with factor 1.0 (moves y_i at x_i > 0.25 by -1 *grad {same as sign(grad)}) A.interpolation() #fimds new params for the new spline - self.assertAlmostEqual([0.0, 3.5, 0.0, -2.0],[1.5, 2.0, -3.0, 2.0], A.get_params()) - self.assertAlmostEqual([[0.0, 0.0], [0.5, 1.5], [1.0, 2.0]], A.get_points()) + self.assertListEqual([[0.0, 0.5, 0.0, 2.0], [0.5, 2.0, 3.0, -2.0]], A.get_params()) + self.assertListEqual([[0.0, 0.0], [0.5, 0.5], [1.0, 2.0]], A.get_points()) class CTensor_Test(unittest.TestCase): - def CTensor_init_Test(self): + def test_CTensor_init_Test(self): a = PySplineNetLib.CTensor([[1,2,3],[4,5,6]]) - self.assertAlmostEqual([1,2,3,4,5,6], a.data()) - self.assertAlmostEqual([2,3], a.data()) + 
self.assertListEqual([1,2,3,4,5,6], a.data())
+        self.assertListEqual([2,3], a.shape())
         b = PySplineNetLib.CTensor([6,5,4,3,2,1],[3,2])
-        self.assertAlmostEqual([6,5,4,3,2,1], b.data())
-        self.assertAlmostEqual([3,2], b.shape())
+        self.assertListEqual([6,5,4,3,2,1], b.data())
+        self.assertListEqual([3,2], b.shape())
         c = PySplineNetLib.CTensor(a)
-        self.assertAlmostEqual([1,2,3,4,5,6], c.data())
-        self.assertAlmostEqual([2,3], c.shape())
+        self.assertListEqual([1,2,3,4,5,6], c.data())
+        self.assertListEqual([2,3], c.shape())
 
-    def CTensor_math_Test(self):
+    def test_CTensor_math_Test(self):
         a = PySplineNetLib.CTensor([[1,2,3],[4,5,6]])
         b = PySplineNetLib.CTensor([[6,5,4],[3,2,1]])
 
         c = a + b;
-        self.assertAlmostEqual([7,7,7,7,7,7], c.data())
-        self.assertAlmostEqual([2,3], c.shape())
+        self.assertListEqual([7,7,7,7,7,7], c.data())
+        self.assertListEqual([2,3], c.shape())
 
         b.transpose()
         d = a * b;
-        self.assertAlmostEqual([28.0, 10.0, 73.0, 28.0], d.data())
-        self.assertAlmostEqual([2,2], d.shape())
+        self.assertListEqual([28.0, 10.0, 73.0, 28.0], d.data())
+        self.assertListEqual([2,2], d.shape())
 
         b.transpose()
         e = a - b;
-        self.assertAlmostEqual([-5.0, -3.0, -1.0, 1.0, 3.0, 5.0], e.data())
-        self.assertAlmostEqual([2,2], e.shape())
+        self.assertListEqual([-5.0, -3.0, -1.0, 1.0, 3.0, 5.0], e.data())
+        self.assertListEqual([2,3], e.shape())
 
-    def Ctensor_grad_Test(self):
+    def test_Ctensor_grad_Test(self):
         a = PySplineNetLib.CTensor([[2,2,2],[2,2,2]])
         b = PySplineNetLib.CTensor([[1,2],[3,4],[5,6]])
         c = PySplineNetLib.CTensor([[0.5,0.5],[0.5,0.5]])
-        d = a * b + c
\ No newline at end of file
+        d = a * b + c
+        self.assertListEqual([18.5, 24.5, 18.5, 24.5],d.data())
+        self.assertListEqual([2,2],d.shape())
+        d.backward()
+        self.assertListEqual([3.0, 7.0, 11.0, 3.0, 7.0, 11.0], a.grad())
+        self.assertListEqual([4.0, 4.0, 4.0, 4.0, 4.0, 4.0], b.grad())
+        self.assertListEqual([1.0, 1.0, 1.0, 1.0], c.grad())
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
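The gradient values asserted in `test_Ctensor_grad_Test` are the standard matmul backward rules: with an all-ones upstream gradient $G$ for $d = a \cdot b + c$, the expected gradients are $G b^T$ for $a$, $a^T G$ for $b$, and $G$ itself for $c$. A short NumPy sketch to cross-check the numbers (NumPy is used here only for verification and is not a project dependency):

```python
import numpy as np

a = np.full((2, 3), 2.0)                      # same data as the test tensor a
b = np.array([[1., 2.], [3., 4.], [5., 6.]])  # same data as the test tensor b
g = np.ones((2, 2))                           # upstream gradient of d = a @ b + c

print(g @ b.T)  # [[3. 7. 11.] [3. 7. 11.]] -> flat [3, 7, 11, 3, 7, 11], matches a.grad()
print(a.T @ g)  # all 4.0 -> flat [4, 4, 4, 4, 4, 4], matches b.grad()
print(g)        # all 1.0 -> flat [1, 1, 1, 1], matches c.grad()
```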
From b2db021f76e050245d6f0b93eda842667311527e Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Sat, 15 Feb 2025 21:36:29 +0100
Subject: [PATCH 13/19] minor changes

---
 PySplineNetLib.egg-info/PKG-INFO             | 5 -----
 PySplineNetLib.egg-info/SOURCES.txt          | 9 ---------
 PySplineNetLib.egg-info/dependency_links.txt | 1 -
 PySplineNetLib.egg-info/requires.txt         | 1 -
 PySplineNetLib.egg-info/top_level.txt        | 1 -
 5 files changed, 17 deletions(-)
 delete mode 100644 PySplineNetLib.egg-info/PKG-INFO
 delete mode 100644 PySplineNetLib.egg-info/SOURCES.txt
 delete mode 100644 PySplineNetLib.egg-info/dependency_links.txt
 delete mode 100644 PySplineNetLib.egg-info/requires.txt
 delete mode 100644 PySplineNetLib.egg-info/top_level.txt

diff --git a/PySplineNetLib.egg-info/PKG-INFO b/PySplineNetLib.egg-info/PKG-INFO
deleted file mode 100644
index ef7e153..0000000
--- a/PySplineNetLib.egg-info/PKG-INFO
+++ /dev/null
@@ -1,5 +0,0 @@
-Metadata-Version: 2.1
-Name: PySplineNetLib
-Version: 0.1
-License-File: LICENSE
-Requires-Dist: pybind11>=2.6.0
diff --git a/PySplineNetLib.egg-info/SOURCES.txt b/PySplineNetLib.egg-info/SOURCES.txt
deleted file mode 100644
index ddcf9e8..0000000
--- a/PySplineNetLib.egg-info/SOURCES.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-LICENSE
-pyproject.toml
-setup.py
-PySplineNetLib.egg-info/PKG-INFO
-PySplineNetLib.egg-info/SOURCES.txt
-PySplineNetLib.egg-info/dependency_links.txt
-PySplineNetLib.egg-info/requires.txt
-PySplineNetLib.egg-info/top_level.txt
-src/SplineNetLib_py.cpp
\ No newline at end of file
diff --git a/PySplineNetLib.egg-info/dependency_links.txt b/PySplineNetLib.egg-info/dependency_links.txt
deleted file mode 100644
index 8b13789..0000000
--- a/PySplineNetLib.egg-info/dependency_links.txt
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/PySplineNetLib.egg-info/requires.txt b/PySplineNetLib.egg-info/requires.txt
deleted file mode 100644
index d89789f..0000000
--- a/PySplineNetLib.egg-info/requires.txt
+++ /dev/null
@@ -1 +0,0 @@
-pybind11>=2.6.0
diff --git a/PySplineNetLib.egg-info/top_level.txt b/PySplineNetLib.egg-info/top_level.txt
deleted file mode 100644
index 4457d5c..0000000
--- a/PySplineNetLib.egg-info/top_level.txt
+++ /dev/null
@@ -1 +0,0 @@
-PySplineNetLib

From 188090688826b396ee385159d357ec24cad5dc8d Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Sat, 15 Feb 2025 21:50:29 +0100
Subject: [PATCH 14/19] reconstructing the documentation into multiple
 specific .md files

---
 docs/README.md => README.md | 152 +-----------------------------------
 docs/cpp_splines.md         | 147 ++++++++++++++++++++++++++++
 2 files changed, 148 insertions(+), 151 deletions(-)
 rename docs/README.md => README.md (55%)
 create mode 100644 docs/cpp_splines.md

diff --git a/docs/README.md b/README.md
similarity index 55%
rename from docs/README.md
rename to README.md
index 1f9e623..52f9e62 100644
--- a/docs/README.md
+++ b/README.md
@@ -4,11 +4,7 @@
 
 [goals](#goals)
 
-[C++ documentation](#C-Implementationdocumentation)
-
-1. [splines](#splines)
-2. [layers](#layers)
-3. [network](#Network)
+[C++ spline documentation](docs/cpp_spline.md)
 
 [python documentation](#python-Implementationdocumentation)
 
@@ -33,152 +29,6 @@ see [install for python](#install-for-python) to install
 2. achieve similar or better precision to common deep learning approaches whilst keeping the structure as light-wheight and fast as possible.
 3. allow easy adaptability to existing architectures like convolutional and recurrent networks.
 
-## C++ Implementation/documentation
-
-### Splines
-The splines are the main computation unit of a layer. They allow for an easily adjustable and visualizable alternative to wheight matricies.
-To create a spline call:
-```cpp
-SplineNetLib::spline spline_instance = spline(points,parameters);
-```
-where points and parameters are vectors of shapes:
-
-$$
-( \text{output size}, \text{input size}, 2)
-$$
-
-and
-
-$$
-( \text{output size},\text{input size}, 4)
-$$
-
-**Note** that the x values of the points list must be sorted from smallest to biggest.
-
-to fully initialize the spline call:
-```cpp
-Spline_instance.interpolate();
-```
-this, although not always nessecery will adjust the parameters with respect to the points.
-
-To evaluate the spline at point x do:
-```cpp
-double y = Spline_instance.forward(x)
-```
-**Note** that x must be between 0 and the largest x value in the splines points list. Trying to access x values outside the spline will result in an error.
-
-To perform a backward pass call:
-```cpp
-double loss_grad = spline.backward(x,d_y,lr)
-```
-* double x = input
-* double d_y = loss Gradient of the next layer
-* double lr = learning rate
-
-### layers
-A layer uses splines as substitution for wheight and bias matricies.
-Layers are implemented similar to torch.nn.linear(); -To create a new layer call: -```cpp -SplineNetLib::layer layer_instance = layer(in_size,out_size,detail,max); -``` -* unsigned int in_size = num of elements in the input vector -* unsigned int out_size = num of elements in the target vector (like neurons in linear) -* unsigned int detail = num of controlpoints (exept for default points at 0,0 and max,0) -* double max = Maximum x value (recomended to be 1.0) - -To load a layer from previously found points call: -```cpp -SplineNetLib::layer layer_instance = layer(points,parameters); -``` -**assuming namespace std** -* vector>>> points ({{{{x,y},...},...},...}) = Matrix like (input size • output size • detail + 2 • 2) -* vector>>> parameters ({{{{0,0,0,0},...},...},...} = Matrix like (input size • output size • detail + 1 • 4) - -To fully init a layer call: -```cpp -layer_instance.interpolate_splines(); -``` -**Single layer training:** - -- single sample forward pass: - -**assuming namespace std** -```cpp -vector pred = layer_instance.forward(X, normalize); -``` -* vector X = input vector (with size == layer input size) -* bool normalize = output normalization (if True output will be between 0 and 1) -* pred.size() == layer output size - -- batched forward pass: -```cpp -vector> pred = layer_instance.forward(X, normalize); -``` -* vector> X = batched input (with size == batch size , layer input size) -* bool normalize = output normalization (if True output will be between 0 and 1) -* pred.size() = batch size -* pred[0].size() = layer output size - -- single sample backward pass: - -**assuming namespace std** -```cpp -vector loss_gradient = layer_instance.backward(X,d_y); -``` - -* vector X = input (either from previous layer or from dataset) -* vector d_y = loss_gradient (from next layer or loss function) -* loss_gradient == d_y for the previous layers backward pass - -- batched backward pass: -```cpp -vector> loss_gradient = layer_instance.backward(X, d_y); -``` - -* vector> X = batched input (either from previous layer or from dataset) -* vector> d_y = batched loss_gradient (from next layer or from loss function) -* loss_gradient == d_y for the previous layer backward pass (propagated gradient) - -**layer size:** - -$$ -\text{layer parameters} = \text{input size} × \text{output size} × (\text{detail} + 2) × 2 + \text{input size} * \text{output size} × (\text{detail} + 1) × 4 -$$ - -### Network - -To create a spline network call -```cpp -SplineNetLib::nn network_instance = nn(num_layers,input_sizes,output_sizes,details,max_values) -``` -**assuming namespace std** -* int num_layers = number of layers the network is supposed to have -* vector input_sizes = input_sizes for the layer at each index (e.g. 
{2,3} layer 0 takes 2 inputs)
-* vector output_sizes = output_sizes for each layer
-* vector details = detail for each layer
-* vector max_values = max value for each layer (best to set all layers except last to 1.0 and use activation functions to normalize the output between 0 and 1)
-
-**Training**
-
-- forward pass:
-
-  ```cpp
-  std::vector pred = network_instance.forward(X, normalize)
-  ```
-  * vector X = input
-  * bool normalize = normalize outputs (not recommended better use activation functions and itterate manually over the layers)
-
-- backwards pass
-
-```cpp
-std::vector loss_gradient = network_instance.backward(X,d_y)
-```
-* std::vector X = forward prediction
-* std::vector d_y = loss_gradient
-
-(when using the manual approach meaning iterating manually over layers to apply activations you have to do the backward pass manually aswell.)
-
 ## python Implementation/documentation
 
 ### import
diff --git a/docs/cpp_splines.md b/docs/cpp_splines.md
new file mode 100644
index 0000000..b193049
--- /dev/null
+++ b/docs/cpp_splines.md
@@ -0,0 +1,147 @@
+### splines
+
+The splines are the main computation unit of a layer. They allow for an easily adjustable and visualizable alternative to weight matrices.
+To create a spline call:
+```cpp
+SplineNetLib::spline spline_instance = spline(points,parameters);
+```
+where points and parameters are vectors of shapes:
+
+$$
+( \text{output size}, \text{input size}, 2)
+$$
+
+and
+
+$$
+( \text{output size},\text{input size}, 4)
+$$
+
+**Note** that the x values of the points list must be sorted from smallest to biggest.
+
+to fully initialize the spline call:
+```cpp
+Spline_instance.interpolate();
+```
+this, although not always necessary, will adjust the parameters with respect to the points.
+
+To evaluate the spline at point x do:
+```cpp
+double y = Spline_instance.forward(x)
+```
+**Note** that x must be between 0 and the largest x value in the spline's points list. Trying to access x values outside the spline will result in an error.
+
+To perform a backward pass call:
+```cpp
+double loss_grad = spline.backward(x,d_y,lr)
+```
+* double x = input
+* double d_y = loss Gradient of the next layer
+* double lr = learning rate
+
+### layers
+
+A layer uses splines as a substitution for weight and bias matrices.
+Layers are implemented similar to torch.nn.Linear().
+To create a new layer call:
+```cpp
+SplineNetLib::layer layer_instance = layer(in_size,out_size,detail,max);
+```
+* unsigned int in_size = num of elements in the input vector
+* unsigned int out_size = num of elements in the target vector (like neurons in linear)
+* unsigned int detail = num of control points (except for default points at 0,0 and max,0)
+* double max = Maximum x value (recommended to be 1.0)
+
+To load a layer from previously found points call:
+```cpp
+SplineNetLib::layer layer_instance = layer(points,parameters);
+```
+**assuming namespace std**
+* vector>>> points ({{{{x,y},...},...},...}) = Matrix like (input size • output size • detail + 2 • 2)
+* vector>>> parameters ({{{{0,0,0,0},...},...},...} = Matrix like (input size • output size • detail + 1 • 4)
+
+To fully init a layer call:
+```cpp
+layer_instance.interpolate_splines();
+```
+**Single layer training:**
+
+- single sample forward pass:
+
+**assuming namespace std**
+```cpp
+vector pred = layer_instance.forward(X, normalize);
+```
+* vector X = input vector (with size == layer input size)
+* bool normalize = output normalization (if True output will be between 0 and 1)
+* pred.size() == layer output size
+
+- batched forward pass:
+```cpp
+vector> pred = layer_instance.forward(X, normalize);
+```
+* vector> X = batched input (with size == batch size , layer input size)
+* bool normalize = output normalization (if True output will be between 0 and 1)
+* pred.size() = batch size
+* pred[0].size() = layer output size
+
+- single sample backward pass:
+
+**assuming namespace std**
+```cpp
+vector loss_gradient = layer_instance.backward(X,d_y);
+```
+
+* vector X = input (either from previous layer or from dataset)
+* vector d_y = loss_gradient (from next layer or loss function)
+* loss_gradient == d_y for the previous layer's backward pass
+
+- batched backward pass:
+```cpp
+vector> loss_gradient = layer_instance.backward(X, d_y);
+```
+
+* vector> X = batched input (either from previous layer or from dataset)
+* vector> d_y = batched loss_gradient (from next layer or from loss function)
+* loss_gradient == d_y for the previous layer backward pass (propagated gradient)
+
+**layer size:**
+
+$$
+\text{layer parameters} = \text{input size} × \text{output size} × (\text{detail} + 2) × 2 + \text{input size} × \text{output size} × (\text{detail} + 1) × 4
+$$
+
+### Network
+
+To create a spline network call:
+```cpp
+SplineNetLib::nn network_instance = nn(num_layers,input_sizes,output_sizes,details,max_values)
+```
+**assuming namespace std**
+* int num_layers = number of layers the network is supposed to have
+* vector input_sizes = input_sizes for the layer at each index (e.g. {2,3} means layer 0 takes 2 inputs)
+* vector output_sizes = output_sizes for each layer
+* vector details = detail for each layer
+* vector max_values = max value for each layer (best to set all layers except last to 1.0 and use activation functions to normalize the output between 0 and 1)
+
+**Training**
+
+- forward pass:
+
+  ```cpp
+  std::vector pred = network_instance.forward(X, normalize)
+  ```
+  * vector X = input
+  * bool normalize = normalize outputs (not recommended; better to use activation functions and iterate manually over the layers)
+
+- backward pass
+
+```cpp
+std::vector loss_gradient = network_instance.backward(X,d_y)
+```
+* std::vector X = forward prediction
+* std::vector d_y = loss_gradient
+
+(when using the manual approach, meaning iterating manually over layers to apply activations, you have to do the backward pass manually as well.)
+
+[<- back to Documentation](../README.md)
\ No newline at end of file
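As a quick sanity check of the layer-size formula that moved into `docs/cpp_splines.md` above: a layer with input size 3, output size 2 and detail 2 holds 3 · 2 = 6 splines, each with detail + 2 = 4 points (8 stored coordinates) and detail + 1 = 3 cubic segments (12 coefficients), so

$$
3 × 2 × (2 + 2) × 2 + 3 × 2 × (2 + 1) × 4 = 48 + 72 = 120
$$

parameters in total.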
From f8bee993c48855ecd0efd2f694f7958051937a07 Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Sat, 15 Feb 2025 21:55:59 +0100
Subject: [PATCH 15/19] reconstructing the documentation into multiple
 specific .md files

---
 README.md          | 150 +---------------------------------------
 docs/py_splines.md | 146 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 148 insertions(+), 148 deletions(-)
 create mode 100644 docs/py_splines.md

diff --git a/README.md b/README.md
index 52f9e62..2464417 100644
--- a/README.md
+++ b/README.md
@@ -4,12 +4,10 @@
 
 [goals](#goals)
 
-[C++ spline documentation](docs/cpp_spline.md)
+[C++ spline documentation](docs/cpp_splines.md)
 
-[python documentation](#python-Implementationdocumentation)
+[python spline documentation](docs/py_splines.md)
 
-1. [splines](#splines-2)
-2. [layers](#layer-documentation-comming-soon)
 
 ## New:
@@ -29,150 +27,6 @@ see [install for python](#install-for-python) to install
 2. achieve similar or better precision to common deep learning approaches whilst keeping the structure as light-wheight and fast as possible.
 3. allow easy adaptability to existing architectures like convolutional and recurrent networks.
 
-## python Implementation/documentation
-
-### import
-
-```python
-import PySplineNetLib as some_name
-```
-
-### Splines
-Splines are the main computation unit for this approach, they essentially provide an easily visualizable alternative to weight matrices
-
-- spline creation:
-```python
-spline_instance = PySplineNetLib.spline(points,parameters)
-```
-* points : list = list of points like (num points, 2)
-* parameters : list = list of parameters like (num points - 1, 4)
-
-**full example**
-
-```python
-points : list = [[0.0,0.0],[0.5,0.25],[1.0,1.0]]
-parameters : list = [[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0]]
-
-spline_instance = PySplineNetLib.spline(points,parameters)
-```
-
-or alternatively do:
-
-```python
-spline_instance = PySplineNetLib.spline([[0.0,0.0],[0.5,0.25],[1.0,1.0]],[[0.0]*4]*2)
-```
-
-- spline interpolation:
-
-to properly init a spline call .interpolation()
-
-```python
-spline_instance.interpolation()
-```
-
-this ensures that the parameters are properly set for evaluation and training
-
-- spline forward pass / evaluation:
-
-to evaluate the spline at x call
-
-```python
-y : float = spline_instance.forward(x)
-```
-
-x : float = point to be evaluated
-
-- spline backward / gradient propagation:
-
-to find the spline's gradient based on a given loss grad at spline point (x,y) call
-
-```python
-d_y : float = spline_instance.backward(x, d_y, y)
-```
-x : float = point that was last evaluated
-
-y : float = actual target
-
-d_y : float = gradient of loss with (x,target) with respect to spline (x,y) (=> loss.backward() or d_y of next layer)
-
-**Note :**
-
-The gradient of this function call is internally stored in the spline.
-
-- adjust spline based on gradient
-
-to apply the gradient from .backward and adjust the spline call:
-```python
-spline_instance.apply_grad(lr)
-```
-
-lr : float = learning rate (controls how strong the gradient affects the spline's points)
-
-## layer
-
-layers combine multiple splines to map an input vector of size m to an output vector of size n by evaluating splines at the input values and combining these outputs into the output vector. To achieve this the layer uses an m x n spline matrix where for every input $x_i$ there exist n splines.
-
-mathematically the output $y$ is defined like this:
-
-$$
-y_j = \sum_{i=1}^{m} S_{i,j}(x_i), \quad \forall j \in \{1, \dots, n\}
-$$
-
-for example given input size 3 and output size 2, output $y_1$ is the sum of the splines $S_{i,1}(x_i)$ with $i$ from 1 to 3 (the input size)
-
-To create a new layer do:
-
-```python
-layer_instance = PySplineNetLib.layer(input_size, output_size, detail, max)
-```
-
-where:
-
-input_size : int = the size of the input vector
-output_size : int = the expected size of the output vector
-detail : int = the number of control points for each spline (NOTE that the spline has detail + 2 points, so to get 10 points detail should be 8)
-max : float = the maximum value that any spline in the layer can evaluate (recommended 1.0 combined with activations that map input and output to range(0,1))
-
-alternatively you can create a layer with start values for points and parameters like this:
-
-```python
-layer_instance = PySplineNetLib.layer(points, parameters)
-```
-
-with:
-points : list = nested list of points like : (input_size, output_size, detail +2, 2 = x,y)
-parameters : list = nested list of parameters like : (input_size, output_size, detail +1, 4)
-
-to fully init the layer call:
-
-```python
-layer_instance.interpolate_splines()
-```
-
-### forward pass
-
-```python
-pred = layer_instance.forward(X)
-```
-
-where:
-
-X : list = single input vector or batched input vector
-pred : list = prediction vector (also with batch dimension if the input was batched)
-
-### backward pass
-
-```python
-d_y = layer_instance.backward(X, d_y)
-```
-
-where:
-
-X is the last input this layer received
-d_y is the propagated gradient from the next layer (or the loss function)
-
-Note that backward will apply the gradient to all splines in the layer automatically
-
 ## install for c++
 
 ```txt
diff --git a/docs/py_splines.md b/docs/py_splines.md
new file mode 100644
index 0000000..9805eb1
--- /dev/null
+++ b/docs/py_splines.md
@@ -0,0 +1,146 @@
+
+## python Implementation/documentation
+
+### import
+
+```python
+import PySplineNetLib as some_name
+```
+
+### Splines
+Splines are the main computation unit for this approach, they essentially provide an easily visualizable alternative to weight matrices
+
+- spline creation:
+```python
+spline_instance = PySplineNetLib.spline(points,parameters)
+```
+* points : list = list of points like (num points, 2)
+* parameters : list = list of parameters like (num points - 1, 4)
+
+**full example**
+
+```python
+points : list = [[0.0,0.0],[0.5,0.25],[1.0,1.0]]
+parameters : list = [[0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0]]
+
+spline_instance = PySplineNetLib.spline(points,parameters)
+```
+
+or alternatively do:
+
+```python
+spline_instance = PySplineNetLib.spline([[0.0,0.0],[0.5,0.25],[1.0,1.0]],[[0.0]*4]*2)
+```
+
+- spline interpolation:
+
+to properly init a spline call .interpolation()
+
+```python
+spline_instance.interpolation()
+```
+
+this ensures that the parameters are properly set for evaluation and training
+
+- spline forward pass / evaluation:
+
+to evaluate the spline at x call
+
+```python
+y : float = spline_instance.forward(x)
+```
+
+x : float = point to be evaluated
+
+- spline backward / gradient propagation:
+
+to find the spline's gradient based on a given loss grad at spline point (x,y) call
+
+```python
+d_y : float = spline_instance.backward(x, d_y, y)
+```
+x : float = point that was last evaluated
+
+y : float = actual target
+
+d_y : float = gradient of loss with (x,target) with respect to spline (x,y) (=> loss.backward() or d_y of next layer)
+
+**Note :**
+
+The gradient of this function call is internally stored in the spline.
+
+- adjust spline based on gradient
+
+to apply the gradient from .backward and adjust the spline call:
+```python
+spline_instance.apply_grad(lr)
+```
+
+lr : float = learning rate (controls how strong the gradient affects the spline's points)
+
+## layer
+
+layers combine multiple splines to map an input vector of size m to an output vector of size n by evaluating splines at the input values and combining these outputs into the output vector. To achieve this the layer uses an m x n spline matrix where for every input $x_i$ there exist n splines.
+
+mathematically the output $y$ is defined like this:
+
+$$
+y_j = \sum_{i=1}^{m} S_{i,j}(x_i), \quad \forall j \in \{1, \dots, n\}
+$$
+
+for example given input size 3 and output size 2, output $y_1$ is the sum of the splines $S_{i,1}(x_i)$ with $i$ from 1 to 3 (the input size)
+
+To create a new layer do:
+
+```python
+layer_instance = PySplineNetLib.layer(input_size, output_size, detail, max)
+```
+
+where:
+
+input_size : int = the size of the input vector
+output_size : int = the expected size of the output vector
+detail : int = the number of control points for each spline (NOTE that the spline has detail + 2 points, so to get 10 points detail should be 8)
+max : float = the maximum value that any spline in the layer can evaluate (recommended 1.0 combined with activations that map input and output to range(0,1))
+
+alternatively you can create a layer with start values for points and parameters like this:
+
+```python
+layer_instance = PySplineNetLib.layer(points, parameters)
+```
+
+with:
+points : list = nested list of points like : (input_size, output_size, detail +2, 2 = x,y)
+parameters : list = nested list of parameters like : (input_size, output_size, detail +1, 4)
+
+to fully init the layer call:
+
+```python
+layer_instance.interpolate_splines()
+```
+
+### forward pass
+
+```python
+pred = layer_instance.forward(X)
+```
+
+where:
+
+X : list = single input vector or batched input vector
+pred : list = prediction vector (also with batch dimension if the input was batched)
+
+### backward pass
+
+```python
+d_y = layer_instance.backward(X, d_y)
+```
+
+where:
+
+X is the last input this layer received
+d_y is the propagated gradient from the next layer (or the loss function)
+
+Note that backward will apply the gradient to all splines in the layer automatically
+
+[<- back to Documentation](../README.md)
\ No newline at end of file
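The spline section of `docs/py_splines.md` above maps directly onto the unit test added earlier in this series; a minimal round trip with the test's values (the commented numbers are the ones asserted in `tests/unit_tests/py_spline_tests.py`, so treat them as illustrative):

```python
import PySplineNetLib

s = PySplineNetLib.spline([[0.0, 0.0], [0.5, 1.0], [1.0, 2.0]],
                          [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]])
s.interpolation()                  # fit the cubic parameters to the points
y = s.forward(0.25)                # ~0.5 on this spline
d_y = s.backward(0.25, 0.0, 1.0)   # prediction - target = 0.5 - 1.0 = -0.5
s.apply_grad(1.0)                  # move the control points along the stored gradient
s.interpolation()                  # re-fit the parameters to the shifted points
```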
From 1604ce3862ba707dbe4a71c74945b7802bbab9c8 Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Sat, 15 Feb 2025 22:58:18 +0100
Subject: [PATCH 16/19] reconstructing the documentation into multiple
 specific .md files

---
 README.md                        |   4 +
 docs/cpp_CTensor.md              | 135 +++++++++++++++++++++++++++
 docs/cpp_splines.md              |   2 +-
 docs/py_splines.md               |   2 +-
 include/SplineNetLib/CTensor.hpp |   1 +
 5 files changed, 142 insertions(+), 2 deletions(-)
 create mode 100644 docs/cpp_CTensor.md

diff --git a/README.md b/README.md
index 2464417..1193d67 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,10 @@ see [install for python](#install-for-python) to install
 
 * batch compatibility for layers
 
+* CTensor class (tensor class with automatic computation graph and gradient propagation)
+
+* python version for CTensor
+
 **documentation was not yet updated some features might have changed and new features were added**
 
 **updates will follow soon**
diff --git a/docs/cpp_CTensor.md b/docs/cpp_CTensor.md
new file mode 100644
index 0000000..e9cc365
--- /dev/null
+++ b/docs/cpp_CTensor.md
@@ -0,0 +1,135 @@
+## CPP CTensor Documentation
+
+### include
+
+First, include the library header:
+
+```cpp
+#include "SplineNetLib/SplineNet.hpp"
+```
+
+### CTensor constructors
+
+The CTensor class is useful to perform tensor operations while automatically tracking the operations that a CTensor was involved with.
+A CTensor stores the N dimensional data in a flat projected vector (std::vector) alongside its actual shape (std::vector).
+It will also store all arithmetic functions that it was used in or created from in a grad_fn vector (std::vector>>). Important to note here is that a CTensor only gets a new grad_fn if it was the direct result of an operation (e.g. c = a + b , here only c gets the grad_fn entry).
+grad_fns are classes that hold information about the parents of a CTensor (e.g. c = a + b, here c gets a new grad_fn that knows that a and b are the parents). They also have functions that determine the behaviour of the gradient propagation.
+Calling the backward function on one CTensor will automatically calculate the respective gradients of all other CTensors in the graph.
+
+**Note** that the CTensor architecture was inspired by the pytorch tensor architecture. Read more here : [pytorch](https://github.com/pytorch/pytorch)
+
+CTensors have multiple constructor options:
+
+1: construct from nested vector
+
+```cpp
+std::vector> data = {{1,2,3},{4,5,6}};
+
+auto CTensor_instance = SplineNetLib::CTensor(data);
+```
+
+this creates a CTensor of shape {2,3}.
+**Note** that new CTensors always have their requires gradient flag set to True.
+
+2: construct from flat initializer list with initializer list of shape:
+
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1.0,2.0,3.0,4.0,5.0,6.0}, {2,3});
+```
+
+this will result in the same CTensor as in the previous constructor
+
+3: construct from flat vector and shape
+
+```cpp
+std::vector shape = {2,3};
+std::vector data = {1,2,3,4,5,6};
+
+auto CTensor_instance = SplineNetLib::CTensor(data, shape);
+```
+
+4: construct from existing CTensor (shallow copy)
+
+```cpp
+auto first_CTensor = SplineNetLib::CTensor({1.0,2.0,3.0,4.0,5.0,6.0}, {2,3});
+
+auto new_CTensor = SplineNetLib::CTensor(first_CTensor);
+```
+
+**Note** this creates a shallow copy; any changes to one will affect the other
+
+4.1: deep copy / clone
+
+If an exact copy of a CTensor that is independent is needed, do:
+
+```cpp
+auto first_CTensor = SplineNetLib::CTensor({1.0,2.0,3.0,4.0,5.0,6.0}, {2,3});
+
+auto new_CTensor = first_CTensor.clone();
+```
+
+this will create a deep copy of "first_CTensor"
+
+### CTensor shape related functions
+
+#### squeeze
+
+squeeze will remove the indexed dimension from the shape. **Note** that the tensor size will remain the same and the size of the adjacent dimension will increase.
+
+syntax:
+```cpp
+Ctensor.squeeze(size_t dim);
+```
+
+example:
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,3},{1,3});
+CTensor_instance.squeeze(0);
+```
+
+this will turn shape (1,3) into (3)
+
+#### unsqueeze
+
+unsqueeze will add a dimension of size 1 at the given index
+
+syntax:
+```cpp
+Ctensor.unsqueeze(size_t dim);
+```
+
+example:
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,3},{3});
+CTensor_instance.unsqueeze(0);
+```
+
+this turns CTensor with shape (3) to CTensor with shape (1,3)
+
+#### expand
+
+expand can increase the size of the selected dimension by a factor n. The data at the selected dimension will be copied and appended n times.
+syntax:
+```cpp
+Ctensor.expand(size_t dim, size_t factor);
+```
+
+example:
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,3},{1,3});
+CTensor_instance.expand(0, 3);
+```
+
+the shape (1,3) becomes (3,3) and the data
+
+((1,2,3)) becomes ↓
+
+((1,2,3),
+ (1,2,3),
+ (1,2,3))
+
+#### permute
+
+swaps around dimension sizes
+
+syntax:
+```cpp
+Ctensor.permute(index_vector);
+```
+example:
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,1,2,1,2,1,2},{1,4,2});
+std::vector index_vector = {0,2,1};
+CTensor_instance.permute(index_vector);
+```
+
+the shape (1,4,2) will become (1,2,4). **Note** that this will not change the actual data vector, as the permutation only affects the projection logic, meaning that indexing a permuted CTensor will give different results than before the permutation although the underlying data is the same.
+
+#### transpose
+
+this transposes the CTensor, meaning it swaps the innermost two dimensions (including the data in the flat vector)
+
+syntax:
+
+```cpp
+Ctensor.transpose();
+```
+
+example:
+
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1.0,2.0,3.0,4.0,5.0,6.0}, {2,3});
+
+CTensor_instance.transpose();
+```
+
+this will swap dim0 and dim1, so shape (2,3) becomes (3,2). The data vector [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] will change to [1.0, 4.0, 2.0, 5.0, 3.0, 6.0] to fit the new shape.
+
+
+**more coming soon**
+
+[<- back to Documentation](../README.md)
\ No newline at end of file
diff --git a/docs/cpp_splines.md b/docs/cpp_splines.md
index b193049..f25a2fb 100644
--- a/docs/cpp_splines.md
+++ b/docs/cpp_splines.md
@@ -1,4 +1,4 @@
-### splines
+### CPP Splines Documentation
 
 The splines are the main computation unit of a layer. They allow for an easily adjustable and visualizable alternative to weight matrices.
 To create a spline call:
diff --git a/docs/py_splines.md b/docs/py_splines.md
index 9805eb1..97a396d 100644
--- a/docs/py_splines.md
+++ b/docs/py_splines.md
@@ -1,5 +1,5 @@
-
-## python Implementation/documentation
+
+## python Spline Implementation/documentation
 
 ### import
diff --git a/include/SplineNetLib/CTensor.hpp b/include/SplineNetLib/CTensor.hpp
index 272f40b..758406c 100644
--- a/include/SplineNetLib/CTensor.hpp
+++ b/include/SplineNetLib/CTensor.hpp
@@ -72,6 +72,7 @@ class CTensor {
         _tensor_data = new DTensor(init, shape);
     }
 
+
     CTensor(const std::vector& data, const std::vector& shape) {
         _tensor_data = new DTensor(data, shape);
     }

From 80db8f7f09f10568f213b07e2ee18144c54ad750 Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Sat, 15 Feb 2025 22:59:33 +0100
Subject: [PATCH 17/19] reconstructing the documentation into multiple
 specific .md files

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 1193d67..1d3336e 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,8 @@
 
 [C++ spline documentation](docs/cpp_splines.md)
 
+[C++ CTensor documentation](docs/cpp_CTensor.md)
+
 [python spline documentation](docs/py_splines.md)
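The transpose semantics documented above are what make the matmul operator `*` line up: after `b.transpose()` a (2, 3) tensor multiplies a (3, 2) one. The same behaviour is visible through the Python bindings; the values below are the ones asserted in `test_CTensor_math_Test`:

```python
import PySplineNetLib

a = PySplineNetLib.CTensor([[1, 2, 3], [4, 5, 6]])  # shape (2, 3)
b = PySplineNetLib.CTensor([[6, 5, 4], [3, 2, 1]])  # shape (2, 3)

b.transpose()      # shape (2, 3) -> (3, 2), data reordered to match
d = a * b          # matmul: (2, 3) x (3, 2) -> (2, 2)
print(d.data())    # [28.0, 10.0, 73.0, 28.0]
print(d.shape())   # [2, 2]
```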
From 7a97c01578c47e63a59eafabd0084b773aa2d1da Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Thu, 20 Feb 2025 11:49:02 +0100
Subject: [PATCH 18/19] fixed bug where reshaping ops were not included in comp
 graph. Added ReShapeFunction of type Function to manage reshape ops in the
 graph. This uses reshape flags see CTensorFunc.hpp

---
 README.md                             |  4 ++
 docs/cpp_CTensor.md                   | 66 +++++++++++++++++++++++++
 include/SplineNetLib/CTensor.hpp      |  2 +
 include/SplineNetLib/CTensorFunc.hpp  | 37 +++++++++++++-
 include/SplineNetLib/CTensorUtils.hpp |  4 ++
 src/CTensor.tpp                       | 70 ++++++++++++++++++++++++++-
 src/CTensorFunc.tpp                   | 56 +++++++++++++++++++--
 src/CTensorUtils.tpp                  | 42 ++++------------
 8 files changed, 242 insertions(+), 39 deletions(-)

diff --git a/README.md b/README.md
index 1d3336e..91ab0ca 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,9 @@
 # About Spline-based-DeepLearning
 
+## bugs
+
+* reshaping a CTensor after performing operations on it may break the gradient calculation!
+
 ## Table of contents
 
 [goals](#goals)
diff --git a/docs/cpp_CTensor.md b/docs/cpp_CTensor.md
index e9cc365..7ab2c38 100644
--- a/docs/cpp_CTensor.md
+++ b/docs/cpp_CTensor.md
@@ -70,6 +70,33 @@ auto new_CTensor = first_CTensor.clone();
 
 this will create a deep copy of "first_CTensor"
 
+### CTensor getter functions
+
+#### data()
+
+this returns the inner data vector from the CTensor. **Note** that this data vector is the flat representation of the CTensor.
+
+example:
+
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,3,4,5,6},{2,3});
+auto data = CTensor_instance.data();
+```
+
+here data will be a vector like {1,2,3,4,5,6}, where 'T' is the datatype of CTensor_instance.
+
+#### shape()
+
+this returns the shape of the CTensor
+
+example:
+```cpp
+auto CTensor_instance = SplineNetLib::CTensor({1,2,3,4,5,6},{2,3});
+auto shape = CTensor_instance.shape();
+```
+
+this returns a vector = {2,3}.
 
 ### CTensor shape related functions
 
 #### squeeze
diff --git a/include/SplineNetLib/CTensor.hpp b/include/SplineNetLib/CTensor.hpp
index 758406c..6e5b087 100644
--- a/include/SplineNetLib/CTensor.hpp
+++ b/include/SplineNetLib/CTensor.hpp
@@ -120,6 +120,8 @@ class CTensor {
     void permute(const std::vector &permutation_indecies) ; //will swap dimesnions at the permutation indecies //shape (2,3,4) permute(2,0,1) becomes: (4,2,3)
 
+    void reduce(const size_t &dim, const size_t &factor) ;
+
     void transpose() ;
 
     //-----auto_grad-----
diff --git a/include/SplineNetLib/CTensorFunc.hpp b/include/SplineNetLib/CTensorFunc.hpp
index 8b686f7..8f86a3e 100644
--- a/include/SplineNetLib/CTensorFunc.hpp
+++ b/include/SplineNetLib/CTensorFunc.hpp
@@ -16,6 +16,16 @@
 #include "CTensorUtils.hpp"
 
 namespace SplineNetLib {
+
+typedef enum {
+    RESHAPE_SQUEEZE = 1,
+    RESHAPE_UNSQUEEZE = 2,
+    RESHAPE_EXPAND = 3,
+    RESHAPE_REDUCE = 4,
+    RESHAPE_PERMUTE = 5,
+    RESHAPE_TRANSPOSE = 6
+} ReshapeType;
+
 template
 class CTensor;
@@ -28,8 +38,12 @@ class Function {
     //pointers to this functions "parents" (like : a operator b)
     std::shared_ptr> a;
     std::shared_ptr> b;
+    std::vector a_shape;
+    std::vector b_shape;
 
-    Function(std::shared_ptr> A, std::shared_ptr> B) : a(A), b(B) {}
+    Function(std::shared_ptr> A, std::shared_ptr> B) : a(A), b(B),
+    /*nullptr check for A and B to ensure no segfaults happen ->*/a_shape(A ? A->shape() : std::vector {1}),
+    b_shape(B ?
B->shape() : std::vector {1}) {} //virtual desctructor virtual ~Function() = default; @@ -98,6 +112,27 @@ class MatMulFunction : public Function { virtual std::unique_ptr> clone() const override; }; +template +requires Scalar +class ReShapeFunction : public Function { +public : + + ReshapeType operation; + /* + std::vector original_shape; + std::vector new_shape; + */ + + ReShapeFunction(std::shared_ptr> a, ReshapeType _operation) : + Function(a, nullptr),operation(_operation){} + + std::vector fwd() override; + + void backward(std::vector &prop_grad, CTensor *result) override; + + virtual std::unique_ptr> clone() const override; +}; + } //namepace #include "../src/CTensorFunc.tpp" diff --git a/include/SplineNetLib/CTensorUtils.hpp b/include/SplineNetLib/CTensorUtils.hpp index fba2654..ea4f4b3 100644 --- a/include/SplineNetLib/CTensorUtils.hpp +++ b/include/SplineNetLib/CTensorUtils.hpp @@ -26,6 +26,7 @@ #include namespace SplineNetLib { + template std::string vectorToString(const std::vector& vec); @@ -88,6 +89,9 @@ template requires Scalar std::vector permute_vec(const std::vector& A, const std::vector& A_shape, const std::vector& permutation_indices) ; +//swaps last two dimensions as if transposing a ctensor +std::vector transpose_shape(const std::vector& shape) ; + } //namespace #include "../src/CTensorUtils.tpp" diff --git a/src/CTensor.tpp b/src/CTensor.tpp index 834ea71..376ac1f 100644 --- a/src/CTensor.tpp +++ b/src/CTensor.tpp @@ -39,6 +39,12 @@ void CTensor::squeeze(const size_t& dim) { this->_tensor_data->_shape[dim] *= this->_tensor_data->_shape[dim+1]; this->_tensor_data->_shape.erase(this->_tensor_data->_shape.begin() + dim + 1); } + + if (this->requires_grad) { + auto new_fn = std::make_unique>(std::make_shared>(*this), RESHAPE_SQUEEZE); + + this->_tensor_data->_grad_fn.push_back(std::move(new_fn)); + } } template @@ -50,6 +56,12 @@ void CTensor::unsqueeze(const size_t &dim) { } else { (*shape).insert((*shape).begin() + dim, 1); } + + if (this->requires_grad) { + auto new_fn = std::make_unique>(std::make_shared>(*this), RESHAPE_UNSQUEEZE); + + this->_tensor_data->_grad_fn.push_back(std::move(new_fn)); + } } template @@ -90,9 +102,56 @@ void CTensor::expand(const size_t &dim, const size_t &factor) { idx += data_size_per_expansion; } - // Update the shape and number of dimensions + auto new_shape = (*shape); + new_shape[dim] *= factor; + + //create new addfunction with shared ptr to this and other + auto new_fn = std::make_unique>(std::make_shared>(*this), RESHAPE_EXPAND); + + // Update the shape and number of dimensions (*shape)[dim] *= factor; + + this->_tensor_data->_grad_fn.push_back(std::move(new_fn)); + +} + +template +void CTensor::reduce(const size_t &dim, const size_t &factor) { + if (factor <= 1) { + return; // No reduction needed + } + + auto* shape = &(this->_tensor_data->_shape); // Pointer to shape vector + auto* data = &(this->_tensor_data->_data); + size_t n_dims = shape->size(); + + // Ensure valid dimension + if (dim >= n_dims) { + throw std::invalid_argument("Input dim: " + std::to_string(dim) + + " cannot be larger than _n_dims: " + std::to_string(n_dims)); + } + + // Ensure the shape is divisible by factor + if ((*shape)[dim] % factor != 0) { + return; + } + + // Calculate the size of sub-vectors + size_t sub_vector_size = 1; + for (size_t i = dim + 1; i < n_dims; i++) { + sub_vector_size *= (*shape)[i]; + } + size_t idx = 0; + while (idx < data->size()) { + // Remove (factor - 1) repetitions of the sub-vector + for (size_t i = 1; i < factor; i++) { + 
 template <typename T>
@@ -250,7 +309,7 @@ void CTensor<T>::clear_graph() {
 
 //can be improved with an overload: if no arg is passed, use {} so that this function below can take references
 template <typename T>
 void CTensor<T>::backward(std::vector<T> prop_grad) {
-
+    /*
     //go through all parent Functions
     for (auto &fn : this->_tensor_data->_grad_fn) {
         if (fn) {
@@ -263,6 +322,13 @@ void CTensor<T>::backward(std::vector<T> prop_grad) {
         }
     }
     //std::cout<<"debug Ct bwd fn all bwd finish\n";
+    */
+    //traverse the graph in reverse, as this makes more sense for the tree traversal
+    for (int i = this->_tensor_data->_grad_fn.size() - 1; i >= 0; i--){
+        if (this->_tensor_data->_grad_fn[i]){
+            this->_tensor_data->_grad_fn[i]->backward(prop_grad, this);
+        }
+    }
 }
 
 template <typename T>
diff --git a/src/CTensorFunc.tpp b/src/CTensorFunc.tpp
index ebb1b6a..10b5df8 100644
--- a/src/CTensorFunc.tpp
+++ b/src/CTensorFunc.tpp
@@ -252,7 +252,7 @@ void MatMulFunction<T>::backward(std::vector<T> &prop_grad, CTensor<T> *result)
     auto prop_grad_shape = result->shape();
     //std::cout<<"matmul bwd prop shape : "<<vectorToString(prop_grad_shape)<<"\n";
 
     if (prop_grad.size() == 0) {
         for (size_t i = 0; i < result->data().size(); i++) {
             prop_grad.push_back(1);
@@ -270,8 +270,9 @@ void MatMulFunction<T>::backward(std::vector<T> &prop_grad, CTensor<T> *result)
         //create a copy of b and transpose it
         auto b_copy = this->b->clone();
         b_copy.transpose();
+        auto b_shape = transpose_shape(this->b_shape);
 
-        prop_grad_a = matmul(prop_grad, b_copy.data(), prop_grad_shape, b_copy.shape());
+        prop_grad_a = matmul(prop_grad, b_copy.data(), prop_grad_shape, b_shape);
 
         //assign grad
         for (size_t i = 0; i < prop_grad_a.size(); i++) {
@@ -292,8 +293,9 @@ void MatMulFunction<T>::backward(std::vector<T> &prop_grad, CTensor<T> *result)
         //create a copy of a and transpose it
         auto a_copy = this->a->clone();
         a_copy.transpose();
+        auto a_shape = transpose_shape(this->a_shape);
 
         //std::cout<<"b bwd a_copy shape :"<<vectorToString(a_copy.shape())<<"\n";
-        prop_grad_b = matmul(a_copy.data(), prop_grad, a_copy.shape(), prop_grad_shape);
+        prop_grad_b = matmul(a_copy.data(), prop_grad, a_shape, prop_grad_shape);
 
 template <typename T>
 requires Scalar<T>
 std::unique_ptr<Function<T>> MatMulFunction<T>::clone() const {
     return std::make_unique<MatMulFunction<T>>(*this);
 }
 
+template <typename T>
+requires Scalar<T>
+std::vector<T> ReShapeFunction<T>::fwd() {
+    return this->a->data();
+}
+
+
+template <typename T>
+requires Scalar<T>
+void ReShapeFunction<T>::backward(std::vector<T> &prop_grad, CTensor<T> *result){
+    //std::cout<<"RESHAPEFUNCTION CALL\n";
+
+    switch(this->operation) {
+        case RESHAPE_SQUEEZE:
+            if (result != this->a.get()){
+                this->a->backward(prop_grad);
+            }
+            break;
+        case RESHAPE_UNSQUEEZE:
+            if (result != this->a.get()){
+                this->a->backward(prop_grad);
+            }
+            break;
+        case RESHAPE_EXPAND:
+            std::cout<<"\n\nWARNING: This CTensor was expanded in the computational graph, therefore gradients can not be calculated further in this branch\n\n";
+            break;
+
+        case RESHAPE_REDUCE:
+            break;
+        case RESHAPE_PERMUTE:
+
+            break;
+        case RESHAPE_TRANSPOSE:
+            if (result != this->a.get()){
+                this->a->backward(prop_grad);
+            }
+            break;
+        default: //should throw an exception
+            break;
+    }
+}
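+
+// Illustrative sketch (hypothetical, not from the test suite) of how a reshape op lands
+// in the autograd graph and is replayed by backward():
+//   CTensor<double> t({1.0,2.0},{1,2});   // requires_grad defaults to true
+//   t.squeeze(0);    // squeeze() pushes a ReShapeFunction(..., RESHAPE_SQUEEZE) onto t's grad_fn
+//   t.backward();    // grad_fn is walked in reverse; the squeeze node forwards prop_grad to its parent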
+
+template <typename T>
+requires Scalar<T>
+std::unique_ptr<Function<T>> ReShapeFunction<T>::clone() const{
+    return std::make_unique<ReShapeFunction<T>>(*this);
+}
+
 }//namespace
 
 #endif
\ No newline at end of file
diff --git a/src/CTensorUtils.tpp b/src/CTensorUtils.tpp
index 985de1a..ca0b26a 100644
--- a/src/CTensorUtils.tpp
+++ b/src/CTensorUtils.tpp
@@ -102,39 +102,7 @@ inline size_t stride(size_t idx, const std::vector<size_t> &shape) {
     return stride;
 }
 
-// Math functions
-/* see readable version below
-template <typename T>
-requires Scalar<T>
-std::vector<T> matmul(const std::vector<T> &A, const std::vector<T> &B, const std::vector<size_t> &A_shape, const std::vector<size_t> &B_shape) {
-    size_t batch_size = 1;
-    if (B_shape.size() != A_shape.size()) {
-        throw std::invalid_argument("A_shape.size() and B_shape.size() must be equal");
-        return std::vector<T>(1, 0);
-    }
-
-    if (A_shape.size() > 2) {
-        for (size_t i = 0; i < A_shape.size() - 2; i++) {
-            batch_size *= A_shape[i];
-        }
-    }
-
-    size_t M = A_shape[A_shape.size() - 2], K = A_shape[A_shape.size() - 1], N = B_shape[B_shape.size() - 1];
-    std::vector<T> result(batch_size * M * N);
-
-    for (size_t batch_dim = 0; batch_dim < batch_size; batch_dim++) {
-        for (size_t row = 0; row < M; row++) {
-            for (size_t col = 0; col < N; col++) {
-                T sum = 0.0;
-                for (size_t shared = 0; shared < K; shared++) {
-                    sum += A[batch_dim * M * K + row * K + shared] * B[batch_dim * K * N + shared * N + col];
-                }
-                result[batch_dim * M * N + row * N + col] = sum;
-            }
-        }
-    }
-    return result;
-}*/
+//math funcs
 
 template <typename T> // Template function that accepts any scalar type 'T' (e.g., float, double)
 requires Scalar<T> // This constraint ensures that the type 'T' is a scalar (e.g., not a matrix, vector, etc.)
@@ -200,6 +168,14 @@ std::vector<T> permute_vec(const std::vector<T>& A, const std::vector<size_t>& A_shape, const std::vector<size_t>& permutation_indices)
     return B;
 }
 
+//swaps the last two entries of a shape vector (assumes shape.size() >= 2)
+inline std::vector<size_t> transpose_shape(const std::vector<size_t>& shape) {
+    std::vector<size_t> temp = shape;
+    size_t n_dims = temp.size();
+    temp[n_dims - 2] = shape[n_dims - 1];
+    temp[n_dims - 1] = shape[n_dims - 2];
+    return temp;
+}
+
 }//namespace
 
 #endif
\ No newline at end of file

From c4e970bd8441791e0e8bef98f842fa17a75d693b Mon Sep 17 00:00:00 2001
From: K-T0BIAS
Date: Thu, 20 Feb 2025 13:03:21 +0100
Subject: [PATCH 19/19] new ci workflow .yml for the python lib tests

---
 .github/workflows/py_test.yml | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 .github/workflows/py_test.yml

diff --git a/.github/workflows/py_test.yml b/.github/workflows/py_test.yml
new file mode 100644
index 0000000..95b358d
--- /dev/null
+++ b/.github/workflows/py_test.yml
@@ -0,0 +1,34 @@
+name: CI - Python Tests
+
+# Trigger the workflow manually, or on pushes and pull requests that touch the library sources
+on:
+  workflow_dispatch:
+  push:
+    paths:
+      - 'src/**'       # Monitor changes in src directory
+      - 'include/**'   # Monitor changes in include directory
+  pull_request:
+    paths:
+      - 'src/**'
+      - 'include/**'
+
+jobs:
+  python-tests:
+    runs-on: ubuntu-latest  # Use the latest Ubuntu virtual environment
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'  # Specify the Python version you need
+
+      - name: Install project and dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .  # Install the project from the root directory
+
+      - name: Run Python unit tests
+        run: |
+          python -m unittest discover -s tests -p '*_test.py'  # Discover and run all unit tests in the 'tests' folder
\ No newline at end of file