diff --git a/.gitignore b/.gitignore index f3219dd9..8b80a3f0 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ CMakeFiles CMakeCache.txt cmake_install.cmake CMakeLists.txt.user +.idea \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..dba15f30 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "libs/glfw"] + path = libs/glfw + url = https://github.com/glfw/glfw.git diff --git a/CMakeLists.txt b/CMakeLists.txt index bed4e57a..c9bf2069 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,11 +1,7 @@ -################## -# Thanda # -################## - # credit - base CMake config : Yining Karl Li , edited CMake config: Akshay Shah & Debanshu Singh #name your project -project(Thanda) +project(FluidSolver) cmake_minimum_required(VERSION 2.8) # set creates a variable @@ -16,6 +12,8 @@ include_directories( ${NUPARU}/src ) +set(CMAKE_INCLUDE_CURRENT_DIR ON) + # Add path for pre-compiled libraries here (we will later link them with our compiled source) # Add Nuparu library to path for OSX, linux and windows if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") @@ -33,10 +31,20 @@ set(GLFW_LIBRARY_DIR ${CMAKE_LIBRARY_PATH}) set(GLEW_LIBRARY_DIR ${CMAKE_LIBRARY_PATH}) # Use find_package & find_library to link with -find_package(OPENGL REQUIRED) +find_package(OpenGL REQUIRED) find_package(GLEW) find_library(GLFW_LIBRARY "glfw3" HINTS ${GLFW_LIBRARY_DIR}) -find_library(JSONCPP "jsoncpp") +find_library(JSONCPP "jsoncpp" REQUIRED) +find_library(BOOST_IOSTREAMS boost_iostreams) +find_library(BOOST_SYSTEM boost_system) +find_library(OPENVDB openvdb REQUIRED) +find_library(OPENVDB_POINTS openvdb_points) +find_library(HALF Half REQUIRED) +find_library(TBB NAMES tbb tbbmalloc) +find_library(ZLIB z) +find_library(ANT AntTweakBar) + +#Iex IexMath Imath IlmThread add_definitions( -DTW_STATIC @@ -46,7 +54,20 @@ add_definitions( -D_CRT_SECURE_NO_WARNINGS ) -set(CORE_LIBS ${GLFW_LIBRARY} ${GLUT_LIBRARY} ${GLEW_LIBRARY} ${JSONCPP} 
${OPENGL_LIBRARY} ) +set(CORE_LIBS + ${GLFW_LIBRARY} + ${GLUT_LIBRARY} + ${GLEW_LIBRARY} + ${JSONCPP} + ${OPENGL_LIBRARY} + ${BOOST_IOSTREAMS} + ${BOOST_SYSTEM} + ${OPENVDB} + ${OPENVDB_POINTS} + ${TBB} + ${ZLIB} + ${HALF} + ${ANT}) # OSX-specific hacks/fixes if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") @@ -59,22 +80,23 @@ endif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # Linux specific hacks/fixes if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lX11 -lXxf86vm -lXrandr -lpthread -lXi") + set(CORE_LIBS ${CORE_LIBS} X11 Xxf86vm Xrandr pthread Xi) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lX11 -lXxf86vm -lXrandr -lpthread -lXi -lz") endif() # set compiler flags for c++11 if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O3 -m64 -msse2 -w") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Ofast -m64 -msse2 -w") elseif(WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") endif() if(MSVC) - set(COMPILER_FLAGS + set(COMPILER_FLAGS CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_C_FLAGS + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE ) @@ -82,13 +104,24 @@ endif() # Add source files you want to compile (.cpp) set(CORE_SRC - src/main.cpp - src/camera/camera.cpp - src/viewer/viewer.cpp - src/fluidSolver/fluidSolver.cpp - src/scene/scene.cpp - src/geom/geom.cpp -) + main.cpp + core/display/InputHandler.cpp + core/display/InputHandler.h + core/display/Window.cpp + core/display/Window.h + core/fileIO/SceneLoader.cpp + core/fileIO/SceneLoader.h + core/geometry/Bound.cpp + core/geometry/Bound.h + core/geometry/Box.cpp + core/geometry/Box.h + core/geometry/Geo.cpp + core/geometry/Geo.h + core/geometry/GeoObject.h + core/solver/FluidSolver.cpp + core/solver/FluidSolver.h + core/util/math.h + core/display/shaders/particle.frag.h core/display/shaders/particle.vert.h 
core/display/painters/ParticlesPainter.cpp core/display/painters/ParticlesPainter.h core/display/painters/Painter.cpp core/display/painters/Painter.h core/display/painters/BoxPainter.cpp core/display/painters/BoxPainter.h core/display/shaders/flat.vert.h core/display/shaders/flat.frag.h core/scenes/default.h core/camera/Camera.cpp core/camera/Camera.h core/solver/grid/Grid.cpp core/solver/grid/Grid.h core/solver/grid/MACGrid.cpp core/solver/grid/MACGrid.h core/solver/FluidParticle.h core/fileIO/ParticlesWriter.cpp core/fileIO/ParticlesWriter.h core/util/hacks.h core/display/painters/GridVectorAttributePainter.cpp core/display/painters/GridVectorAttributePainter.h core/display/shaders/gridAttr.geo.h core/util/flags.h core/display/painters/GridScalarAttributePainter.cpp core/display/painters/GridScalarAttributePainter.h core/display/shaders/gridScal.vert.h core/display/shaders/gridScal.frag.h) -add_executable(Thanda ${CORE_SRC}) -target_link_libraries(Thanda ${CORE_LIBS}) +add_executable(FluidSolver ${CORE_SRC}) +target_link_libraries(FluidSolver ${CORE_LIBS}) diff --git a/README.md b/README.md index 54283261..ebc5feed 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,59 @@ -# CIS563-FluidSolver -(Credit : CIS565 README) +Fluid Solver +=========== -Fluid Solver Submission guidelines: +First steps toward building a fluid solver. +## User Interface: -- If you have modified any of the CMakeLists.txt files at all (aside from the list of CORE_SRC), you must test that your project can build. Beware of any build issues. +* Middle mouse scroll to zoom in/out +* Middle mouse click and drag to orbit the camera +* SHIFT + middle mouse click and drag to track/slide -- Open a GitHub pull request so that we can see that you have finished. The title should be "Submission: YOUR NAME". +## Code Overview: -- In the body of the pull request, include a link to your repository. 
+### Scene Loading -- Submit on canvas with a direct link to your pull request on GitHub +Scenes are loaded by passing a path to the scene file as the first program argument. +It does a simple parsing with jsoncpp to create objects. +* containerDim: size and position of the container +* particleDim: size and position of initial fluid object +* resolution: number of grid divisions on the largest axis -And you're done! \ No newline at end of file +#### AntTweakBar + +I've added AntTweakBar, but haven't gotten around to having it do anything yet. I also don't have a Mac so I was unable to compile the libraries for it. + +### Geometry + +All geometry objects implement functions for collision detection. These come in a few different forms, allowing collision detection given next and previous points, given a point and distance tolerance, as well as given a point, ray, and timestep. +All geometry objects also have a bounding box which at the moment is used to assist in converting the geometry to particles. + +### Fluid Solver + +Particles are created by looping over all of the geometries' bounding boxes and checking if the point is contained within them. +For solving, particles are simply accelerated by a static gravity constant and then collisions are checked against the container geometry. +All information is stored in a temporary buffer which is swapped with the particle buffer at the end of the solve. + +### Drawing + +A Painter class is used to define the drawing behavior of various elements in the scene. Each painter sets up its own shaders on initialization and implements methods to draw its respective objects. +I found it much nicer to isolate my code this way so that I didn't have a billion gl calls in my geometry classes and a billion gl calls in my Window class. +Shaders are stored as char arrays in header files. I found that the easiest way to package them with my code. + +The Window sets up a glfw window and a Singleton instance of InputHandler. 
glfw doesn't let you have non-static callback functions so I instead have callback functions to update the state of my static InputHandler which the Window can subscribe to. +From there, I can get the window/keyboard/mouse data and do the appropriate camera calculations. + +## Parallelization: + +TBB is used heavily to parallelize calculations on the particles and grid. This can be toggled on/off by defining/undefining USETBB in the project's flags header (presumably `core/util/flags.h`). + +For the default scene, the parallelized code computes each frame in an average of 0.0494588 seconds. Without TBB, this is 0.0768101 seconds. + +## OpenVDB: + +Currently, each frame is written out as "particles_{frame}.vdb" in the current directory + +## Known Problems: + +There seems to be an edge indexing problem in the attribute transfer from grid to particle \ No newline at end of file diff --git a/core/camera/Camera.cpp b/core/camera/Camera.cpp new file mode 100644 index 00000000..9b7c5680 --- /dev/null +++ b/core/camera/Camera.cpp @@ -0,0 +1,53 @@ +// +// Created by austin on 2/29/16. 
+// + +#include "Camera.h" + +Camera::Camera(int w, int h) : + zoom(35), + eye(glm::vec3(0,0,zoom)), + tgt(glm::vec3(0,0,0)), + width(w), + height(h), + fovy(45), + near_clip(0.001f), + far_clip(1000.f), + world_up(glm::vec3(0,1,0)), + look(tgt - eye), + right(glm::cross(look, world_up)), + up(glm::cross(right, look)), + rotation(glm::mat4(1.f)) { + +// rotation = glm::rotate(rotation, -PI/6, glm::vec3(1,0,0)); +// rotation = glm::rotate(rotation, PI/4, glm::vec3(0,1,0)); + + recomputeEye(); + recompute(); +} + +glm::mat4 Camera::viewProj() { + return _viewProj; +} + +void Camera::recompute() { + float aspect = (float)width/height; + look = glm::normalize(tgt - eye); + right = glm::cross(look, world_up); + up = glm::cross(right, look); + + _viewProj = glm::perspective(fovy, aspect, near_clip, far_clip) * glm::lookAt(eye, tgt, up); +} + +void Camera::resize(int w, int h) { + width = w; + height = h; + recompute(); +} + +void Camera::recomputeEye() { + eye = glm::vec3(rotation * glm::vec4(0,0,zoom,0)) + tgt; + look = glm::normalize(tgt - eye); + right = glm::normalize(glm::cross(look, up)); + up = glm::normalize(glm::cross(right, look)); +} diff --git a/core/camera/Camera.h b/core/camera/Camera.h new file mode 100644 index 00000000..62be261a --- /dev/null +++ b/core/camera/Camera.h @@ -0,0 +1,35 @@ +// +// Created by austin on 2/29/16. 
+// + +#ifndef FLUIDSOLVER_CAMERA_H +#define FLUIDSOLVER_CAMERA_H + +#include + +class Camera { +private: + glm::vec3 world_up; +public: + float zoom; + glm::vec3 eye; + glm::vec3 tgt; + + Camera(int w, int h); + glm::mat4 viewProj(); + void recomputeEye(); + void recompute(); + void resize(int w, int h); + + glm::vec3 look, up, right; + int width, height; + float fovy, near_clip, far_clip; + glm::mat4 rotation; + +private: + + glm::mat4 _viewProj; +}; + + +#endif //FLUIDSOLVER_CAMERA_H diff --git a/core/display/InputHandler.cpp b/core/display/InputHandler.cpp new file mode 100644 index 00000000..799eb5f5 --- /dev/null +++ b/core/display/InputHandler.cpp @@ -0,0 +1,216 @@ +// +// Created by austin on 2/26/16. +// + +#include "InputHandler.h" +#include +#include + +InputHandler::InputHandler() { + +} + +double InputHandler::x() const { + return _mouseState.x; +} + +double InputHandler::y() const { + return _mouseState.y; +} + +double InputHandler::delX() const { + return _mouseState.delX; +} + +double InputHandler::delY() const { + return _mouseState.delY; +} + +double InputHandler::delWheel() const { + return _mouseState.delWheel; +} + +bool InputHandler::leftDown() const { + return _mouseState.leftDown; +} + +bool InputHandler::wheelDown() const { + return _mouseState.wheelDown; +} + +bool InputHandler::rightDown() const { + return _mouseState.rightDown; +} + +bool InputHandler::key(int key) const { + return _keyboard.find(key) != _keyboard.end(); +} + +void InputHandler::x(double val, bool events) { + std::swap(_mouseState.x, val); + + if (events) { + _mouseState.delX = _mouseState.x - val; + + mouseMoved(); + + emit(_mouseState); + _mouseState.leftDragInit = false; + _mouseState.wheelDragInit = false; + _mouseState.rightDragInit = false; + } +} + +void InputHandler::y(double val, bool events) { + std::swap(_mouseState.y, val); + + if (events) { + _mouseState.delY = _mouseState.y - val; + + mouseMoved(); + + emit(_mouseState); + _mouseState.leftDragInit = false; 
+ _mouseState.wheelDragInit = false; + _mouseState.rightDragInit = false; + } +} + +void InputHandler::mouseMoved() { + if (_mouseState.leftDown) { + if (!_mouseState.leftDragInit) { + _mouseState.leftDragInit = true; + _mouseState.startLeftX = _mouseState.x; + _mouseState.startLeftY = _mouseState.y; + } + _mouseState.leftDragging = true; + } + if (_mouseState.wheelDown) { + if (!_mouseState.wheelDragInit) { + _mouseState.wheelDragInit = true; + _mouseState.startWheelX = _mouseState.x; + _mouseState.startWheelY = _mouseState.y; + } + _mouseState.wheelDragging = true; + } + if (_mouseState.rightDown) { + if (!_mouseState.rightDragInit) { + _mouseState.rightDragInit = true; + _mouseState.startRightX = _mouseState.x; + _mouseState.startRightY = _mouseState.y; + } + _mouseState.rightDragging = true; + } +} + +void InputHandler::pos(double x, double y, bool events) { + std::swap(_mouseState.x, x); + std::swap(_mouseState.y, y); + + if (events) { + _mouseState.delX = _mouseState.x - x; + _mouseState.delY = _mouseState.y - y; + + mouseMoved(); + + emit(_mouseState); + _mouseState.leftDragInit = false; + _mouseState.wheelDragInit = false; + _mouseState.rightDragInit = false; + } +} + +void InputHandler::delX(double val, bool events) { + _mouseState.delX = val; + if (events) { + emit(_mouseState); + } +} + +void InputHandler::delY(double val, bool events) { + _mouseState.delY = val; + if (events) { + emit(_mouseState); + } +} + + +void InputHandler::delWheel(double val, bool events) { + _mouseState.delWheel = val; + if (events) { + emit(_mouseState); + } + _mouseState.delWheel = 0; +} + +void InputHandler::leftDown(bool val, bool events) { + _mouseState.leftDown = val; + if (events) { + if (!val) { + _mouseState.leftDragging = false; + _mouseState.leftDragFinish = true; + } + emit(_mouseState); + _mouseState.leftDragFinish = false; + } +} + +void InputHandler::wheelDown(bool val, bool events) { + _mouseState.wheelDown = val; + if (events) { + if (!val) { + 
_mouseState.wheelDragging = false; + _mouseState.wheelDragFinish = true; + } + emit(_mouseState); + _mouseState.wheelDragFinish = false; + } +} + +void InputHandler::rightDown(bool val, bool events) { + _mouseState.rightDown = val; + if (events) { + if (!val) { + _mouseState.rightDragging = false; + _mouseState.rightDragFinish = true; + } + emit(_mouseState); + _mouseState.rightDragFinish = false; + } +} + +void InputHandler::key(int key, bool down, bool events) { + if (down) { + _keyboard.insert(key); + } else { + _keyboard.erase(_keyboard.find((key))); + } +} + +void InputHandler::emit(MouseState &event) { + for (int i = 0; i < mouseSubscribers.size(); i++) { + mouseSubscribers.at(i)(event); + } +} + +void InputHandler::registerMouseListener(InputHandler::MouseListener listener) { + mouseSubscribers.push_back(listener); +} + +//void InputHandler::deregisterMouseListener(MouseListener listener) { +// mouseSubscribers.erase(std::remove(mouseSubscribers.begin(), mouseSubscribers.end(), listener), mouseSubscribers.end()); +//} + +void InputHandler::windowResized(int w, int h) { + for (int i = 0; i < windowSubscribers.size(); i++) { + windowSubscribers.at(i)(w, h); + } +} + +void InputHandler::registerWindowListener(InputHandler::WindowListener listener) { + windowSubscribers.push_back(listener); +} + +//void InputHandler::deregisterWindowListener(InputHandler::WindowListener listener) { +// windowSubscribers.erase(std::remove(windowSubscribers.begin(), windowSubscribers.end(), listener), windowSubscribers.end()); +//} diff --git a/core/display/InputHandler.h b/core/display/InputHandler.h new file mode 100644 index 00000000..8ab69b35 --- /dev/null +++ b/core/display/InputHandler.h @@ -0,0 +1,94 @@ +// +// Created by austin on 2/26/16. 
+// + +#ifndef FLUIDSOLVER_INPUTHANDLER_H +#define FLUIDSOLVER_INPUTHANDLER_H + +#include +#include +#include + +class InputHandler { + +public: + static InputHandler& getInputHandler() { + static InputHandler inputHandler; + return inputHandler; + } + + double x() const; + double y() const; + double delX() const; + double delY() const; + double delWheel() const; + bool leftDown() const; + bool wheelDown() const; + bool rightDown() const; + bool key(int key) const; + + void x(double val, bool events = true); + void y(double val, bool events = true); + void pos(double x, double y, bool events = true); + void delX(double val, bool events = true); + void delY(double val, bool events = true); + void delWheel(double val, bool events = true); + void leftDown(bool val, bool events = true); + void wheelDown(bool val, bool events = true); + void rightDown(bool val, bool events = true); + void key(int key, bool down, bool events = true); + + struct MouseState { + double x; + double y; + double delX; + double delY; + double startLeftX; + double startLeftY; + double startWheelX; + double startWheelY; + double startRightX; + double startRightY; + double delWheel; + bool leftDown; + bool wheelDown; + bool rightDown; + bool leftDragInit; + bool wheelDragInit; + bool rightDragInit; + bool leftDragging; + bool wheelDragging; + bool rightDragging; + bool leftDragFinish; + bool wheelDragFinish; + bool rightDragFinish; + }; + + //typedef void(*MouseListener)(MouseState&); + //typedef void (*WindowListener)(int w, int h); + typedef std::function WindowListener; + typedef std::function MouseListener; + void registerMouseListener(MouseListener listener); + //void deregisterMouseListener(MouseListener listener); + void registerWindowListener(WindowListener listener); + //void deregisterWindowListener(WindowListener listener); + void windowResized(int w, int h); + +private: + InputHandler(); + InputHandler(InputHandler const&) {} // prevent copies + void operator=(InputHandler const&) {} 
// prevent assignments + + std::set _keyboard; + + MouseState _mouseState; + + void mouseMoved(); + + std::vector mouseSubscribers; + std::vector windowSubscribers; + void emit(MouseState &event); +}; + + +#endif //FLUIDSOLVER_INPUTHANDLER_H diff --git a/core/display/Window.cpp b/core/display/Window.cpp new file mode 100644 index 00000000..c5b129a8 --- /dev/null +++ b/core/display/Window.cpp @@ -0,0 +1,322 @@ +// +// Created by austin on 2/25/16. +// + +#include "Window.h" +#include +#include +#include + +static void error_callback(int error, const char* description) { + fputs(description, stderr); +} + +InputHandler &inputHandler = InputHandler::getInputHandler(); + +Window::Window(const char *title) : Window(1200, 800, title) { } +Window::Window(int w, int h) : Window(w, h, "GL Window"){ } +Window::Window(int w, int h, const char* title) : _window(nullptr), camera(w, h), _w(w), _h(h), + loadSceneCB(NULL) { + glfwSetErrorCallback(error_callback); + + if (!glfwInit()) exit(EXIT_FAILURE); + + // use antialiasing + glfwWindowHint(GLFW_SAMPLES, 4); + + // set version to OpenGL 3.3 + glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); + glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3); + #ifdef __APPLE__ + glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); + #endif + glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); + glfwWindowHint(GLFW_RESIZABLE, GL_TRUE); + + _window = glfwCreateWindow(w, h, title, NULL, NULL); + pixels.resize(_w*_h*4); + + if (!_window) { + glfwTerminate(); + exit(EXIT_FAILURE); + } + + glfwMakeContextCurrent(_window); + glfwSwapInterval(1); + setupInputCBs(); + + glewExperimental= GL_TRUE; + if (glewInit() != GLEW_OK) { + fprintf(stderr, "Failed to initialize GLEW\n"); + exit(EXIT_FAILURE); + } + + glGenTextures(1, &texture); + glBindTexture(GL_TEXTURE_2D, texture); + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR ); + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR ); + glTexImage2D( GL_TEXTURE_2D, 0, 
GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); + + glGenFramebuffers(1, &fbo); + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, texture, 0); + + glBindFramebuffer(GL_FRAMEBUFFER, 0); +} + +Window::~Window() { + +} + +void Window::initializeTweakBar() { + int w, h; + glfwGetWindowSize(_window, &w, &h); + TwInit(TW_OPENGL_CORE, NULL); + TwWindowSize(w, h); + TwBar *myBar; + myBar = TwNewBar("Settings"); + TwAddButton(myBar, "loadsceneBtn", loadSceneCB, NULL, " label='Load Scene'"); +} + + +void Window::setupInputCBs() { + + glfwSetKeyCallback(_window, [](GLFWwindow *window, int key, int scancode, int action, int mods) { + switch(action) { + case GLFW_PRESS: + inputHandler.key(key, true); + break; + case GLFW_RELEASE: + inputHandler.key(key, false); + break; + default: break; + } + if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) + glfwSetWindowShouldClose(window, GL_TRUE); + + switch(action) { + case GLFW_PRESS: + inputHandler.key(scancode, true); + break; + case GLFW_RELEASE: + inputHandler.key(scancode, false); + break; + default:break; + } + + TwEventKeyGLFW(key, action); + TwEventCharGLFW(key, action); + }); + glfwSetCursorPosCallback(_window, [](GLFWwindow* window, double xpos, double ypos) { + inputHandler.pos(xpos, ypos); + TwEventMousePosGLFW(xpos, ypos); + }); + glfwSetMouseButtonCallback(_window, [](GLFWwindow* window, int button, int action, int mods) { + switch(button) { + case GLFW_MOUSE_BUTTON_LEFT: + switch(action) { + case GLFW_PRESS: + inputHandler.leftDown(true); + break; + case GLFW_RELEASE: + inputHandler.leftDown(false); + break; + default:break; + } + break; + case GLFW_MOUSE_BUTTON_MIDDLE: + switch(action) { + case GLFW_PRESS: + inputHandler.wheelDown(true); + break; + case GLFW_RELEASE: + inputHandler.wheelDown(false); + break; + default:break; + } + break; + case GLFW_MOUSE_BUTTON_RIGHT: + switch(action) { + case GLFW_PRESS: + inputHandler.rightDown(true); + break; + case 
GLFW_RELEASE: + inputHandler.rightDown(false); + break; + default:break; + } + break; + default:break; + } + TwEventMouseButtonGLFW(button, action); + }); + glfwSetScrollCallback(_window, [&](GLFWwindow* window, double xoffset, double yoffset) { + inputHandler.delWheel(yoffset); + TwEventMouseWheelGLFW(yoffset); + }); + + glfwSetWindowSizeCallback(_window, [](GLFWwindow *window, int width, int height) { + inputHandler.windowResized(width, height); + TwWindowSize(width, height); + }); + + inputHandler.registerMouseListener([&](InputHandler::MouseState &mouseState) { + if (!fequal(mouseState.delWheel, 0.0)) { + // change camera zoom level based on scroll direction + glm::vec3 vec = camera.tgt - camera.eye; + // limit zoom when very near to target + float fac = glm::min(glm::abs(glm::length(vec)/5.f), 1.f); + camera.zoom -= (float)mouseState.delWheel * fac; + camera.recomputeEye(); + updateCamera(); + } + + if (mouseState.wheelDragging) { + if (inputHandler.key(340)) { + // pixel position offset from center + float x = (float) (_w/2 - mouseState.delX); + float y = (float) (_h/2 - mouseState.delY); + + // offset in ndc + float sx = (2*x / _w) - 1.f; + float sy = 1.f - (2*y / _h); + + // project camera up amd right axes + float alpha = camera.fovy / 2; + float len = glm::length(camera.tgt - camera.eye); + glm::vec3 V = camera.up*(float)(len*tan(alpha)); + glm::vec3 H = camera.right*(float)(len*(_w / _h)*tan(alpha)); + + camera.tgt = camera.tgt + sx*H + sy*V; + camera.recomputeEye(); + updateCamera(); + return; + } + + glm::vec4 y = glm::vec4(0,1,0,0); // y axis vector + glm::vec4 diff(mouseState.delX / _w, mouseState.delY / _h, 0, 0); // mouse offset + float a = (float) acos(glm::dot(y, diff) / (glm::length(y) * (glm::length(diff)))); // calculate offset angle from y axis + glm::vec4 para; // parallel axis to mouse movement + if (diff[0] > 0) { + para = glm::mat4_cast(glm::angleAxis(-a, glm::vec3(camera.look[0], camera.look[1], camera.look[2]))) * glm::vec4(camera.up, 
1); + } else { + para = glm::mat4_cast(glm::angleAxis(a, glm::vec3(camera.look[0], camera.look[1], camera.look[2]))) * glm::vec4(camera.up, 1); + } + glm::vec3 perp = glm::normalize(glm::cross(camera.look, glm::vec3(para))); // perpendicular axis to mouse movement + + // rotate camera on perpendicular axis + glm::mat4 rot = glm::mat4_cast(glm::angleAxis(-2*PI*glm::length(diff), perp)); + camera.rotation = rot * camera.rotation; + camera.up = glm::vec3(rot * glm::vec4(camera.up, 0)); + camera.right = glm::vec3(rot * glm::vec4(camera.right, 0)); + camera.look = glm::vec3(rot * glm::vec4(camera.look, 0)); + camera.recomputeEye(); + updateCamera(); + } + }); + + inputHandler.registerWindowListener([&](int w, int h){ + _w = w; + _h = h; + glViewport(0, 0, w, h); + camera.resize(w, h); + updateCamera(); + }); +} + +//https://danielbeard.wordpress.com/2011/06/06/image-saving-code-c/ +void Window::saveImage(const std::string &filename) { + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + glViewport(0,0,_w,_h); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + for (Painter* painter : _painters) { + painter->draw(); + } +// std::vector pixels(this->_w*this->_h*4); + + glReadPixels(0,0,_w,_h, GL_BGRA, GL_UNSIGNED_BYTE, &(pixels[0])); + + std::ofstream o(filename.c_str(), std::ios::out | std::ios::binary); + o.put(0); + o.put(0); + o.put(2); /* uncompressed RGB */ + o.put(0); o.put(0); + o.put(0); o.put(0); + o.put(0); + o.put(0); o.put(0); /* X origin */ + o.put(0); o.put(0); /* y origin */ + o.put((_w & 0x00FF)); + o.put((_w & 0xFF00) / 256); + o.put((_h & 0x00FF)); + o.put((_h & 0xFF00) / 256); + o.put(32); /* 24 bit bitmap */ + o.put(0); + + for (int i=0;i<_w*_h*4;i+=4) { +// std::cout << (unsigned int)pixels[i] << "," << (unsigned int)pixels[i+1] << "," << (unsigned int)pixels[i+2] << "," << (unsigned int)pixels[i+3] << std::endl; + o.put(pixels[i+0]); + o.put(pixels[i+1]); + o.put(pixels[i+2]); + o.put(255); + } + + o.close(); + + glBindFramebuffer(GL_FRAMEBUFFER, 0); 
+} + +void Window::initloop(std::function predraw) { + GLuint vao; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + + for (Painter* painter : _painters) { + painter->setViewProj(glm::value_ptr(camera.viewProj())); + } + + glClearColor(0.2f, 0.2f, 0.2f, 1.f); + glEnable(GL_DEPTH_TEST); + + while (!glfwWindowShouldClose(_window)) { + predraw(); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + glBindVertexArray(vao); + + for (Painter* painter : _painters) { + painter->draw(); + } + + +// TwDraw(); + + glfwSwapBuffers(_window); + glfwPollEvents(); + } + +// TwTerminate(); + + glfwDestroyWindow(_window); + glfwTerminate(); +} + +void Window::handleMouseInput(InputHandler::MouseState &mouseState) { + +} + +void Window::addPainter(Painter *painter) { + _painters.push_back(painter); +} + +void Window::removePainter(Painter *painter) { + _painters.erase(std::remove(_painters.begin(), _painters.end(), painter), _painters.end()); +} + +void Window::updateCamera() { + camera.recompute(); + // send camera uniforms to painters + for (Painter* painter : _painters) { + painter->setViewProj(glm::value_ptr(camera.viewProj())); + } +} diff --git a/core/display/Window.h b/core/display/Window.h new file mode 100644 index 00000000..e8881ab4 --- /dev/null +++ b/core/display/Window.h @@ -0,0 +1,49 @@ +// +// Created by austin on 2/25/16. 
//

#ifndef FLUID_SIMULATOR_WINDOW_H
#define FLUID_SIMULATOR_WINDOW_H

// NOTE(review): the header names of the bare #include lines below, and the
// template arguments of the std::function / std::vector declarations further
// down, are missing in SOURCE. They are left exactly as shown - restore them
// from the repository.
#include
#include
#include "InputHandler.h"
#include
#include
#include
#include

/**
 * Application window: owns the GLFW window handle, a camera, the list of
 * painters drawn each frame, and the off-screen framebuffer used by
 * saveImage().
 */
class Window {
public:
    Window(const char* title);              // default size, custom title
    Window(int w = 1200, int h = 800);      // custom size, default title
    Window(int w, int h, const char* title);
    ~Window();
    // Runs the render loop; `predraw` is invoked at the start of every frame.
    void initloop(std::function predraw = [](){});

    // Renders the painters off-screen and writes the image to `filename`.
    void saveImage(const std::string &filename);

    // Painters are stored as raw pointers; callers keep ownership.
    void addPainter(Painter* painter);
    void removePainter(Painter* painter);

    void initializeTweakBar();
    TwButtonCallback loadSceneCB;           // callback for the "Load Scene" button

private:
    GLFWwindow* _window;
    void setupInputCBs();
    void handleMouseInput(InputHandler::MouseState &mouseState);
    std::vector _painters;
    Camera camera;
    void updateCamera();

    GLuint texture;                         // colour attachment of `fbo`
    GLuint fbo;                             // off-screen target for saveImage()
    std::vector pixels;                     // read-back buffer for saveImage()

    int _w;                                 // current window width
    int _h;                                 // current window height
};


#endif //FLUID_SIMULATOR_WINDOW_H

// ---- core/display/painters/BoxPainter.cpp ----
//
// Created by austin on 2/29/16.
+// + +#include "BoxPainter.h" +#include +#include + +struct vert { + glm::vec3 pos; + glm::vec3 col; +}; + +BoxPainter::BoxPainter(Box *box) : _box(box) { + GLuint vert = compileShader(flat_vert, GL_VERTEX_SHADER); + GLuint frag = compileShader(flat_frag, GL_FRAGMENT_SHADER); + + std::vector programs = {vert, frag}; + prog = makeProgram(programs); + + unifViewProj = glGetUniformLocation(prog, "u_viewProj"); + attrPos = glGetAttribLocation(prog, "v_pos"); + attrCol = glGetAttribLocation(prog, "v_col"); + + glGenBuffers(1, &vertex_buffer); + glGenBuffers(1, &index_buffer); + + create(); +} + +void BoxPainter::update() { + if (_box != nullptr) { + create(); + destroy(); + } +} + +void BoxPainter::draw() const { + if (_box != nullptr) { + glUseProgram(prog); + + glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer); + + glEnableVertexAttribArray(attrPos); + glVertexAttribPointer(attrPos, 3, GL_FLOAT, GL_FALSE, sizeof(vert), (void*)offsetof(vert, pos)); + + glEnableVertexAttribArray(attrCol); + glVertexAttribPointer(attrCol, 3, GL_FLOAT, GL_FALSE, sizeof(vert), (void*)offsetof(vert, col)); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer); + glDrawElements(GL_LINES, 24, GL_UNSIGNED_INT, (void*)0); + + glDisableVertexAttribArray(attrPos); + glDisableVertexAttribArray(attrCol); + } +} + +void BoxPainter::create() { + if (_box != nullptr) { + vert verts[8]; + GLuint indices[24]; + + float minX = _box->minX(); + float maxX = _box->maxX(); + float minY = _box->minY(); + float maxY = _box->maxY(); + float minZ = _box->minZ(); + float maxZ = _box->maxZ(); + + verts[0].pos = glm::vec3(minX, minY, minZ); + verts[1].pos = glm::vec3(minX, maxY, minZ); + verts[2].pos = glm::vec3(minX, maxY, maxZ); + verts[3].pos = glm::vec3(minX, minY, maxZ); + verts[4].pos = glm::vec3(maxX, minY, minZ); + verts[5].pos = glm::vec3(maxX, maxY, minZ); + verts[6].pos = glm::vec3(maxX, maxY, maxZ); + verts[7].pos = glm::vec3(maxX, minY, maxZ); + + indices[0] = 0; indices[1] = 1; + indices[2] = 1; 
indices[3] = 2; + indices[4] = 2; indices[5] = 3; + indices[6] = 3; indices[7] = 0; + + indices[8] = 4; indices[9] = 5; + indices[10] = 5; indices[11] = 6; + indices[12] = 6; indices[13] = 7; + indices[14] = 7; indices[15] = 4; + + indices[16] = 0; indices[17] = 4; + indices[18] = 1; indices[19] = 5; + indices[20] = 2; indices[21] = 6; + indices[22] = 3; indices[23] = 7; + + for (int i = 0; i < 8; ++i) { + verts[i].col = glm::vec3(1,1,1); + } + + glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, 8 * sizeof(vert), verts, GL_STATIC_DRAW); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, 24 * sizeof(GLuint), indices, GL_STATIC_DRAW); + } +} + +void BoxPainter::destroy() { + glDeleteBuffers(1, &vertex_buffer); + glDeleteBuffers(1, &index_buffer); +} + +void BoxPainter::setViewProj(const float *viewProj) { + glUseProgram(prog); + glUniformMatrix4fv(unifViewProj, 1, GL_FALSE, viewProj); +} diff --git a/core/display/painters/BoxPainter.h b/core/display/painters/BoxPainter.h new file mode 100644 index 00000000..a162eeb1 --- /dev/null +++ b/core/display/painters/BoxPainter.h @@ -0,0 +1,33 @@ +// +// Created by austin on 2/29/16. +// + +#ifndef FLUIDSOLVER_BOXPAINTER_H +#define FLUIDSOLVER_BOXPAINTER_H + +#include "Painter.h" +#include + +class BoxPainter : public Painter { +public: + BoxPainter(Box* box); + void update(); + void draw() const; + virtual void setViewProj(const float* viewProj); + +private: + GLuint vertex_buffer; + GLuint index_buffer; + + GLint unifViewProj; + GLint attrPos; + GLint attrCol; + + Box* _box; + + void create(); + void destroy(); +}; + + +#endif //FLUIDSOLVER_BOXPAINTER_H diff --git a/core/display/painters/GridScalarAttributePainter.cpp b/core/display/painters/GridScalarAttributePainter.cpp new file mode 100644 index 00000000..2fd1c3df --- /dev/null +++ b/core/display/painters/GridScalarAttributePainter.cpp @@ -0,0 +1,159 @@ +// +// Created by austin on 3/28/16. 
+// + +#include "GridScalarAttributePainter.h" +#include +#include +#include + +GridScalarAttributePainter::GridScalarAttributePainter( + Grid *grid, float rangeStart, float rangeEnd, + float ptSizeStart, float ptSizeEnd, const glm::vec3 &colorStart, + const glm::vec3 &colorEnd) + : + GridScalarAttributePainter(grid, rangeStart, rangeEnd, ptSizeStart, ptSizeEnd, colorStart, colorEnd, FLOAT) { + +} + +GridScalarAttributePainter::GridScalarAttributePainter( + Grid *grid, float rangeStart, float rangeEnd, + float ptSizeStart, float ptSizeEnd, const glm::vec3 &colorStart, + const glm::vec3 &colorEnd) + : + GridScalarAttributePainter(grid, rangeStart, rangeEnd, ptSizeStart, ptSizeEnd, colorStart, colorEnd, INT) { + +} + +template +GridScalarAttributePainter::GridScalarAttributePainter(Grid *grid, float rangeStart, float rangeEnd, + float ptSizeStart, float ptSizeEnd, const glm::vec3 &colorStart, + const glm::vec3 &colorEnd, Type type) : type(type) { + + if (type == FLOAT) { + _attributesF = &dynamic_cast*>(grid)->_contents; + MAX_ATTRIBUTES = _attributesF->size(); + } else if (type == INT) { + _attributesI = &dynamic_cast*>(grid)->_contents; + MAX_ATTRIBUTES = _attributesI->size(); + } + + std::vector indices; + for (int i = 0; i < MAX_ATTRIBUTES; i++) { + indices.push_back(i); + } + + // compile shaders + GLuint gridScalVert = compileShader(gridScal_vert, GL_VERTEX_SHADER); + GLuint gridScalFrag = compileShader(gridScal_frag, GL_FRAGMENT_SHADER); + + std::vector programs = {gridScalVert, gridScalFrag}; + prog = makeProgram(programs); + + // setup shader locations + unifViewProj = glGetUniformLocation(prog, "u_viewProj"); + if (type == FLOAT) { + attrData = glGetAttribLocation(prog, "f_data"); + } else if (type == INT) { + attrData = glGetAttribLocation(prog, "i_data"); + } + + unifColStart = glGetUniformLocation(prog, "u_colStart"); + unifColEnd = glGetUniformLocation(prog, "u_colEnd"); + unifSizeStart = glGetUniformLocation(prog, "u_sizeStart"); + unifSizeEnd = 
glGetUniformLocation(prog, "u_sizeEnd"); + unifRangeStart = glGetUniformLocation(prog, "u_rangeStart"); + unifRangeEnd = glGetUniformLocation(prog, "u_rangeEnd"); + unifType = glGetUniformLocation(prog, "u_type"); + + unifCellSize = glGetUniformLocation(prog, "u_cellSize"); + unifCellCount = glGetUniformLocation(prog, "u_cellCount"); + unifOrigin = glGetUniformLocation(prog, "u_origin"); + + // make a buffer for the indices + glGenBuffers(1, &index_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, MAX_ATTRIBUTES * sizeof(int), indices.data(), GL_STATIC_DRAW); + + // make a buffer for the attributes + if (type == FLOAT) { + glGenBuffers(1, &attribute_buffer); + glBindBuffer(GL_ARRAY_BUFFER, attribute_buffer); + glBufferData(GL_ARRAY_BUFFER, MAX_ATTRIBUTES * sizeof(float), NULL, GL_STREAM_DRAW); + } else if (type == INT) { + glGenBuffers(1, &attribute_buffer); + glBindBuffer(GL_ARRAY_BUFFER, attribute_buffer); + glBufferData(GL_ARRAY_BUFFER, MAX_ATTRIBUTES * sizeof(int), NULL, GL_STREAM_DRAW); + } + + // set grid uniforms + glUseProgram(prog); + if (type == FLOAT) { + glUniform1i(unifType, 0); + } else if (type == INT) { + glUniform1i(unifType, 1); + } + glUniform1f(unifCellSize, grid->_cellSize); + glm::ivec3 count = glm::ivec3(grid->_countX, grid->_countY, grid->_countZ); + glUniform3iv(unifCellCount, 1, &(count.x)); + glm::vec3 o = grid->_origin + grid->_offset; + glUniform3fv(unifOrigin, 1, &(o[0])); + + glUniform3fv(unifColStart, 1, &(colorStart[0])); + glUniform3fv(unifColEnd, 1, &(colorEnd[0])); + glUniform1f(unifRangeStart, rangeStart); + glUniform1f(unifRangeEnd, rangeEnd); + glUniform1f(unifSizeStart, ptSizeStart); + glUniform1f(unifSizeEnd, ptSizeEnd); +} + +void GridScalarAttributePainter::draw() const { + if (type == FLOAT) { + if (_attributesF != nullptr) { +// for (float f : *_attributesF) { +// if (!fequal(f, 0.f) && f < 0) { +// std:: cout << f << std::endl; +// } +// } + + glUseProgram(prog); + 
glEnable(GL_PROGRAM_POINT_SIZE); + + // bind and send new data + glBindBuffer(GL_ARRAY_BUFFER, attribute_buffer); + glBufferData(GL_ARRAY_BUFFER, MAX_ATTRIBUTES * sizeof(float), NULL, GL_STREAM_DRAW); + glBufferSubData(GL_ARRAY_BUFFER, 0, MAX_ATTRIBUTES * sizeof(float), _attributesF->data()); + + glEnableVertexAttribArray(attrData); + glVertexAttribPointer(attrData, 1, GL_FLOAT, GL_FALSE, sizeof(float), (void*)0); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer); + glDrawElements(GL_POINTS, _attributesF->size(), GL_UNSIGNED_INT, 0); + + glDisableVertexAttribArray(attrData); + } + } else if (type == INT) { + if (_attributesI != nullptr) { + glUseProgram(prog); + glEnable(GL_PROGRAM_POINT_SIZE); + + // bind and send new data + glBindBuffer(GL_ARRAY_BUFFER, attribute_buffer); + glBufferData(GL_ARRAY_BUFFER, MAX_ATTRIBUTES * sizeof(int), NULL, GL_STREAM_DRAW); + glBufferSubData(GL_ARRAY_BUFFER, 0, MAX_ATTRIBUTES * sizeof(int), _attributesI->data()); + + glEnableVertexAttribArray(attrData); + glVertexAttribIPointer(attrData, 1, GL_INT, sizeof(int), (void*)0); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer); + glDrawElements(GL_POINTS, _attributesI->size(), GL_UNSIGNED_INT, 0); + + glDisableVertexAttribArray(attrData); + } + } +} + +void GridScalarAttributePainter::setViewProj(const float *viewProj) { + glUseProgram(prog); + glUniformMatrix4fv(unifViewProj, 1, GL_FALSE, viewProj); +} diff --git a/core/display/painters/GridScalarAttributePainter.h b/core/display/painters/GridScalarAttributePainter.h new file mode 100644 index 00000000..b96024f4 --- /dev/null +++ b/core/display/painters/GridScalarAttributePainter.h @@ -0,0 +1,74 @@ +// +// Created by austin on 3/28/16. 
+// + +#ifndef FLUIDSOLVER_GRIDSCALARATTRIBUTEPAINTER_H +#define FLUIDSOLVER_GRIDSCALARATTRIBUTEPAINTER_H + +#include "Painter.h" +#include + +class GridScalarAttributePainter : public Painter { +public: + enum Type { + INT, + FLOAT + }; + + template explicit GridScalarAttributePainter(Grid* grid, + float rangeStart, + float rangeEnd, + float ptSizeStart, + float ptSizeEnd, + const glm::vec3 &colorStart, + const glm::vec3 &colorEnd, + Type type); + + explicit GridScalarAttributePainter(Grid* grid, + float rangeStart, + float rangeEnd, + float ptSizeStart, + float ptSizeEnd, + const glm::vec3 &colorStart, + const glm::vec3 &colorEnd); + + explicit GridScalarAttributePainter(Grid* grid, + float rangeStart, + float rangeEnd, + float ptSizeStart, + float ptSizeEnd, + const glm::vec3 &colorStart, + const glm::vec3 &colorEnd); + + virtual void draw() const; + virtual void setViewProj(const float* viewProj); + +private: + Type type; + unsigned int MAX_ATTRIBUTES = 10000; + GLuint index_buffer; + GLuint attribute_buffer; + + GLint attrIndex; + GLint attrData; + + GLint unifViewProj; + + GLint unifColStart; + GLint unifColEnd; + GLint unifSizeStart; + GLint unifSizeEnd; + GLint unifRangeStart; + GLint unifRangeEnd; + GLint unifType; + + GLint unifCellSize; + GLint unifCellCount; + GLint unifOrigin; + + std::vector* _attributesF; + std::vector* _attributesI; +}; + + +#endif //FLUIDSOLVER_GRIDSCALARATTRIBUTEPAINTER_H diff --git a/core/display/painters/GridVectorAttributePainter.cpp b/core/display/painters/GridVectorAttributePainter.cpp new file mode 100644 index 00000000..e7f7121f --- /dev/null +++ b/core/display/painters/GridVectorAttributePainter.cpp @@ -0,0 +1,83 @@ +// +// Created by austin on 3/22/16. 
+// + +#include "GridVectorAttributePainter.h" +#include +#include +#include + +GridVectorAttributePainter::GridVectorAttributePainter(Grid *grid, + float ptSize, + const glm::vec3 &color, + const glm::vec3 &dir) : + _ptSize(ptSize), _attributes(&grid->_contents) { + MAX_ATTRIBUTES = (unsigned int) _attributes->size(); + + std::vector indices; + for (int i = 0; i < _attributes->size(); i++) { + indices.push_back(i); + } + + // compile shaders + GLuint gridAttrVert = compileShader(gridAttr_vert, GL_VERTEX_SHADER); + GLuint gridAttrGeo = compileShader(gridAttr_geo, GL_GEOMETRY_SHADER); + GLuint gridAttrFrag = compileShader(gridAttr_frag, GL_FRAGMENT_SHADER); + + std::vector programs = {gridAttrVert, gridAttrGeo, gridAttrFrag}; + prog = makeProgram(programs); + + // setup shader locations + unifViewProj = glGetUniformLocation(prog, "u_viewProj"); + attrData = glGetAttribLocation(prog, "v_data"); + unifCol = glGetUniformLocation(prog, "u_col"); + unifCellSize = glGetUniformLocation(prog, "u_cellSize"); + unifCellCount = glGetUniformLocation(prog, "u_cellCount"); + unifOrigin = glGetUniformLocation(prog, "u_origin"); + unifVec = glGetUniformLocation(prog, "u_vec"); + + // make a buffer for the indices + glGenBuffers(1, &index_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, MAX_ATTRIBUTES * sizeof(int), indices.data(), GL_STATIC_DRAW); + + // make a buffer for the attributes + glGenBuffers(1, &attribute_buffer); + glBindBuffer(GL_ARRAY_BUFFER, attribute_buffer); + glBufferData(GL_ARRAY_BUFFER, MAX_ATTRIBUTES * sizeof(float), NULL, GL_STREAM_DRAW); + + // set grid uniforms + glUseProgram(prog); + glUniform1f(unifCellSize, grid->_cellSize); + glm::ivec3 count = glm::ivec3(grid->_countX, grid->_countY, grid->_countZ); + glUniform3iv(unifCellCount, 1, &(count.x)); + glm::vec3 o = grid->_origin + grid->_offset; + glUniform3fv(unifOrigin, 1, &(o[0])); + glUniform3fv(unifCol, 1, &(color[0])); + glUniform3fv(unifVec, 1, 
&(dir[0])); +} + +void GridVectorAttributePainter::draw() const { + if (_attributes != nullptr) { + glUseProgram(prog); + + // bind and send new data + glBindBuffer(GL_ARRAY_BUFFER, attribute_buffer); + glBufferData(GL_ARRAY_BUFFER, MAX_ATTRIBUTES * sizeof(float), NULL, GL_STREAM_DRAW); + glBufferSubData(GL_ARRAY_BUFFER, 0, MAX_ATTRIBUTES * sizeof(float), _attributes->data()); + + glEnableVertexAttribArray(attrData); + glVertexAttribPointer(attrData, 1, GL_FLOAT, GL_FALSE, sizeof(float), (void*)0); + + glPointSize(_ptSize); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer); + glDrawElements(GL_POINTS, _attributes->size(), GL_UNSIGNED_INT, 0); + + glDisableVertexAttribArray(attrData); + } +} + +void GridVectorAttributePainter::setViewProj(const float *viewProj) { + glUseProgram(prog); + glUniformMatrix4fv(unifViewProj, 1, GL_FALSE, viewProj); +} diff --git a/core/display/painters/GridVectorAttributePainter.h b/core/display/painters/GridVectorAttributePainter.h new file mode 100644 index 00000000..68042ddd --- /dev/null +++ b/core/display/painters/GridVectorAttributePainter.h @@ -0,0 +1,36 @@ +// +// Created by austin on 3/22/16. 
+// + +#ifndef FLUIDSOLVER_GRIDVECTORATTRIBUTEPAINTER_H +#define FLUIDSOLVER_GRIDVECTORATTRIBUTEPAINTER_H + +#include "Painter.h" +#include + +class GridVectorAttributePainter : public Painter { +public: + explicit GridVectorAttributePainter(Grid* grid, float ptSize, const glm::vec3 &color, const glm::vec3 &dir); + virtual void draw() const; + virtual void setViewProj(const float* viewProj); + +private: + unsigned int MAX_ATTRIBUTES = 10000; + GLuint index_buffer; + GLuint attribute_buffer; + + GLint unifViewProj; + GLint attrIndex; + GLint attrData; + GLint unifCol; + GLint unifCellSize; + GLint unifCellCount; + GLint unifOrigin; + GLint unifVec; + + GLfloat _ptSize; + std::vector* _attributes; +}; + + +#endif //FLUIDSOLVER_GRIDVECTORATTRIBUTEPAINTER_H diff --git a/core/display/painters/Painter.cpp b/core/display/painters/Painter.cpp new file mode 100644 index 00000000..a5c0d0ab --- /dev/null +++ b/core/display/painters/Painter.cpp @@ -0,0 +1,62 @@ +// +// Created by austin on 2/29/16. +// + +#include "Painter.h" +#include + +// Compiles `shader` as a shader object of the given `type`. +// Returns the shader name, or 0 after logging the source and the info log when compilation fails. +GLuint Painter::compileShader(const char* shader, GLenum type) { + GLuint shaderId = glCreateShader(type); + // Core entry point, matching glCreateShader/glCompileShader (the ARB variant takes a GLhandleARB). + glShaderSource(shaderId, 1, &shader, NULL); + glCompileShader(shaderId); + + GLint success = 0; + glGetShaderiv(shaderId, GL_COMPILE_STATUS, &success); + if (success == GL_FALSE) { + fprintf(stderr, "Failed to compile shader!\n%s\n", shader); + + GLint maxLength = 0; + glGetShaderiv(shaderId, GL_INFO_LOG_LENGTH, &maxLength); + + // One spare element keeps the buffer non-empty and NUL-terminated even for a zero-length log. + std::vector errorLog(maxLength + 1); + glGetShaderInfoLog(shaderId, maxLength, &maxLength, &errorLog[0]); + + fprintf(stderr, "%s\n", &errorLog[0]); + + glDeleteShader(shaderId); + // The name is invalid once deleted; report failure with 0 instead of returning it. + return 0; + } + + return shaderId; +} + +GLuint Painter::makeProgram(const std::vector &programs) { + GLuint prog = glCreateProgram(); + for (GLuint program : programs) { + glAttachShader(prog, program); + } + glLinkProgram(prog); + + for (GLuint program : programs) { + glDetachShader(prog, program); + glDeleteShader(program); + } + + GLint linked = GL_FALSE; + 
glGetProgramiv(prog, GL_LINK_STATUS, &linked); + if (!linked) { + std::cerr << "Failed to link program!" << std::endl; + + GLint length = 0; + glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &length); + if ( length > 0 ){ + std::vector ProgramErrorMessage(length+1); + glGetProgramInfoLog(prog, length, NULL, &ProgramErrorMessage[0]); + fprintf(stderr, "%s\n", &ProgramErrorMessage[0]); + } + } + return prog; +} + +void Painter::setViewProj(const float *viewProj) { + +} diff --git a/core/display/painters/Painter.h b/core/display/painters/Painter.h new file mode 100644 index 00000000..42d2a714 --- /dev/null +++ b/core/display/painters/Painter.h @@ -0,0 +1,24 @@ +// +// Created by austin on 2/29/16. +// + +#ifndef FLUIDSOLVER_PAINTER_H +#define FLUIDSOLVER_PAINTER_H + +#include +#include + +// Abstract base for painters; `prog` holds the GL program built with makeProgram(). +class Painter { +public: + // Virtual so that deleting a derived painter through a Painter* is well-defined. + virtual ~Painter() = default; + + virtual void draw() const = 0; + virtual void setViewProj(const float* viewProj); + +protected: + GLuint prog; + + GLuint compileShader(const char* shader, GLenum type); + GLuint makeProgram(const std::vector &programs); +}; + + +#endif //FLUIDSOLVER_PAINTER_H diff --git a/core/display/painters/ParticlesPainter.cpp b/core/display/painters/ParticlesPainter.cpp new file mode 100644 index 00000000..fc7bbac2 --- /dev/null +++ b/core/display/painters/ParticlesPainter.cpp @@ -0,0 +1,64 @@ +// +// Created by austin on 2/29/16. 
+// + +#include "ParticlesPainter.h" +#include +#include + +ParticlesPainter::ParticlesPainter(FluidSolver* solver, float ptSize) : _ptSize(ptSize), _particles(&solver->_particles) { + MAX_PARTICLES = (unsigned int) _particles->size(); + + // compile shaders + GLuint particleVert = compileShader(particle_vert, GL_VERTEX_SHADER); + GLuint particleFrag = compileShader(particle_frag, GL_FRAGMENT_SHADER); + + std::vector programs = {particleVert, particleFrag}; + prog = makeProgram(programs); + + // setup shader locations + unifViewProj = glGetUniformLocation(prog, "u_viewProj"); + attrPos = glGetAttribLocation(prog, "v_pos"); + attrVel = glGetAttribLocation(prog, "v_vel"); + attrCol = glGetAttribLocation(prog, "v_col"); + + // make a buffer for the particles + glGenBuffers(1, &particle_buffer); + glBindBuffer(GL_ARRAY_BUFFER, particle_buffer); + glBufferData(GL_ARRAY_BUFFER, MAX_PARTICLES * sizeof(FluidParticle), NULL, GL_STREAM_DRAW); +} + +void ParticlesPainter::draw() const { + if (_particles != nullptr) { + glUseProgram(prog); + + // bind and send new data + glBindBuffer(GL_ARRAY_BUFFER, particle_buffer); + glBufferData(GL_ARRAY_BUFFER, MAX_PARTICLES * sizeof(FluidParticle), NULL, GL_STREAM_DRAW); + glBufferSubData(GL_ARRAY_BUFFER, 0, MAX_PARTICLES * sizeof(FluidParticle), &((*_particles)[0])); + + // particle positions, offset by pos attribute, jumping by FluidParticle size + glEnableVertexAttribArray(attrPos); + glVertexAttribPointer(attrPos, 3, GL_FLOAT, GL_FALSE, sizeof(FluidParticle), (void*)offsetof(FluidParticle, pos)); + + // particle velocities, offset by vel attribute + glEnableVertexAttribArray(attrVel); + glVertexAttribPointer(attrVel, 3, GL_FLOAT, GL_FALSE, sizeof(FluidParticle), (void*)offsetof(FluidParticle, vel)); + + // particle colors, offset by col attribute + glEnableVertexAttribArray(attrCol); + glVertexAttribPointer(attrCol, 3, GL_FLOAT, GL_FALSE, sizeof(FluidParticle), (void*)offsetof(FluidParticle, col)); + + glPointSize(_ptSize); + 
glDrawArrays(GL_POINTS, 0, (GLsizei) _particles->size()); + + glDisableVertexAttribArray(attrPos); + glDisableVertexAttribArray(attrVel); + glDisableVertexAttribArray(attrCol); + } +} + +void ParticlesPainter::setViewProj(const float *viewProj) { + glUseProgram(prog); + glUniformMatrix4fv(unifViewProj, 1, GL_FALSE, viewProj); +} diff --git a/core/display/painters/ParticlesPainter.h b/core/display/painters/ParticlesPainter.h new file mode 100644 index 00000000..2b1cd41b --- /dev/null +++ b/core/display/painters/ParticlesPainter.h @@ -0,0 +1,31 @@ +// +// Created by austin on 2/29/16. +// + +#ifndef FLUIDSOLVER_PARTICLESPAINTER_H +#define FLUIDSOLVER_PARTICLESPAINTER_H + +#include "Painter.h" +#include + +class ParticlesPainter : public Painter { +public: + ParticlesPainter(FluidSolver* solver, float ptSize = 3.f); + virtual void draw() const; + virtual void setViewProj(const float* viewProj); + +private: + unsigned int MAX_PARTICLES = 10000; + GLuint particle_buffer; + + GLint unifViewProj; + GLint attrPos; + GLint attrVel; + GLint attrCol; + + GLfloat _ptSize; + std::vector* _particles; +}; + + +#endif //FLUIDSOLVER_PARTICLESPAINTER_H diff --git a/core/display/shaders/flat.frag.h b/core/display/shaders/flat.frag.h new file mode 100644 index 00000000..6e0ac8b7 --- /dev/null +++ b/core/display/shaders/flat.frag.h @@ -0,0 +1,14 @@ +// +// Created by austin on 2/29/16. +// + +const char* flat_frag = R"( +#version 150 + +in vec3 f_col; +out vec4 out_Col; + +void main() { + out_Col = vec4(f_col.rgb, 1); +} +)"; diff --git a/core/display/shaders/flat.vert.h b/core/display/shaders/flat.vert.h new file mode 100644 index 00000000..cfb8ade4 --- /dev/null +++ b/core/display/shaders/flat.vert.h @@ -0,0 +1,24 @@ +// +// Created by austin on 2/29/16. 
+// + +#ifndef FLUIDSOLVER_FLAT_VERT_H_H +#define FLUIDSOLVER_FLAT_VERT_H_H + +const char* flat_vert = R"( +#version 150 + +uniform mat4 u_viewProj; + +in vec3 v_pos; +in vec3 v_col; + +out vec3 f_col; + +void main() { + f_col = v_col; + gl_Position = u_viewProj * vec4(v_pos, 1); +} +)"; + +#endif //FLUIDSOLVER_FLAT_VERT_H_H \ No newline at end of file diff --git a/core/display/shaders/gridAttr.frag.h b/core/display/shaders/gridAttr.frag.h new file mode 100644 index 00000000..9c6910aa --- /dev/null +++ b/core/display/shaders/gridAttr.frag.h @@ -0,0 +1,19 @@ +// +// Created by austin on 2/28/16. +// + +#ifndef FLUIDSOLVER_GRIDATTR_FRAG_H +#define FLUIDSOLVER_GRIDATTR_FRAG_H + +const char* gridAttr_frag = R"( +#version 150 + +uniform vec3 u_col; +out vec4 out_Col; +in float f_scale; + +void main() { + out_Col = vec4(f_scale*u_col.rgb, 1); +} +)"; +#endif \ No newline at end of file diff --git a/core/display/shaders/gridAttr.geo.h b/core/display/shaders/gridAttr.geo.h new file mode 100644 index 00000000..b17c128b --- /dev/null +++ b/core/display/shaders/gridAttr.geo.h @@ -0,0 +1,32 @@ +// +// Created by austin on 3/22/16. +// + +#ifndef FLUIDSOLVER_GRIDATTR_GEO_H +#define FLUIDSOLVER_GRIDATTR_GEO_H + +const char* gridAttr_geo = R"( +#version 150 + +layout(points) in; +layout(line_strip, max_vertices = 2) out; + +uniform mat4 u_viewProj; +uniform vec3 u_vec; + +in float g_data[]; +out float f_scale; + +void main() { + f_scale = abs(g_data[0]); + + gl_Position = u_viewProj * gl_in[0].gl_Position; + EmitVertex(); + + gl_Position = u_viewProj * (gl_in[0].gl_Position + g_data[0]*vec4(u_vec, 0)); + EmitVertex(); + EndPrimitive(); +} +)"; + +#endif //FLUIDSOLVER_GRIDATTR_GEO_H diff --git a/core/display/shaders/gridAttr.vert.h b/core/display/shaders/gridAttr.vert.h new file mode 100644 index 00000000..b524d604 --- /dev/null +++ b/core/display/shaders/gridAttr.vert.h @@ -0,0 +1,32 @@ +// +// Created by austin on 2/28/16. 
+// + +#ifndef FLUIDSOLVER_GRIDATTR_VERT_H +#define FLUIDSOLVER_GRIDATTR_VERT_H + +const char* gridAttr_vert = R"( +#version 150 + +uniform float u_cellSize; +uniform ivec3 u_cellCount; +uniform vec3 u_origin; + +in float v_data; +out float g_data; + +void main() { + g_data = v_data; + + int i = int(mod(gl_VertexID, u_cellCount.x)); + int j = int(mod(gl_VertexID / u_cellCount.x, u_cellCount.y)); + int k = int(gl_VertexID / (u_cellCount.x * u_cellCount.y)); + + vec3 pos = vec3(float(i), float(j), float(k)) * u_cellSize + u_origin; + + gl_Position = vec4(pos, 1); +} +)"; + + +#endif \ No newline at end of file diff --git a/core/display/shaders/gridScal.frag.h b/core/display/shaders/gridScal.frag.h new file mode 100644 index 00000000..63b352fb --- /dev/null +++ b/core/display/shaders/gridScal.frag.h @@ -0,0 +1,22 @@ +// +// Created by austin on 3/28/16. +// + +#ifndef FLUIDSOLVER_GRIDSCAL_FRAG_H +#define FLUIDSOLVER_GRIDSCAL_FRAG_H + +const char* gridScal_frag = R"( +#version 150 + +uniform vec3 u_colStart; +uniform vec3 u_colEnd; + +in float amount; +out vec4 out_Col; + +void main() { + out_Col = vec4((1 - amount)*u_colStart + amount*u_colEnd, 1); +} +)"; + +#endif //FLUIDSOLVER_GRIDSCAL_FRAG_H diff --git a/core/display/shaders/gridScal.vert.h b/core/display/shaders/gridScal.vert.h new file mode 100644 index 00000000..19861d6f --- /dev/null +++ b/core/display/shaders/gridScal.vert.h @@ -0,0 +1,48 @@ +// +// Created by austin on 3/28/16. 
+// + +#ifndef FLUIDSOLVER_GRIDSCAL_VERT_H +#define FLUIDSOLVER_GRIDSCAL_VERT_H + +const char* gridScal_vert = R"( +#version 150 + +uniform mat4 u_viewProj; + +uniform float u_cellSize; +uniform ivec3 u_cellCount; +uniform vec3 u_origin; + +uniform float u_sizeStart; +uniform float u_sizeEnd; +uniform float u_rangeStart; +uniform float u_rangeEnd; + +uniform int u_type; + +in float f_data; +in int i_data; +out float amount; + +void main() { + + int i = int(mod(gl_VertexID, u_cellCount.x)); + int j = int(mod(gl_VertexID / u_cellCount.x, u_cellCount.y)); + int k = int(gl_VertexID / (u_cellCount.x * u_cellCount.y)); + + vec3 pos = vec3(float(i), float(j), float(k)) * u_cellSize + u_origin; + + gl_Position = u_viewProj * vec4(pos, 1); + + if (u_type > 0) { + amount = (float(i_data) - u_rangeStart) / (u_rangeEnd - u_rangeStart); + } else { + amount = (f_data - u_rangeStart) / (u_rangeEnd - u_rangeStart); + } + + gl_PointSize = u_sizeStart + amount * (u_sizeEnd - u_sizeStart); +} +)"; + +#endif //FLUIDSOLVER_GRIDSCAL_VERT_H diff --git a/core/display/shaders/particle.frag.h b/core/display/shaders/particle.frag.h new file mode 100644 index 00000000..ed0d9e43 --- /dev/null +++ b/core/display/shaders/particle.frag.h @@ -0,0 +1,21 @@ +// +// Created by austin on 2/28/16. +// + +#ifndef FLUIDSOLVER_PARTICLE_FRAG_H +#define FLUIDSOLVER_PARTICLE_FRAG_H + +const char* particle_frag = R"( +#version 150 + +in vec3 f_col; +in vec3 f_vel; +out vec4 out_Col; + +void main() { + float fac = length(f_vel) / 20.0; + vec3 col = f_col + vec3(fac,fac,fac); + out_Col = vec4(col, 1); +} +)"; +#endif \ No newline at end of file diff --git a/core/display/shaders/particle.vert.h b/core/display/shaders/particle.vert.h new file mode 100644 index 00000000..d5bed065 --- /dev/null +++ b/core/display/shaders/particle.vert.h @@ -0,0 +1,28 @@ +// +// Created by austin on 2/28/16. 
+// + +#ifndef FLUIDSOLVER_PARTICLE_VERT_H +#define FLUIDSOLVER_PARTICLE_VERT_H + +const char* particle_vert = R"( +#version 150 + +uniform mat4 u_viewProj; + +in vec3 v_pos; +in vec3 v_vel; +in vec3 v_col; + +out vec3 f_col; +out vec3 f_vel; + +void main() { + f_col = v_col; + f_vel = v_vel; + gl_Position = u_viewProj * vec4(v_pos, 1); + gl_PointSize = 3; +} +)"; + +#endif \ No newline at end of file diff --git a/core/fileIO/ParticlesWriter.cpp b/core/fileIO/ParticlesWriter.cpp new file mode 100644 index 00000000..4f5b0eca --- /dev/null +++ b/core/fileIO/ParticlesWriter.cpp @@ -0,0 +1,54 @@ +// +// Created by austin on 3/21/16. +// + +#include "ParticlesWriter.h" +//#include +//#include +//#include + +//using namespace openvdb::tools; + +ParticlesWriter::ParticlesWriter() { + openvdb::initialize(); +// openvdb::points::initialize(); +} + +ParticlesWriter::~ParticlesWriter() { + +} + +void ParticlesWriter::writeData(const FluidSolver* const solver, const std::string &filename) { +// openvdb::FloatGrid::Ptr grid = openvdb::FloatGrid::create(); +// openvdb::FloatGrid::Accessor accessor = grid->getAccessor(); +// openvdb::Coord xyz(0, 0, 0); + +// for (const FluidParticle &particle : solver->_particles) { +// xyz.reset(particle.cell.x, particle.cell.y, particle.cell.z); +// accessor.setValue(xyz, 1.f); +// } +// +// openvdb::io::File file(filename); +// openvdb::GridPtrVec grids; +// grids.push_back(grid); +// +// file.write(grids); +// file.close(); + +// const float voxelSize = 10.0f; +// openvdb::math::Transform::Ptr transform = openvdb::math::Transform::createLinearTransform(voxelSize); +// +// std::vector positions; +// for (const FluidParticle &particle : solver->_particles) { +// positions.push_back(openvdb::Vec3f(particle.pos.x, particle.pos.y, particle.pos.z)); +// } +// +// PointDataGrid::Ptr grid = createPointDataGrid(positions, TypedAttributeArray::attributeType(), *transform); +// +// openvdb::io::File file("filename"); +// openvdb::GridPtrVec grids; +// 
grids.push_back(grid); +// +// file.write(grids); +// file.close(); +} diff --git a/core/fileIO/ParticlesWriter.h b/core/fileIO/ParticlesWriter.h new file mode 100644 index 00000000..c2e28767 --- /dev/null +++ b/core/fileIO/ParticlesWriter.h @@ -0,0 +1,22 @@ +// +// Created by austin on 3/21/16. +// + +#ifndef FLUIDSOLVER_PARTICLESWRITER_H +#define FLUIDSOLVER_PARTICLESWRITER_H + +#include +//#include +#include + +class ParticlesWriter { + +public: + ParticlesWriter(); + ~ParticlesWriter(); + + void writeData(const FluidSolver* const solver, const std::string &filename); +}; + + +#endif //FLUIDSOLVER_PARTICLESWRITER_H diff --git a/core/fileIO/SceneLoader.cpp b/core/fileIO/SceneLoader.cpp new file mode 100644 index 00000000..be81cad9 --- /dev/null +++ b/core/fileIO/SceneLoader.cpp @@ -0,0 +1,69 @@ +// +// Created by austin on 2/26/16. +// + +#include +#include +#include "SceneLoader.h" +#include + +FluidSolver* SceneLoader::LoadScene(const char *filepath) { + Json::Reader reader; + + std::ifstream fileStream(filepath, std::ifstream::binary); + + Json::Value root; + if (filepath != nullptr) { + if (!reader.parse(fileStream, root, false)) { + fprintf(stderr, "Failed to load json file %s!", filepath); + exit(EXIT_FAILURE); + } + } else { + return LoadScene(std::string(default_scene)); + } + + return parseJson(root); +} + +FluidSolver* SceneLoader::LoadScene(const std::string &jsonstring) { + Json::Reader reader; + Json::Value root; + + if (!reader.parse(jsonstring, root, false)) { + fprintf(stderr, "Failed to load json string!\n %s", jsonstring); + exit(EXIT_FAILURE); + } + + return parseJson(root); +} + +FluidSolver* SceneLoader::parseJson(const Json::Value &root) { + Json::Value containerDim = root["containerDim"]; + Json::Value particleDim = root["particleDim"]; + Json::Value resolution = root["resolution"]; + + glm::vec3 containerSize(containerDim["scale"][0].asFloat(), + containerDim["scale"][1].asFloat(), + containerDim["scale"][2].asFloat()); + glm::vec3 
containerPos(containerDim["position"][0].asFloat(), + containerDim["position"][1].asFloat(), + containerDim["position"][2].asFloat()); + + glm::vec3 fluidSize(particleDim["scale"][0].asFloat(), + particleDim["scale"][1].asFloat(), + particleDim["scale"][2].asFloat()); + glm::vec3 fluidPos(particleDim["position"][0].asFloat(), + particleDim["position"][1].asFloat(), + particleDim["position"][2].asFloat()); + + float cellSize = std::max(std::max(containerSize.x, containerSize.y), containerSize.z) / resolution.asFloat(); + + Box* container = new Box(containerPos, containerSize); +// Box* container = new Box(containerPos, containerSize + 2.f*glm::vec3(cellSize, cellSize, cellSize)); + Box fluidObject = Box(fluidPos, fluidSize); + + FluidSolver* solver = new FluidSolver(cellSize/2, cellSize); + solver->setContainer(container); + solver->addFluid(fluidObject); + return solver; +} diff --git a/core/fileIO/SceneLoader.h b/core/fileIO/SceneLoader.h new file mode 100644 index 00000000..47c60612 --- /dev/null +++ b/core/fileIO/SceneLoader.h @@ -0,0 +1,23 @@ +// +// Created by austin on 2/26/16. +// + +#ifndef FLUIDSOLVER_SCENELOADER_H +#define FLUIDSOLVER_SCENELOADER_H + +#include +#include + +class SceneLoader { +public: + static FluidSolver* LoadScene(const char* filepath); + static FluidSolver* LoadScene(const std::string &jsonstring); + + static const char * defaultScene; + +private: + static FluidSolver* parseJson(const Json::Value &root); +}; + + +#endif //FLUIDSOLVER_SCENELOADER_H diff --git a/core/geometry/Bound.cpp b/core/geometry/Bound.cpp new file mode 100644 index 00000000..2c0adc7f --- /dev/null +++ b/core/geometry/Bound.cpp @@ -0,0 +1,158 @@ +// +// Created by austin on 2/27/16. 
+// + +#include "Bound.h" + +// Default bound: all six extents zero, so the accessors never read indeterminate floats. +Bound::Bound() : Bound(0.f, 0.f, 0.f, 0.f, 0.f, 0.f) {} + +Bound::Bound(const glm::vec3 &center, const glm::vec3 &dim) : Bound(center, dim.x, dim.y, dim.z) {} + +Bound::Bound(const glm::vec3 &center, float sX, float sY, float sZ) : Bound(center.x - sX / 2.f, center.y - sY / 2.f, center.z - sZ / 2.f, + center.x + sX/2.f, center.y + sY/2.f, center.z + sZ/2.f) {} + +// `dim` is the full size along each axis, so each face sits half of it away from the centre; +// this matches the other centre-based constructors and makes dim() return `dim`. +Bound::Bound(float cX, float cY, float cZ, const glm::vec3 &dim) : Bound(cX - dim.x / 2.f, cY - dim.y / 2.f, cZ - dim.z / 2.f, + cX + dim.x / 2.f, cY + dim.y / 2.f, cZ + dim.z / 2.f) {} + +Bound::Bound(float minX, float minY, float minZ, float maxX, float maxY, float maxZ) + : _minX(minX), _minY(minY), _minZ(minZ), _maxX(maxX), _maxY(maxY), _maxZ(maxZ) {} + +Bound::~Bound() {} + +float Bound::minX() const { return _minX; } + +float Bound::minY() const { return _minY; } + +float Bound::minZ() const { return _minZ; } + +float Bound::maxX() const { return _maxX; } + +float Bound::maxY() const { return _maxY; } + +float Bound::maxZ() const { return _maxZ; } + +float Bound::width() const { return _maxX - _minX; } + +float Bound::height() const { return _maxY - _minY; } + +float Bound::depth() const { return _maxZ - _minZ; } + +glm::vec3 Bound::dim() const { return glm::vec3(_maxX - _minX, _maxY - _minY, _maxZ - _minZ); } + +glm::vec3 Bound::center() const { return glm::vec3((_minX + _maxX) / 2.f, (_minY + _maxY) / 2.f, (_minZ + _maxZ) / 2.f); } + +bool Bound::contains(const glm::vec3 &pt) const { + return (pt.x >= _minX && pt.x < _maxX && + pt.y >= _minY && pt.y < _maxY && + pt.z >= _minZ && pt.z < _maxZ); +} + +bool Bound::collidesPt(const glm::vec3 &pt, glm::vec3 &normal, float tolerance) const { + if (fequal(pt.x, _minX, tolerance)) { + if (pt.y >= _minY && pt.y < _maxY && pt.z >= _minZ && pt.z < _maxZ) { + if (pt.x < _minX) { + normal = glm::vec3(-1.f, 0.f, 0.f); + } else { + normal = glm::vec3(1.f, 0.f, 0.f); + } + return true; + } + } else if (fequal(pt.x, _maxX, tolerance)) { + if (pt.y >= _minY && pt.y < _maxY && pt.z >= _minZ && pt.z < _maxZ) { + if 
(pt.x > _maxX) { + normal = glm::vec3(1.f, 0.f, 0.f); + } else { + normal = glm::vec3(-1.f, 0.f, 0.f); + } + return true; + } + } + if (fequal(pt.y, _minY, tolerance)) { + if (pt.x >= _minX && pt.x < _maxX && pt.z >= _minZ && pt.z < _maxZ) { + if (pt.y < _minY) { + normal = glm::vec3(0.f, -1.f, 0.f); + } else { + normal = glm::vec3(0.f, 1.f, 0.f); + } + return true; + } + } else if (fequal(pt.y, _maxY, tolerance)) { + if (pt.x >= _minX && pt.x < _maxX && pt.z >= _minZ && pt.z < _maxZ) { + if (pt.y > _maxY) { + normal = glm::vec3(0.f, 1.f, 0.f); + } else { + normal = glm::vec3(0.f, -1.f, 0.f); + } + return true; + } + } + if (fequal(pt.z, _minZ, tolerance)) { + if (pt.x >= _minX && pt.x < _maxX && pt.y >= _minY && pt.y < _maxY) { + if (pt.z < _minZ) { + normal = glm::vec3(0.f, 0.f, -1.f); + } else { + normal = glm::vec3(0.f, 0.f, 1.f); + } + return true; + } + } else if (fequal(pt.z, _maxZ, tolerance)) { + if (pt.x >= _minX && pt.x < _maxX && pt.y >= _minY && pt.y < _maxY) { + if (pt.z > _maxZ) { + normal = glm::vec3(0.f, 0.f, 1.f); + } else { + normal = glm::vec3(0.f, 0.f, -1.f); + } + return true; + } + } + return false; +} + +bool Bound::collides(const glm::vec3 &prev, const glm::vec3 &next, glm::vec3 &normal) const { + if (prev.y >= _minY && prev.y < _maxY && prev.z >= _minZ && prev.z < _maxZ) { + if (prev.x > _minX && next.x <= _minX) { // cross minX plane + normal = glm::vec3(1.f, 0.f, 0.f); + return true; + } else if (prev.x < _minX && next.x >= _minX) { + normal = glm::vec3(-1.f, 0.f, 0.f); + return true; + } else if (prev.x < _maxX && next.x >= _maxX) { // cross maxX plane + normal = glm::vec3(-1.f, 0.f, 0.f); + return true; + } else if (prev.x > _maxX && next.x <= _maxX) { + normal = glm::vec3(1.f, 0.f, 0.f); + return true; + } + } + if (prev.x >= _minX && prev.x < _maxX && prev.z >= _minZ && prev.z < _maxZ) { + if (prev.y > _minY && next.y <= _minY) { // cross minY plane + normal = glm::vec3(0.f, 1.f, 0.f); + return true; + } else if (prev.y < _minY && 
next.y >= _minY) { + normal = glm::vec3(0.f, -1.f, 0.f); + return true; + } else if (prev.y < _maxY && next.y >= _maxY) { // cross maxY plane + normal = glm::vec3(0.f, -1.f, 0.f); + return true; + } else if (prev.y > _maxY && next.y <= _maxY) { + normal = glm::vec3(0.f, 1.f, 0.f); + return true; + } + } + if (prev.x >= _minX && prev.x < _maxX && prev.y >= _minY && prev.y < _maxY) { + if (prev.z > _minZ && next.z <= _minZ) { // cross minZ plane + normal = glm::vec3(0.f, 1.f, 0.f); + return true; + } else if (prev.z < _minZ && next.z >= _minZ) { + normal = glm::vec3(0.f, -1.f, 0.f); + return true; + } else if (prev.z < _maxZ && next.z >= _maxZ) { // cross maxZ plane + normal = glm::vec3(0.f, -1.f, 0.f); + return true; + } else if (prev.z > _maxZ && next.z <= _maxZ) { + normal = glm::vec3(0.f, 1.f, 0.f); + return true; + } + } + return false; +} \ No newline at end of file diff --git a/core/geometry/Bound.h b/core/geometry/Bound.h new file mode 100644 index 00000000..5ce97bef --- /dev/null +++ b/core/geometry/Bound.h @@ -0,0 +1,45 @@ +// +// Created by austin on 2/27/16. 
+// + +#ifndef FLUIDSOLVER_BOUND_H +#define FLUIDSOLVER_BOUND_H + +#include "Geo.h" + +class Bound : public Geo { +public: + Bound(); + Bound(const glm::vec3 ¢er, const glm::vec3 &dim); + Bound(const glm::vec3 ¢er, float sX, float sY, float sZ); + Bound(float cX, float cY, float cZ, const glm::vec3 &dim); + Bound(float minX, float minY, float minZ, float maxX, float maxY, float maxZ); + virtual ~Bound(); + + float minX() const; + float minY() const; + float minZ() const; + float maxX() const; + float maxY() const; + float maxZ() const; + float width() const; + float height() const; + float depth() const; + glm::vec3 dim() const; + glm::vec3 center() const; + + virtual bool contains(const glm::vec3 &pt) const; + virtual bool collidesPt(const glm::vec3 &pt, glm::vec3 &normal, float tolerance = 0.001f) const; + virtual bool collides(const glm::vec3 &prev, const glm::vec3 &next, glm::vec3 &normal) const; + +private: + float _minX; + float _minY; + float _minZ; + float _maxX; + float _maxY; + float _maxZ; +}; + + +#endif //FLUIDSOLVER_BOUND_H diff --git a/core/geometry/Box.cpp b/core/geometry/Box.cpp new file mode 100644 index 00000000..02f69ba2 --- /dev/null +++ b/core/geometry/Box.cpp @@ -0,0 +1,18 @@ +// +// Created by austin on 2/28/16. 
+// + +#include "Box.h" + +Box::Box(const glm::vec3 ¢er, const glm::vec3 &dim) : Box(center, dim.x, dim.y, dim.z) {} + +Box::Box(const glm::vec3 ¢er, float sX, float sY, float sZ) : Box(center.x - sX / 2.f, center.y - sY / 2.f, center.z - sZ / 2.f, + center.x + sX/2.f, center.y + sY/2.f, center.z + sZ/2.f) {} + +Box::Box(float cX, float cY, float cZ, const glm::vec3 &dim) : Box(cX - dim.x, cY - dim.y, cZ - dim.z, + cX + dim.x, cY + dim.y, cZ + dim.z) {} + +Box::Box(float minX, float minY, float minZ, float maxX, float maxY, float maxZ) : + Bound(minX, minY, minZ, maxX, maxY, maxZ), GeoObject() { + computeBound(); +} diff --git a/core/geometry/Box.h b/core/geometry/Box.h new file mode 100644 index 00000000..2ce520c0 --- /dev/null +++ b/core/geometry/Box.h @@ -0,0 +1,33 @@ +// +// Created by austin on 2/28/16. +// + +#ifndef FLUIDSOLVER_BOX_H +#define FLUIDSOLVER_BOX_H + +#include "GeoObject.h" + +class Box : public Bound, public GeoObject { +public: + Box(const glm::vec3 ¢er, const glm::vec3 &dim); + Box(const glm::vec3 ¢er, float sX, float sY, float sZ); + Box(float cX, float cY, float cZ, const glm::vec3 &dim); + Box(float minX, float minY, float minZ, float maxX, float maxY, float maxZ); + + virtual bool contains(const glm::vec3 &pt) const { + return _bound.contains(pt); + } + virtual bool collidesPt(const glm::vec3 &pt, glm::vec3 &normal, float tolerance = 0.001f) const { + return _bound.collidesPt(pt, normal, tolerance); + } + virtual bool collides(const glm::vec3 &prev, const glm::vec3 &next, glm::vec3 &normal) const { + return _bound.collides(prev, next, normal); + } + + virtual void computeBound() { + _bound = *this; + } +}; + + +#endif //FLUIDSOLVER_BOX_H diff --git a/core/geometry/Geo.cpp b/core/geometry/Geo.cpp new file mode 100644 index 00000000..6d5b6e1a --- /dev/null +++ b/core/geometry/Geo.cpp @@ -0,0 +1,28 @@ +// +// Created by austin on 2/27/16. 
+// + + +#include "Geo.h" + +bool Geo::collidesPt(const glm::vec3 &pt, float tolerance) const { + glm::vec3 norm; + return collidesPt(pt, norm, tolerance); +} + +bool Geo::collides(const glm::vec3 &prev, const glm::vec3 &next) const { + glm::vec3 norm; + return collides(prev, next, norm); +} + +bool Geo::collidesRay(const glm::vec3 &pt, const glm::vec3 &dir, float step) const { + assert(fequal(glm::length(dir), 1.f)); + glm::vec3 nextPt = pt + dir * step; + return collides(pt, nextPt); +} + +bool Geo::collidesRay(const glm::vec3 &pt, const glm::vec3 &dir, glm::vec3 &normal, float step) const { + assert(fequal(glm::length(dir), 1.f)); + glm::vec3 nextPt = pt + dir * step; + return collides(pt, nextPt, normal); +} \ No newline at end of file diff --git a/core/geometry/Geo.h b/core/geometry/Geo.h new file mode 100644 index 00000000..63f7930d --- /dev/null +++ b/core/geometry/Geo.h @@ -0,0 +1,31 @@ +// +// Created by austin on 2/27/16. +// + +#ifndef FLUIDSOLVER_GEO_H +#define FLUIDSOLVER_GEO_H + +#include + +class Geo { +public: + Geo() { } + virtual ~Geo() { } + + virtual bool contains(const glm::vec3 &pt) const = 0; + + virtual bool collidesPt(const glm::vec3 &pt, float tolerance = 0.001f) const; + + virtual bool collidesPt(const glm::vec3 &pt, glm::vec3 &normal, float tolerance = 0.001f) const = 0; + + virtual bool collides(const glm::vec3 &prev, const glm::vec3 &next) const; + + virtual bool collides(const glm::vec3 &prev, const glm::vec3 &next, glm::vec3 &normal) const = 0; + + virtual bool collidesRay(const glm::vec3 &pt, const glm::vec3 &dir, float step = 0.001f) const; + + virtual bool collidesRay(const glm::vec3 &pt, const glm::vec3 &dir, glm::vec3 &normal, float step = 0.001f) const; +}; + + +#endif //FLUIDSOLVER_GEO_H diff --git a/core/geometry/GeoObject.h b/core/geometry/GeoObject.h new file mode 100644 index 00000000..525a8b37 --- /dev/null +++ b/core/geometry/GeoObject.h @@ -0,0 +1,23 @@ +// +// Created by austin on 2/28/16. 
+// + +#ifndef FLUIDSOLVER_GEOOBJECT_H +#define FLUIDSOLVER_GEOOBJECT_H + +#include "Geo.h" +#include "Bound.h" + +class GeoObject : public Geo { +public: + GeoObject() { } + virtual ~GeoObject() {} + virtual void computeBound() = 0; + const Bound& bound() const { return _bound; } + +protected: + Bound _bound; +}; + + +#endif //FLUIDSOLVER_GEOOBJECT_H diff --git a/core/scenes/default.h b/core/scenes/default.h new file mode 100644 index 00000000..58527e6f --- /dev/null +++ b/core/scenes/default.h @@ -0,0 +1,29 @@ +// +// Created by austin on 2/29/16. +// + +const char* default_scene = R"( +{ + "containerDim": { + "position": [0,0,0], + "scale" : [6,6,6] + }, + "particleDim": { + "position": [0,0,0], + "scale": [4,4,4] + }, + "resolution": 20 +} + +/*{ + "containerDim" : { + "position": [0, 0, 0], + "scale" : [40, 20, 20] + }, + "particleDim" : { + "position": [-10, 0, 0], + "scale" : [10, 19, 19] + }, + "resolution" : 20 +};*/ +)"; diff --git a/core/solver/FluidParticle.h b/core/solver/FluidParticle.h new file mode 100644 index 00000000..1fd3eb7b --- /dev/null +++ b/core/solver/FluidParticle.h @@ -0,0 +1,25 @@ +// +// Created by austin on 3/20/16. +// + +#ifndef FLUIDSOLVER_FLUIDPARTICLE_H +#define FLUIDSOLVER_FLUIDPARTICLE_H + +#include + +struct FluidParticle { + glm::vec3 pos; + glm::vec3 pos_old; + glm::vec3 vel; + glm::vec3 col; + glm::ivec3 cell; + + FluidParticle() { + pos = glm::vec3(0); + pos_old = glm::vec3(0); + vel = glm::vec3(0,0,0); + col = glm::vec3(0.0f, 0.0f, 1.f); + } +}; + +#endif //FLUIDSOLVER_FLUIDPARTICLE_H diff --git a/core/solver/FluidSolver.cpp b/core/solver/FluidSolver.cpp new file mode 100644 index 00000000..0d781b84 --- /dev/null +++ b/core/solver/FluidSolver.cpp @@ -0,0 +1,752 @@ +// +// Created by austin on 2/28/16. 
+// + +#include +#include "FluidSolver.h" +#include +#include +#include +#include +#include +#include +#include +#include + + +float FluidSolver::g = -9.80665f; + +FluidSolver::FluidSolver(float particleSep, float gridSize) : particle_radius(particleSep), _cell_size(gridSize), frame(0) { +// particle_radius /= 2; +} + +FluidSolver::~FluidSolver() { + delete _container; + delete _MAC; +} + +void FluidSolver::setContainer(GeoObject* container) { + _container = container; + glm::vec3 size = _container->bound().dim(); + glm::vec3 origin = (_container->bound().center() - size / 2.f); + _MAC = new MACGrid >( + origin - glm::vec3(_cell_size, _cell_size, _cell_size), + size + 2.f*glm::vec3(_cell_size, _cell_size, _cell_size), + _cell_size + ); +// _MAC = new MACGrid >(origin, size, _cell_size); + + std::function setSolid = [&](size_t i, size_t j, size_t k) { + _MAC->_gType(i,j,k) = SOLID; + }; + + _MAC->_gType.iterateRegion(0,0,0, 1,_MAC->_gType.countY(),_MAC->_gType.countZ(), setSolid); + _MAC->_gType.iterateRegion(_MAC->_gType.countX()-1,0,0, _MAC->_gType.countX(),_MAC->_gType.countY(),_MAC->_gType.countZ(), setSolid); + _MAC->_gType.iterateRegion(0,0,0, _MAC->_gType.countX(),1,_MAC->_gType.countZ(), setSolid); + _MAC->_gType.iterateRegion(0,_MAC->_gType.countY()-1,0, _MAC->_gType.countX(),_MAC->_gType.countY(),_MAC->_gType.countZ(), setSolid); + _MAC->_gType.iterateRegion(0,0,0, _MAC->_gType.countX(),_MAC->_gType.countY(),1, setSolid); + _MAC->_gType.iterateRegion(0,0,_MAC->_gType.countZ()-1, _MAC->_gType.countX(),_MAC->_gType.countY(),_MAC->_gType.countZ(), setSolid); + + /*_MAC = MACGrid >( + origin, + size, + _cell_size + );*/ +} + +/* + * Loop over fluid bounds to generate particles + */ +void FluidSolver::addFluid(const GeoObject &fluid) { +// FluidParticle p; +// +// p.pos = glm::vec3(-1,5,1); +// _particles.push_back(p); +// _MAC->_gType.at(p.pos) = FLUID; +// +// p.pos = glm::vec3(-1,3,1); +// _particles.push_back(p); +// _MAC->_gType.at(p.pos) = FLUID; +// +// 
p.pos = glm::vec3(-1,5,3); +// _particles.push_back(p); +// _MAC->_gType.at(p.pos) = FLUID; +// +// p.pos = glm::vec3(-1,3,3); +// _particles.push_back(p); +// _MAC->_gType.at(p.pos) = FLUID; +// +// p.pos = glm::vec3(-3,5,1); +// _particles.push_back(p); +// _MAC->_gType.at(p.pos) = FLUID; +// +// p.pos = glm::vec3(-3,3,1); +// _particles.push_back(p); +// _MAC->_gType.at(p.pos) = FLUID; +// +// p.pos = glm::vec3(-3,5,3); +// _particles.push_back(p); +// _MAC->_gType.at(p.pos) = FLUID; +// +// p.pos = glm::vec3(-3,3,3); +// _particles.push_back(p); +// _MAC->_gType.at(p.pos) = FLUID; +// return; + + const Bound& b = fluid.bound(); + for (float x = b.minX(); x < b.maxX(); x += particle_radius) { + for (float y = b.minY(); y < b.maxY(); y += particle_radius) { + for (float z = b.minZ(); z < b.maxZ(); z += particle_radius) { + glm::vec3 pos = glm::vec3(x, y, z) + glm::vec3(_cell_size)/2.f; + if (fluid.contains(pos)) { + FluidParticle p; + p.pos = pos; + _particles.push_back(p); + _MAC->_gType.at(pos) = FLUID; + } + } + } + } + std::cout << "Added " << _particles.size() << " particles" << std::endl; +} + +void FluidSolver::init() { + + for (FluidParticle &particle : _particles) { + particle.cell = _MAC->indexOf(particle.pos); + _MAC->atIdx(particle.cell).push_back(&particle); + } + _MAC->_gU.clear(0); + _MAC->_gV.clear(0); + _MAC->_gW.clear(0); + +// for (size_t idx = 0; idx < _MAC->_gU.size(); idx++) { +// size_t i, j, k; +// _MAC->_gU.toIJK(idx, i,j,k); +// std::cout << idx << "; " << i << "," << j << "," << k << "; " << _MAC->_gU.fromIJK(i,j,k) << std::endl; +// } +// +// std::size_t velOffset = offsetof(FluidParticle, vel); +// std::size_t U_offset = velOffset + offsetof(glm::vec3, x); +// std::size_t V_offset = velOffset + offsetof(glm::vec3, y); +// std::size_t W_offset = velOffset + offsetof(glm::vec3, z); +// particleAttributeToGrid(U_offset, _MAC->_gU_old, _cell_size, 0.f); +// particleAttributeToGrid(V_offset, _MAC->_gV_old, _cell_size, 0.f); +// 
particleAttributeToGrid(W_offset, _MAC->_gW_old, _cell_size, 0.f); +// particleAttributeToGrid(U_offset, _MAC->_gU, _cell_size, 0.f); +// particleAttributeToGrid(V_offset, _MAC->_gV, _cell_size, 0.f); +// particleAttributeToGrid(W_offset, _MAC->_gW, _cell_size, 0.f); +} + +void FluidSolver::projectVelocitiesToGrid() { + std::size_t velOffset = offsetof(FluidParticle, vel); + std::size_t U_offset = velOffset + offsetof(glm::vec3, x); + std::size_t V_offset = velOffset + offsetof(glm::vec3, y); + std::size_t W_offset = velOffset + offsetof(glm::vec3, z); + +#ifdef USETBB + tbb::parallel_invoke( + [&](){ + particleAttributeToGrid(U_offset, _MAC->_gU, _cell_size, 0.f); + }, + [&](){ + particleAttributeToGrid(V_offset, _MAC->_gV, _cell_size, 0.f); + }, + [&](){ + particleAttributeToGrid(W_offset, _MAC->_gW, _cell_size, 0.f); + } + ); + tbb::parallel_invoke( + [&]() { + _MAC->_gU_old = _MAC->_gU; + }, + [&]() { + _MAC->_gV_old = _MAC->_gV; + }, + [&]() { + _MAC->_gW_old = _MAC->_gW; + } + ); + +#else + particleAttributeToGrid(U_offset, _MAC->_gU, _cell_size, 0.f); + particleAttributeToGrid(V_offset, _MAC->_gV, _cell_size, 0.f); + particleAttributeToGrid(W_offset, _MAC->_gW, _cell_size, 0.f); + + _MAC->_gU_old = _MAC->_gU; + _MAC->_gV_old = _MAC->_gV; + _MAC->_gW_old = _MAC->_gW; +#endif +} + +void FluidSolver::transferVelocitiesToParticles() { + float smooth = 0.05f; + +#ifdef USETBB + tbb::parallel_invoke( + [&]() { + tbb::parallel_for(tbb::blocked_range(0, _particles.size()), + [&](const tbb::blocked_range &r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + FluidParticle &particle = _particles[i]; + float vel = interpolateAttribute(particle.pos, _MAC->_gU); + float oldVel = interpolateAttribute(particle.pos, _MAC->_gU_old); + particle.vel.x = vel*smooth + (particle.vel.x +(vel - oldVel))*(1.f-smooth); + } + }); + }, + [&]() { + tbb::parallel_for(tbb::blocked_range(0, _particles.size()), + [&](const tbb::blocked_range &r) { + for (size_t i = r.begin(); i != 
r.end(); ++i) { + FluidParticle &particle = _particles[i]; + float vel = interpolateAttribute(particle.pos, _MAC->_gV); + float oldVel = interpolateAttribute(particle.pos, _MAC->_gV_old); + particle.vel.y = vel*smooth + (particle.vel.y +(vel - oldVel))*(1.f-smooth); + } + }); + }, + [&]() { + tbb::parallel_for(tbb::blocked_range(0, _particles.size()), + [&](const tbb::blocked_range &r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + FluidParticle &particle = _particles[i]; + float vel = interpolateAttribute(particle.pos, _MAC->_gW); + float oldVel = interpolateAttribute(particle.pos, _MAC->_gW_old); + particle.vel.z = vel*smooth + (particle.vel.z +(vel - oldVel))*(1.f-smooth); + } + }); + } + ); +#else + for (FluidParticle &particle : _particles) { + float vel = interpolateAttribute(particle.pos, _MAC->_gU); + float oldVel = interpolateAttribute(particle.pos, _MAC->_gU_old); + particle.vel.x = vel*smooth + (particle.vel.x +(vel - oldVel))*(1.f-smooth); + } + for (FluidParticle &particle : _particles) { + float vel = interpolateAttribute(particle.pos, _MAC->_gV); + float oldVel = interpolateAttribute(particle.pos, _MAC->_gV_old); + particle.vel.y = vel*smooth + (particle.vel.y +(vel - oldVel))*(1.f-smooth); + } + for (FluidParticle &particle : _particles) { + float vel = interpolateAttribute(particle.pos, _MAC->_gW); + float oldVel = interpolateAttribute(particle.pos, _MAC->_gW_old); + particle.vel.z = vel*smooth + (particle.vel.z +(vel - oldVel))*(1.f-smooth); + } +#endif +} + +void FluidSolver::enforceBoundary() { + _MAC->_gType.iterate([&](size_t i, size_t j, size_t k) { + switch (_MAC->_gType(i,j,k)) { + case EMPTY:break; + case FLUID:break; + case SOLID: + /*_MAC->_gU(i,j,k) = 0; + _MAC->_gV(i,j,k) = 0; + _MAC->_gW(i,j,k) = 0; + _MAC->_gU(i+1,j,k) = 0; + _MAC->_gV(i,j+1,k) = 0; + _MAC->_gW(i,j,k+1) = 0; + */ + if (i == 0 || _MAC->_gType(i-1,j,k) != SOLID) { + _MAC->_gU(i,j,k) = std::min(0.f, _MAC->_gU(i,j,k)); + } + if (i == _MAC->_gType.countX() - 1 || 
_MAC->_gType(i+1,j,k) != SOLID) { + _MAC->_gU(i+1,j,k) = std::max(0.f, _MAC->_gU(i+1,j,k)); + } + if (j == 0 || _MAC->_gType(i,j-1,k) != SOLID) { + _MAC->_gV(i,j,k) = std::min(0.f, _MAC->_gV(i,j,k)); + } + if (j == _MAC->_gType.countY() - 1 || _MAC->_gType(i,j+1,k) != SOLID) { + _MAC->_gV(i,j+1,k) = std::max(0.f, _MAC->_gV(i,j+1,k)); + } + if (k == 0 || _MAC->_gType(i,j,k-1) != SOLID) { + _MAC->_gW(i,j,k) = std::min(0.f, _MAC->_gW(i,j,k)); + } + if (k == _MAC->_gType.countZ() - 1 || _MAC->_gType(i,j,k+1) != SOLID) { + _MAC->_gW(i,j,k+1) = std::max(0.f, _MAC->_gW(i,j,k+1)); + } + break; + default:break; + } + }); + //return; + + + /*_MAC->_gU.iterateRegion(0,0,0, 1,_MAC->_gU.countY(),_MAC->_gU.countZ(), [&](size_t i, size_t j, size_t k) { + _MAC->_gU(i,j,k) = std::min(0.f, _MAC->_gU(i,j,k)); + }); + _MAC->_gU.iterateRegion(_MAC->_gU.countX()-1,0,0, _MAC->_gU.countX(),_MAC->_gU.countY(),_MAC->_gU.countZ(), [&](size_t i, size_t j, size_t k) { + _MAC->_gU(i,j,k) = std::max(0.f, _MAC->_gU(i,j,k)); + }); + _MAC->_gV.iterateRegion(0,0,0, _MAC->_gV.countX(),1,_MAC->_gV.countZ(), [&](size_t i, size_t j, size_t k) { + _MAC->_gV(i,j,k) = 0; + }); + _MAC->_gV.iterateRegion(0,_MAC->_gV.countY()-1,0, _MAC->_gV.countX(),_MAC->_gV.countY(),_MAC->_gV.countZ(), [&](size_t i, size_t j, size_t k) { + _MAC->_gV(i,j,k) = 0; + }); + _MAC->_gW.iterateRegion(0,0,0, _MAC->_gW.countX(),_MAC->_gW.countY(),1, [&](size_t i, size_t j, size_t k) { + _MAC->_gW(i,j,k) = 0; + }); + _MAC->_gW.iterateRegion(0,0,_MAC->_gW.countZ()-1, _MAC->_gW.countX(),_MAC->_gW.countY(),_MAC->_gW.countZ(), [&](size_t i, size_t j, size_t k) { + _MAC->_gW(i,j,k) = 0; + });*/ +} + +inline void pressureMatrixHelper(std::vector> &coeffs, const Grid &grid, size_t &IDX, + int &count, const float &scale, size_t i, size_t j, size_t k) { + size_t idx = grid.fromIJK(i,j,k); + if (grid(i,j,k) == FLUID || grid(i,j,k) == EMPTY) { + count++; + if (grid(i,j,k) == FLUID) { + coeffs.push_back(Eigen::Triplet(IDX, idx, -scale)); + } + } 
+} + +void FluidSolver::pressureSolve(float step) { + typedef Eigen::Triplet T; + std::vector coefficientsA; + std::vector coefficientsB; + Eigen::SparseMatrix A(_MAC->_gType.size(), _MAC->_gType.size()); + Eigen::SparseMatrix b(_MAC->_gType.size(), 1); + Eigen::SparseVector x(_MAC->_gType.size()); + A.setZero(); + b.setZero(); + x.setZero(); + + float scale = step / (1.f*_cell_size*_cell_size); + + _MAC->_gType.iterate([&](size_t I, size_t J, size_t K) { + size_t IDX = _MAC->_gType.fromIJK(I,J,K); + if (_MAC->_gType(I,J,K) == FLUID) { + int count = 0; + + if (I > 0) { // if I-1 >= 0 + pressureMatrixHelper(coefficientsA, _MAC->_gType, IDX, count, scale, I-1,J,K); + } + if (I + 1 < _MAC->_gType.countX()) { // if I + 1 < countX + pressureMatrixHelper(coefficientsA, _MAC->_gType, IDX, count, scale, I+1,J,K); + } + if (J > 0) { // if J-1 >= 0 + pressureMatrixHelper(coefficientsA, _MAC->_gType, IDX, count, scale, I,J-1,K); + } + if (J + 1 < _MAC->_gType.countY()) { // if J + 1 < countY + pressureMatrixHelper(coefficientsA, _MAC->_gType, IDX, count, scale, I,J+1,K); + } + if (K > 0) { // if K-1 >= 0 + pressureMatrixHelper(coefficientsA, _MAC->_gType, IDX, count, scale, I,J,K-1); + } + if (K + 1 < _MAC->_gType.countZ()) { // if K + 1 < countZ + pressureMatrixHelper(coefficientsA, _MAC->_gType, IDX, count, scale, I,J,K+1); + } + + coefficientsA.push_back(T(IDX, IDX, count*scale)); + + float div = + -(_MAC->_gU(I+1,J,K) - _MAC->_gU(I,J,K)) / _cell_size + + -(_MAC->_gV(I,J+1,K) - _MAC->_gV(I,J,K)) / _cell_size + + -(_MAC->_gW(I,J,K+1) - _MAC->_gW(I,J,K)) / _cell_size; + + if (I == 0 || _MAC->_gType(I-1,J,K) == SOLID) { + div -= (_MAC->_gU(I,J,K) - 0) / _cell_size; + } + if (I == _MAC->_gType.countX() - 1 || _MAC->_gType(I+1,J,K) == SOLID) { + div -= (0 - _MAC->_gU(I+1,J,K)) / _cell_size; + } + if (J == 0 || _MAC->_gType(I,J-1,K) == SOLID) { + div -= (_MAC->_gV(I,J,K) - 0) / _cell_size; + } + if (J == _MAC->_gType.countY() - 1 || _MAC->_gType(I,J+1,K) == SOLID) { + div -= (0 
- _MAC->_gV(I,J+1,K)) / _cell_size; + } + if (K == 0 || _MAC->_gType(I,J,K-1) == SOLID) { + div -= (_MAC->_gW(I,J,K) - 0) / _cell_size; + } + if (K == _MAC->_gType.countZ() - 1 || _MAC->_gType(I,J,K+1) == SOLID) { + div -= (0 - _MAC->_gW(I,J,K+1)) / _cell_size; + } + + coefficientsB.push_back(T(IDX,0,div)); + } + }, false); + + A.setFromTriplets(coefficientsA.begin(), coefficientsA.end()); + + b.setFromTriplets(coefficientsB.begin(), coefficientsB.end()); +// Eigen::ConjugateGradient, Eigen::Lower, Eigen::IncompleteCholesky > cg(A); + Eigen::ConjugateGradient, Eigen::Lower, Eigen::IdentityPreconditioner> cg(A); + x = cg.solve(b); + + _MAC->_gP.clear(0); + for (Eigen::SparseVector::InnerIterator it(x); it; ++it) { + if (_MAC->_gType(it.index()) == FLUID) { + _MAC->_gP(it.index()) = it.value(); + } + } + + scale = step/(1.f*_cell_size); + _MAC->_gU.iterate([&](size_t i, size_t j, size_t k) { + bool leftExists = i > 0; + bool rightExists = i < _MAC->_gP.countX(); + bool leftFluid = leftExists && _MAC->_gType(i-1,j,k) == FLUID; + bool rightFluid = rightExists && _MAC->_gType(i,j,k) == FLUID; + if ((leftFluid || rightFluid)) { + float delP = _MAC->_gP(i,j,k) - _MAC->_gP(i-1,j,k); + _MAC->_gU(i,j,k) -= scale * delP; + } + }); + + _MAC->_gV.iterate([&](size_t i, size_t j, size_t k) { + bool leftExists = j > 0; + bool rightExists = j < _MAC->_gP.countY(); + bool leftFluid = leftExists && _MAC->_gType(i,j-1,k) == FLUID; + bool rightFluid = rightExists && _MAC->_gType(i,j,k) == FLUID; + if ((leftFluid || rightFluid)) { + float delP = _MAC->_gP(i,j,k) - _MAC->_gP(i,j-1,k); + _MAC->_gV(i,j,k) -= scale * delP; + } + }); + + _MAC->_gW.iterate([&](size_t i, size_t j, size_t k) { + bool leftExists = k > 0; + bool rightExists = k < _MAC->_gP.countZ(); + bool leftFluid = leftExists && _MAC->_gType(i,j,k-1) == FLUID; + bool rightFluid = rightExists && _MAC->_gType(i,j,k) == FLUID; + if ((leftFluid || rightFluid)) { + float delP = _MAC->_gP(i,j,k) - _MAC->_gP(i,j,k-1); + 
_MAC->_gW(i,j,k) -= scale * delP; + } + }); + +} + +void FluidSolver::gravitySolve(float step) { + _MAC->_gV.iterate([&](size_t i, size_t j, size_t k) { + _MAC->_gV(i,j,k) += g*step; + }); +} + +void FluidSolver::extrapolateVelocity() { + _MAC->_gU.iterate([&](size_t i, size_t j, size_t k) { + bool shouldExtrapolate = (_MAC->_gType.checkIdx(i-1,j,k) && _MAC->_gType(i-1,j,k) != FLUID) || (_MAC->_gType.checkIdx(i,j,k) && _MAC->_gType(i,j,k) != FLUID); + if (shouldExtrapolate) { + bool fromUp = (_MAC->_gType.checkIdx(i-1,j+1,k) && _MAC->_gType(i-1,j+1,k) == FLUID) || (_MAC->_gType.checkIdx(i,j+1,k) && _MAC->_gType(i,j+1,k) == FLUID); + bool fromDown = (_MAC->_gType.checkIdx(i-1,j-1,k) && _MAC->_gType(i-1,j-1,k) == FLUID) || (_MAC->_gType.checkIdx(i,j-1,k) && _MAC->_gType(i,j-1,k) == FLUID); + bool fromFront = (_MAC->_gType.checkIdx(i-1,j,k+1) && _MAC->_gType(i-1,j,k+1) == FLUID) || (_MAC->_gType.checkIdx(i,j,k+1) && _MAC->_gType(i,j,k+1) == FLUID); + bool fromBack = (_MAC->_gType.checkIdx(i-1,j,k-1) && _MAC->_gType(i-1,j,k-1) == FLUID) || (_MAC->_gType.checkIdx(i,j,k-1) && _MAC->_gType(i,j,k-1) == FLUID); + + float val = 0; + int count = fromUp + fromDown + fromFront + fromBack; + if (fromUp) val += _MAC->_gU(i,j+1,k); + if (fromDown) val += _MAC->_gU(i,j-1,k); + if (fromFront) val += _MAC->_gU(i,j,k+1); + if (fromBack) val += _MAC->_gU(i,j,k-1); + + if (count > 0) { + _MAC->_gU(i,j,k) = val / count; + } + } + }); + + _MAC->_gV.iterate([&](size_t i, size_t j, size_t k) { + bool shouldExtrapolate = (_MAC->_gType.checkIdx(i,j-1,k) && _MAC->_gType(i,j-1,k) != FLUID) || (_MAC->_gType.checkIdx(i,j,k) && _MAC->_gType(i,j,k) != FLUID); + if (shouldExtrapolate) { + bool fromUp = (_MAC->_gType.checkIdx(i+1,j-1,k) && _MAC->_gType(i+1,j-1,k) == FLUID) || (_MAC->_gType.checkIdx(i+1,j,k) && _MAC->_gType(i+1,j,k) == FLUID); + bool fromDown = (_MAC->_gType.checkIdx(i-1,j-1,k) && _MAC->_gType(i-1,j-1,k) == FLUID) || (_MAC->_gType.checkIdx(i-1,j,k) && _MAC->_gType(i-1,j,k) == FLUID); 
+ bool fromFront = (_MAC->_gType.checkIdx(i,j-1,k+1) && _MAC->_gType(i,j-1,k+1) == FLUID) || (_MAC->_gType.checkIdx(i,j,k+1) && _MAC->_gType(i,j,k+1) == FLUID); + bool fromBack = (_MAC->_gType.checkIdx(i,j-1,k-1) && _MAC->_gType(i,j-1,k-1) == FLUID) || (_MAC->_gType.checkIdx(i,j,k-1) && _MAC->_gType(i,j,k-1) == FLUID); + + float val = 0; + int count = fromUp + fromDown + fromFront + fromBack; + if (fromUp) val += _MAC->_gV(i+1,j,k); + if (fromDown) val += _MAC->_gV(i-1,j,k); + if (fromFront) val += _MAC->_gV(i,j,k+1); + if (fromBack) val += _MAC->_gV(i,j,k-1); + + if (count > 0) { + _MAC->_gV(i,j,k) = val / count; + } + } + }); + + _MAC->_gW.iterate([&](size_t i, size_t j, size_t k) { + bool shouldExtrapolate = (_MAC->_gType.checkIdx(i,j,k-1) && _MAC->_gType(i,j,k-1) != FLUID) || (_MAC->_gType.checkIdx(i,j,k) && _MAC->_gType(i,j,k) != FLUID); + if (shouldExtrapolate) { + bool fromUp = (_MAC->_gType.checkIdx(i+1,j,k-1) && _MAC->_gType(i+1,j,k-1) == FLUID) || (_MAC->_gType.checkIdx(i+1,j,k) && _MAC->_gType(i+1,j,k) == FLUID); + bool fromDown = (_MAC->_gType.checkIdx(i-1,j,k-1) && _MAC->_gType(i-1,j,k-1) == FLUID) || (_MAC->_gType.checkIdx(i-1,j,k) && _MAC->_gType(i-1,j,k) == FLUID); + bool fromFront = (_MAC->_gType.checkIdx(i,j+1,k-1) && _MAC->_gType(i,j+1,k-1) == FLUID) || (_MAC->_gType.checkIdx(i,j+1,k) && _MAC->_gType(i,j+1,k) == FLUID); + bool fromBack = (_MAC->_gType.checkIdx(i,j-1,k-1) && _MAC->_gType(i,j-1,k-1) == FLUID) || (_MAC->_gType.checkIdx(i,j-1,k) && _MAC->_gType(i,j-1,k) == FLUID); + + float val = 0; + int count = fromUp + fromDown + fromFront + fromBack; + if (fromUp) val += _MAC->_gW(i+1,j,k); + if (fromDown) val += _MAC->_gW(i-1,j,k); + if (fromFront) val += _MAC->_gW(i,j+1,k); + if (fromBack) val += _MAC->_gW(i,j-1,k); + + if (count > 0) { + _MAC->_gW(i,j,k) = val / count; + } + } + }); + +} + +void FluidSolver::updateParticlePositions(float step) { + +#ifdef USETBB + tbb::parallel_for(tbb::blocked_range(0, _particles.size()), [&](const 
tbb::blocked_range &r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + FluidParticle &particle = _particles[i]; + particle.pos_old = particle.pos; + + glm::vec3 k1 = step*particle.vel; + particle.pos += step * glm::vec3( + interpolateAttribute(particle.pos + 0.5f*k1, _MAC->_gU), + interpolateAttribute(particle.pos + 0.5f*k1, _MAC->_gV), + interpolateAttribute(particle.pos + 0.5f*k1, _MAC->_gW) + ); + } + }); +#else + for (FluidParticle &particle : _particles) { + particle.pos_old = particle.pos; + particle.pos += particle.vel * step; + } +#endif +} + +void FluidSolver::resolveCollisions() { + +#ifdef USETBB + tbb::parallel_for(tbb::blocked_range(0, _particles.size()), [&](const tbb::blocked_range &r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + FluidParticle &particle = _particles[i]; + glm::vec3 normal; + if (_container->collides(particle.pos_old, particle.pos, normal)) { + //particle.col = glm::vec3(1,0,0); + glm::vec3 mask = glm::vec3(1,1,1) - glm::abs(normal); + particle.vel *= mask; + particle.pos = particle.pos_old; + } + } + }); +#else + for (FluidParticle &particle : _particles) { + glm::vec3 normal; + if (_container->collides(particle.pos_old, particle.pos, normal)) { + particle.col = glm::vec3(1,0,0); + glm::vec3 mask = glm::vec3(1,1,1) - glm::abs(normal); + //particle.vel *= mask; + particle.pos = particle.pos_old; + } + } +#endif +} + +void FluidSolver::updateCells() { + _MAC->clear(std::vector()); + _MAC->_gType.clear(EMPTY); + for (FluidParticle &particle : _particles) { + particle.cell = _MAC->indexOf(particle.pos); + _MAC->_gType(particle.cell) = FLUID; + if (_MAC->checkIdx(particle.cell)) { + _MAC->atIdx(particle.cell).push_back(&particle); + } else { + //std::cerr << "particle out of bounds" << std::endl; + } + + } + + std::function setSolid = [&](size_t i, size_t j, size_t k) { + _MAC->_gType(i,j,k) = SOLID; + }; + + _MAC->_gType.iterateRegion(0,0,0, 1,_MAC->_gType.countY(),_MAC->_gType.countZ(), setSolid); + 
_MAC->_gType.iterateRegion(_MAC->_gType.countX()-1,0,0, _MAC->_gType.countX(),_MAC->_gType.countY(),_MAC->_gType.countZ(), setSolid); + _MAC->_gType.iterateRegion(0,0,0, _MAC->_gType.countX(),1,_MAC->_gType.countZ(), setSolid); + _MAC->_gType.iterateRegion(0,_MAC->_gType.countY()-1,0, _MAC->_gType.countX(),_MAC->_gType.countY(),_MAC->_gType.countZ(), setSolid); + _MAC->_gType.iterateRegion(0,0,0, _MAC->_gType.countX(),_MAC->_gType.countY(),1, setSolid); + _MAC->_gType.iterateRegion(0,0,_MAC->_gType.countZ()-1, _MAC->_gType.countX(),_MAC->_gType.countY(),_MAC->_gType.countZ(), setSolid); + +} + +inline float kernel(float r, float h) { +// float e = 2*r/h; +// return 1.f/(PI*h*h*h) * MATHIFELSE( +// 1.f - 3.f/2.f * e*e + 3.f/4.f * e*e*e, +// MATHIFELSE( +// 1.f/4.f * (2-e)*(2-e)*(2-e), +// 0, +// e > 2 +// ), +// e > 1 +// ); +// return (float) (315 / (64 * PI * pow(h, 9)) * MATHIFELSE(0, pow(h * h - r * r, 3), 0 <= r && r <= h)); + return MATHIFELSE(0, 1.f-r/h, 0 <= r && r <= h); +} + + +template void FluidSolver::particleAttributeToGrid(std::size_t offset, Grid &grid, float radius, T zeroVal) { + std::size_t attributeSize = sizeof(T); + std::size_t cellRadius = (size_t) glm::ceil(radius / _cell_size); + +// grid.clear(zeroVal); +// std::vector weights(grid.countX() * grid.countY() * grid.countZ()); +// +// iterParticles([&](FluidParticle &particle) { +// size_t I,J,K; +// grid.indexOf(particle.pos, I, J, K); +// glm::vec3 gridPos = grid.positionOf(I,J,K); +// +// grid.iterateNeighborhood(I,J,K,cellRadius, [&](size_t i, size_t j, size_t k) { +// float dist = glm::distance(particle.pos, gridPos); +// size_t idx = grid.fromIJK(i,j,k); +// weights[idx] += kernel(dist, radius); +// }); +// }, false); +// +// iterParticles([&](FluidParticle &particle) { +// size_t I,J,K; +// grid.indexOf(particle.pos, I, J, K); +// glm::vec3 gridPos = grid.positionOf(I,J,K); +// +// grid.iterateNeighborhood(I,J,K,cellRadius, [&](size_t i, size_t j, size_t k) { +// float dist = 
glm::distance(particle.pos, gridPos); +// size_t idx = grid.fromIJK(i,j,k); +// T temp; +// void *address = (void *) &particle + offset; +// std::memcpy(&temp, address, attributeSize); +// grid(i,j,k) += temp * (kernel(dist, radius) / weights[idx]); +// }); +// }, false); +// +// return; + + grid.iterate([&](size_t I, size_t J, size_t K) { + glm::vec3 gridPos = grid.positionOf(I,J,K); + + size_t mI, mJ, mK, si, ei, sj, ej, sk, ek; + _MAC->indexOf(gridPos, mI, mJ, mK); + _MAC->getNeighboorhood(mI, mJ, mK, cellRadius, si, ei, sj, ej, sk, ek); + + float totalWeight = 0.f; + +// totalWeight = tbb::parallel_reduce(tbb::blocked_range3d(si, ei, sj, ej, sk, ek), 0.f, [&](const tbb::blocked_range3d &r, float init)->float { +// for (size_t i = r.rows().begin(); i < r.rows().end(); i++) { +// for (size_t j = r.cols().begin(); j < r.cols().end(); j++) { +// for (size_t k = r.pages().begin(); k < r.pages().end(); k++) { +// for (FluidParticle const *particle : _MAC(i, j, k)) { +// float dist = glm::distance(particle->pos, gridPos); +// float weight = kernel(dist, 2*radius); +// init += weight; +// } +// } +// } +// } +// return init; +// }, std::plus() +// ); + + for (size_t i = si; i < ei; i++) { + for (size_t j = sj; j < ej; j++) { + for (size_t k = sk; k < ek; k++) { + for (FluidParticle const *particle : _MAC->atIdx(i, j, k)) { + float dist = glm::distance2(particle->pos, gridPos); + float weight = kernel(dist, 2*radius*radius); + totalWeight += weight; + } + } + } + } + + if (totalWeight == 0) { + grid(I,J,K) = zeroVal; + return; + } + + T temp; + T gridVal = zeroVal; + for (size_t i = si; i < ei; i++) { + for (size_t j = sj; j < ej; j++) { + for (size_t k = sk; k < ek; k++) { + for (FluidParticle const *particle : _MAC->atIdx(i, j, k)) { + float dist = glm::distance2(particle->pos, gridPos); + float weight = kernel(dist, 2*radius*radius); + void *address = (void *) particle + offset; + std::memcpy(&temp, address, attributeSize); + gridVal += temp * (weight / totalWeight); 
+ } + } + } + } + + grid(I,J,K) = gridVal; + }); + +} + +template T FluidSolver::interpolateAttribute(const glm::vec3 &pos, Grid &grid) { + glm::vec3 idx = grid.fractionalIndexOf(pos); + size_t i = (size_t) floor(idx.x); + size_t j = (size_t) floor(idx.y); + size_t k = (size_t) floor(idx.z); + size_t I = (size_t) MATHIFELSE(ceil(idx.x), grid.countX()-1, ceil(idx.x) >= grid.countX()); + size_t J = (size_t) MATHIFELSE(ceil(idx.y), grid.countY()-1, ceil(idx.y) >= grid.countY()); + size_t K = (size_t) MATHIFELSE(ceil(idx.z), grid.countZ()-1, ceil(idx.z) >= grid.countZ()); + + T k1, k2, k3, k4, j1, j2, val; + + // this is reverse from what is expected because we want smaller value (closer distance) to have larger influence + k1 = MATHIFELSE((K-idx.z) * grid(i,j,k) + (idx.z-k) * grid(i,j,K), grid(i,j,k), k==K); + k2 = MATHIFELSE((K-idx.z) * grid(i,J,k) + (idx.z-k) * grid(i,J,K), grid(i,J,k), k==K); + k3 = MATHIFELSE((K-idx.z) * grid(I,j,k) + (idx.z-k) * grid(I,j,K), grid(I,j,k), k==K); + k4 = MATHIFELSE((K-idx.z) * grid(I,J,k) + (idx.z-k) * grid(i,J,K), grid(I,J,k), k==K); + + j1 = MATHIFELSE((J-idx.y) * k1 + (idx.y-j) * k2, k1, j==J); + j2 = MATHIFELSE((J-idx.y) * k3 + (idx.y-j) * k4, k3, j==J); + + val = MATHIFELSE((I-idx.x) * j1 + (idx.x-i) * j2, j1, i==I); + + return val; +} + +void FluidSolver::update(float step) { + projectVelocitiesToGrid(); + gravitySolve(step); + enforceBoundary(); + pressureSolve(step); + enforceBoundary(); + extrapolateVelocity(); + enforceBoundary(); + transferVelocitiesToParticles(); + updateParticlePositions(step); + resolveCollisions(); + updateCells(); + + frame++; +} + +void FluidSolver::iterParticles(const std::function &cb, bool parallel) { +#ifdef USETBB + if (parallel) { + tbb::parallel_for(tbb::blocked_range(0, _particles.size()), [&](const tbb::blocked_range &r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + FluidParticle &particle = _particles[i]; + cb(particle); + } + }); + } else { + for (FluidParticle &particle : 
_particles) { + cb(particle); + } + } +#else + for (FluidParticle &particle : _particles) { + cb(particle); + } +#endif +} \ No newline at end of file diff --git a/core/solver/FluidSolver.h b/core/solver/FluidSolver.h new file mode 100644 index 00000000..6074dd97 --- /dev/null +++ b/core/solver/FluidSolver.h @@ -0,0 +1,55 @@ +// +// Created by austin on 2/28/16. +// + +#ifndef FLUIDSOLVER_FLUIDSOLVER_H +#define FLUIDSOLVER_FLUIDSOLVER_H + +#include +#include +#include +#include "grid/MACGrid.h" +#include "FluidParticle.h" + +class FluidSolver { + friend class ParticlesPainter; + friend class ParticlesWriter; +public: + FluidSolver(float particleSep, float gridSize); + ~FluidSolver(); + + void setContainer(GeoObject* container); + void addFluid(const GeoObject &fluid); + void init(); + + void projectVelocitiesToGrid(); + void transferVelocitiesToParticles(); + void enforceBoundary(); + void pressureSolve(float step); + void gravitySolve(float step); + void extrapolateVelocity(); + void updateParticlePositions(float step); + void resolveCollisions(); + void updateCells(); + + void update(float step = 0.04166f); + + GeoObject* _container; + MACGrid >* _MAC; + +private: + std::vector _particles; + float particle_radius; + float _cell_size; + int frame; + + template void particleAttributeToGrid(std::size_t offset, Grid &grid, float radius, T zeroVal); + template T interpolateAttribute(const glm::vec3 &pos, Grid &grid); + + void iterParticles(const std::function &cb, bool parallel=true); + + static float g; +}; + + +#endif //FLUIDSOLVER_FLUIDSOLVER_H diff --git a/core/solver/grid/Grid.cpp b/core/solver/grid/Grid.cpp new file mode 100644 index 00000000..30e5bd75 --- /dev/null +++ b/core/solver/grid/Grid.cpp @@ -0,0 +1,315 @@ +// +// Created by austin on 3/20/16. 
+// + +#include "Grid.h" +#include + +#include +#include +#include +#include + +template Grid::Grid() { + +} + +template Grid::~Grid() { + +} + +template Grid::Grid(const glm::vec3 &origin, const glm::vec3 &offset, const glm::vec3 &dim, float size) : + _origin(origin), + _offset(offset), + _dim(dim), + _cellSize(size), + //_countX((size_t) (std::ceil((_dim.x - _offset.x) / _cellSize))), + //_countY((size_t) (std::ceil((_dim.y - _offset.y) / _cellSize))), + //_countZ((size_t) (std::ceil((_dim.z - _offset.z) / _cellSize))) { + _countX((size_t) (std::floor((_dim.x - _offset.x) / _cellSize)+1)), + _countY((size_t) (std::floor((_dim.y - _offset.y) / _cellSize)+1)), + _countZ((size_t) (std::floor((_dim.z - _offset.z) / _cellSize)+1)) { + _contents = std::vector((unsigned long) (_countX * _countY * _countZ)); + std::cout << "Constructing " << _countX << "x" << _countY << "x" << _countZ << " grid..." << glm::to_string(origin) << " to " << glm::to_string(origin+dim) << std::endl; +} +/* +template template Grid::Grid(const Grid &rhs) : + _origin(rhs._origin), + _offset(rhs._offset), + _dim(rhs._dim), + _cellSize(rhs._cellSize), + _countX(rhs._countX), + _countY(rhs._countY), + _countZ(rhs._countZ) { + _contents = std::vector((unsigned long) (_countX * _countY * _countZ)); +}*/ + +template T& Grid::operator()(std::size_t idx) { + return _contents[idx]; +} + +template const T& Grid::operator()(std::size_t idx) const { + return _contents[idx]; +} + +template T& Grid::operator()(std::size_t i, std::size_t j, std::size_t k) { + return _contents[k*_countX*_countY + j*_countX + i]; +} + +template const T& Grid::operator()(std::size_t i, std::size_t j, std::size_t k) const { + return _contents[k*_countX*_countY + j*_countX + i]; +} + + +template T& Grid::atIdx(std::size_t i, std::size_t j, std::size_t k) { + return _contents[k*_countX*_countY + j*_countX + i]; +} + +template const T& Grid::atIdx(std::size_t i, std::size_t j, std::size_t k) const { + return 
_contents[k*_countX*_countY + j*_countX + i]; +} + + +template T& Grid::operator()(const glm::ivec3 &idx) { + return _contents[idx.z*_countX*_countY + idx.y*_countX + idx.x]; +} + +template const T& Grid::operator()(const glm::ivec3 &idx) const { + return _contents[idx.z*_countX*_countY + idx.y*_countX + idx.x];; +} + +template T& Grid::atIdx(const glm::ivec3 &idx) { + return _contents[idx.z*_countX*_countY + idx.y*_countX + idx.x]; +} + +template const T& Grid::atIdx(const glm::ivec3 &idx) const { + return _contents[idx.z*_countX*_countY + idx.y*_countX + idx.x]; +} + + +template T& Grid::at(float x, float y, float z) { + return at(glm::vec3(x, y, z)); +} + +template const T& Grid::at(float x, float y, float z) const { + return at(glm::vec3(x, y, z));; +} + +template T& Grid::at(const glm::vec3 &pos) { + glm::ivec3 indices = indexOf(pos); + return this->operator()((size_t) indices.x, (size_t) indices.y, (size_t) indices.z); +} + +template const T& Grid::at(const glm::vec3 &pos) const { + glm::ivec3 indices = indexOf(pos); + return this->operator()((size_t) indices.x, (size_t) indices.y, (size_t) indices.z); +} + +template glm::ivec3 Grid::indexOf(const glm::vec3 &pos) const { + glm::vec3 indices = (pos - 0.f*_offset - _origin) / _cellSize; + int i = (int) indices.x; + int j = (int) indices.y; + int k = (int) indices.z; + //if (i >= _countX ) i = -1; + //if (j >= _countY ) j = -1; + //if (k >= _countZ ) k = -1; + return glm::clamp(glm::ivec3(i, j, k), glm::ivec3(0,0,0), glm::ivec3(_countX-1, _countY-1, _countZ-1)); +} + +template void Grid::indexOf(const glm::vec3 &pos, size_t &i, size_t &j, size_t &k) const { + glm::vec3 indices = (pos - 0.f*_offset - _origin) / _cellSize; + indices = glm::clamp(indices, glm::vec3(0,0,0), glm::vec3(_countX-1, _countY-1, _countZ-1)); + i = (size_t) indices.x; + j = (size_t) indices.y; + k = (size_t) indices.z; +// int ii = ((indices.x < _countX) * indices.x + (indices.x > _countX-1) * (_countX-1)); // clamp at countX +// int jj = 
((indices.y < _countY) * indices.y + (indices.y > _countY-1) * (_countY-1)); // clamp at countY +// int kk = ((indices.z < _countZ) * indices.z + (indices.z > _countZ-1) * (_countZ-1)); // clamp at countZ +// ii = (ii > 0) * ii; +// jj = (jj > 0) * jj; +// kk = (kk > 0) * kk; +// i = ii; +// j = jj; +// k = kk; +} + + +template glm::vec3 Grid::positionOf(const glm::ivec3 &idx) const { + return glm::vec3(idx.x * _cellSize, idx.y * _cellSize, idx.z * _cellSize) + _offset + _origin; +} + +template glm::vec3 Grid::positionOf(size_t i, size_t j, size_t k) const { + return glm::vec3(i * _cellSize, j * _cellSize, k * _cellSize) + _offset + _origin; +} + +template glm::vec3 Grid::fractionalIndexOf(const glm::vec3 &pos) const { + return glm::clamp((pos - _offset - _origin) / _cellSize, glm::vec3(0,0,0), glm::vec3(_countX, _countY, _countZ)); +} + +template glm::ivec3 Grid::toIJK(const std::size_t index) const { + size_t i,j,k; + toIJK(index, i,j,k); + return glm::ivec3(i,j,k); +} + +template void Grid::toIJK(const std::size_t index, size_t &i, size_t &j, size_t &k) const { + i = (index % _countX); + j = ((index / _countX) % _countY); + k = (index / (_countX * _countY)); +} + +template std::size_t Grid::fromIJK(const std::size_t i, const std::size_t j, const std::size_t k) const { + return (size_t) (k * _countX * _countY + j * _countX + i); +} + +template std::size_t Grid::fromIJK(const glm::ivec3 &ijk) const { + return (size_t) (ijk.z * _countX * _countY + ijk.y * _countX + ijk.x); +} + +template void Grid::iterate(const std::function &cb, bool parallel) { +#ifdef USETBB + if (parallel) { + tbb::parallel_for(tbb::blocked_range(0, _contents.size()), [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + glm::ivec3 ijk = toIJK(i); + cb(ijk.x, ijk.y, ijk.z); + } + }); + } else { + for (size_t idx = 0; idx < _contents.size(); idx++) { + glm::ivec3 ijk = toIJK(idx); + cb(ijk.x, ijk.y, ijk.z); + } + } +#else + for (size_t idx = 0; idx < 
_contents.size(); idx++) { + glm::ivec3 ijk = toIJK(idx); + cb(ijk.x, ijk.y, ijk.z); + } +#endif +} + +template void Grid::iterateRegion(size_t i, size_t j, size_t k, size_t I, size_t J, size_t K, const std::function &cb, bool parallel) { +#ifdef USETBB + tbb::blocked_range3d test(i,j,k,I,J,K); + if (parallel) { + tbb::parallel_for(tbb::blocked_range3d(i,I,j,J,k,K), [&](const tbb::blocked_range3d &r) { + for(size_t ii=r.pages().begin(), i_end=r.pages().end(); ii void Grid::iterateNeighborhood(size_t i, size_t j, size_t k, size_t r, const std::function &cb, bool parallel) { + size_t si = MATHIFELSE(i - r, 0, i == 0); + size_t sj = MATHIFELSE(j - r, 0, j == 0); + size_t sk = MATHIFELSE(k - r, 0, k == 0); + size_t ei = MATHIFELSE(i + r, _countX-1, i + r >= _countX); + size_t ej = MATHIFELSE(j + r, _countY-1, j + r >= _countY); + size_t ek = MATHIFELSE(k + r, _countZ-1, k + r >= _countZ); + +#ifdef USETBB + if (parallel) { + tbb::parallel_for(tbb::blocked_range3d(si,ei,sj,ej,sk,ek), [&](const tbb::blocked_range3d &r) { + for(size_t i=r.pages().begin(), i_end=r.pages().end(); i void Grid::getNeighboorhood(size_t i, size_t j, size_t k, size_t r, size_t &si, size_t &ei, size_t &sj, size_t &ej, size_t &sk, size_t &ek) { + si = MATHIFELSE(i - r, 0, i - r > i); + sj = MATHIFELSE(j - r, 0, j - r > j); + sk = MATHIFELSE(k - r, 0, k - r > k); + ei = std::min(i+r+1, _countX); //MATHIFELSE(i + r, _countX, i + r >= _countX); + ej = std::min(j+r+1, _countY); //MATHIFELSE(j + r, _countY, j + r >= _countY); + ek = std::min(k+r+1, _countZ); //MATHIFELSE(k + r, _countZ, k + r >= _countZ); +} + + +template void Grid::clear(const T &zeroVal) { +#ifdef USETBB + tbb::parallel_for(tbb::blocked_range(0, _contents.size()), [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + _contents[i] = zeroVal; + } + }); +#else + for (size_t i = 0; i < _contents.size(); i++) { + _contents[i] = zeroVal; + } +#endif +} + +template bool Grid::checkIdx(size_t i, size_t j, 
size_t k) const { + return i >= 0 && i < _countX && + j >= 0 && j < _countY && + k >= 0 && k < _countZ; +} +template bool Grid::checkIdx(const glm::ivec3 &idx) const { + return checkIdx((size_t) idx.x, (size_t) idx.y, (size_t) idx.z); +} + +template size_t Grid::countX() const { + return _countX; +} + +template size_t Grid::countY() const { + return _countY; +} + +template size_t Grid::countZ() const { + return _countZ; +} + +template size_t Grid::size() const { + return _contents.size(); +} + +template class Grid; +template class Grid; +template class Grid > >; diff --git a/core/solver/grid/Grid.h b/core/solver/grid/Grid.h new file mode 100644 index 00000000..b7fe527d --- /dev/null +++ b/core/solver/grid/Grid.h @@ -0,0 +1,78 @@ +// +// Created by austin on 3/20/16. +// + +#ifndef FLUIDSOLVER_GRID_H +#define FLUIDSOLVER_GRID_H + +#include +#include +#include +#include + +template class Grid { + friend class GridIterator; + friend class GridVectorAttributePainter; + friend class GridScalarAttributePainter; +public: + Grid(); + Grid(const glm::vec3 &origin, const glm::vec3 &offset, const glm::vec3 &dim, float size); + //template Grid(const Grid &rhs); + + T& operator()(std::size_t idx); + const T& operator()(std::size_t idx) const; + T& operator()(std::size_t i, std::size_t j, std::size_t k); + const T& operator()(std::size_t i, std::size_t j, std::size_t k) const; + T& atIdx(std::size_t i, std::size_t j, std::size_t k); + const T& atIdx(std::size_t i, std::size_t j, std::size_t k) const; + T& operator()(const glm::ivec3 &idx); + const T& operator()(const glm::ivec3 &idx) const; + T& atIdx(const glm::ivec3 &idx); + const T& atIdx(const glm::ivec3 &idx) const; + + T& at(float x, float y, float z); + const T& at(float x, float y, float z) const; + T& at(const glm::vec3 &pos); + const T& at(const glm::vec3 &pos) const; + + glm::ivec3 indexOf(const glm::vec3 &pos) const; + void indexOf(const glm::vec3 &pos, size_t &i, size_t &j, size_t &k) const; + glm::vec3 
positionOf(const glm::ivec3 &idx) const; + glm::vec3 positionOf(size_t i, size_t j, size_t k) const; + glm::vec3 fractionalIndexOf(const glm::vec3 &pos) const; + + glm::ivec3 toIJK(const std::size_t index) const; + void toIJK(const std::size_t index, size_t &i, size_t &j, size_t &k) const; + std::size_t fromIJK(const std::size_t i, const std::size_t j, const std::size_t k) const; + std::size_t fromIJK(const glm::ivec3 &ijk) const; + + void iterate(const std::function &cb, bool parallel=true); + void iterateRegion(size_t i, size_t j, size_t k, size_t I, size_t J, size_t K, const std::function &cb, bool parallel=true); + + void iterateNeighborhood(size_t i, size_t j, size_t k, size_t r, const std::function &cb, bool parallel=true); + void getNeighboorhood(size_t i, size_t j, size_t k, size_t r, size_t &si, size_t &ei, size_t &sj, size_t &ej, size_t &sk, size_t &ek); + + void clear(const T &zeroVal); + + bool checkIdx(size_t i, size_t j, size_t k) const; + bool checkIdx(const glm::ivec3 &idx) const; + + size_t countX() const; + size_t countY() const; + size_t countZ() const; + size_t size() const; + + virtual ~Grid(); + +private: + std::vector _contents; + glm::vec3 _origin; + glm::vec3 _offset; + glm::vec3 _dim; + float _cellSize; + size_t _countX; + size_t _countY; + size_t _countZ; +}; + +#endif //FLUIDSOLVER_GRID_H diff --git a/core/solver/grid/MACGrid.cpp b/core/solver/grid/MACGrid.cpp new file mode 100644 index 00000000..c304c9b5 --- /dev/null +++ b/core/solver/grid/MACGrid.cpp @@ -0,0 +1,29 @@ +// +// Created by austin on 3/20/16. 
+// + +#include "MACGrid.h" +#include + +template MACGrid::MACGrid() { + +} + +template MACGrid::~MACGrid() { + +} + +template MACGrid::MACGrid(const glm::vec3 &origin, const glm::vec3 &dim, float size) : + Grid(origin, size*glm::vec3(0.5f,0.5f,0.5f), dim, size), + _gU(Grid(origin, size*glm::vec3(0.0f,0.5f,0.5f), dim, size)), + _gV(Grid(origin, size*glm::vec3(0.5f,0.0f,0.5f), dim, size)), + _gW(Grid(origin, size*glm::vec3(0.5f,0.5f,0.0f), dim, size)), + _gU_old(Grid(origin, size*glm::vec3(0.0f,0.5f,0.5f), dim, size)), + _gV_old(Grid(origin, size*glm::vec3(0.5f,0.0f,0.5f), dim, size)), + _gW_old(Grid(origin, size*glm::vec3(0.5f,0.5f,0.0f), dim, size)), + _gP(Grid(origin, size*glm::vec3(0.5f,0.5f,0.5f), dim, size)), + _gType(Grid(origin, size*glm::vec3(0.5f,0.5f,0.5f), dim, size)) { + +} + +template class MACGrid > >; \ No newline at end of file diff --git a/core/solver/grid/MACGrid.h b/core/solver/grid/MACGrid.h new file mode 100644 index 00000000..85a3c71a --- /dev/null +++ b/core/solver/grid/MACGrid.h @@ -0,0 +1,40 @@ +// +// Created by austin on 3/20/16. +// + +#ifndef FLUIDSOLVER_MACGRID_H +#define FLUIDSOLVER_MACGRID_H + +#include "Grid.h" +#include + +enum CellType { + EMPTY, + FLUID, + SOLID +}; + +template class MACGrid : public Grid { + friend class FluidSolver; +public: + + MACGrid(); + MACGrid(const glm::vec3 &origin, const glm::vec3 &dim, float size); + virtual ~MACGrid(); + + Grid _gU; + Grid _gV; + Grid _gW; + Grid _gU_old; + Grid _gV_old; + Grid _gW_old; + Grid _gP; + Grid _gType; +// Grid _gDiv; + +private: + std::vector _contents; +}; + + +#endif //FLUIDSOLVER_MACGRID_H diff --git a/core/util/flags.h b/core/util/flags.h new file mode 100644 index 00000000..265b643c --- /dev/null +++ b/core/util/flags.h @@ -0,0 +1,11 @@ +// +// Created by austin on 3/22/16. 
+// + +#ifndef FLUIDSOLVER_FLAGS_H +#define FLUIDSOLVER_FLAGS_H + +#endif //FLUIDSOLVER_FLAGS_H + +#define USETBB +//#define SPLATTING \ No newline at end of file diff --git a/core/util/hacks.h b/core/util/hacks.h new file mode 100644 index 00000000..d5297554 --- /dev/null +++ b/core/util/hacks.h @@ -0,0 +1,10 @@ +// +// Created by austin on 3/21/16. +// + +#ifndef FLUIDSOLVER_HACKS_H +#define FLUIDSOLVER_HACKS_H + +#define MATHIFELSE(expr1, expr2, cond) ( expr1*(1-(cond)) + (expr2)*(cond) ) + +#endif //FLUIDSOLVER_HACKS_H diff --git a/core/util/math.h b/core/util/math.h new file mode 100644 index 00000000..c0bcf7f8 --- /dev/null +++ b/core/util/math.h @@ -0,0 +1,37 @@ +// +// Created by austin on 2/27/16. +// + +#ifndef FLUIDSOLVER_MATH_H +#define FLUIDSOLVER_MATH_H + +#define GLM_FORCE_RADIANS + +#include +#include +#include +#include + +static const float PI = 3.14159265358979323846f; +static const float TWO_PI = 2 * PI; +static const float DEG2RAD = PI / 180.f; +static const float RAD2DEG = 180.f / PI; + +// Float approximate-equality comparison +template +inline bool fequal(T a, T b, T epsilon = 0.0001){ + if (a == b) { + // Shortcut + return true; + } + + const T diff = std::abs(a - b); + if (a * b == 0) { + // a or b or both are zero; relative error is not meaningful here + return diff < (epsilon * epsilon); + } + + return diff / (std::abs(a) + std::abs(b)) < epsilon; +} + +#endif //FLUIDSOLVER_MATH_H diff --git a/main.cpp b/main.cpp new file mode 100644 index 00000000..dddba300 --- /dev/null +++ b/main.cpp @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char* argv[]) { + Window* window = new Window("Fluid Solver"); + + FluidSolver* solver = SceneLoader::LoadScene(argv[1]); + +// ParticlesWriter particlesWriter; + solver->init(); + +// particlesWriter.writeData(solver, "particles_0.vdb"); + + ParticlesPainter particlesPainter(solver, 2); + BoxPainter boxPainter((Box *) 
solver->_container); + GridVectorAttributePainter uPainter (&solver->_MAC->_gU, 1.f, glm::vec3(1,0,0), glm::vec3(0.2,0,0)); + GridVectorAttributePainter vPainter (&solver->_MAC->_gV, 1.f, glm::vec3(0,1,0), glm::vec3(0,0.2,0)); + GridVectorAttributePainter wPainter (&solver->_MAC->_gW, 1.f, glm::vec3(0,0,1), glm::vec3(0,0,0.2)); + GridScalarAttributePainter tPainter ( + &solver->_MAC->_gType, 0.f, 2.f, 2.f, 8.f, glm::vec3(0,1,1), glm::vec3(0,0,0)); + GridScalarAttributePainter posPressurePainter ( + &solver->_MAC->_gP, 0.f, 50.f, 0.f, 5.f, glm::vec3(1,1,0), glm::vec3(1,0,0)); + GridScalarAttributePainter negPressurePainter ( + &solver->_MAC->_gP, 0.f, -2.f, 0.f, 5.f, glm::vec3(0,1,1), glm::vec3(0,0,1)); + + window->addPainter(&particlesPainter); + window->addPainter(&boxPainter); +// window->addPainter(&uPainter); +// window->addPainter(&vPainter); +// window->addPainter(&wPainter); +// window->addPainter(&tPainter); +// window->addPainter(&posPressurePainter); +// window->addPainter(&negPressurePainter); + + window->loadSceneCB = [](void*) { + std::cout << "what" << std::endl; + }; +// window->initializeTweakBar(); + + int framerate = 24; + double start = glfwGetTime(); + int frame = 0; + + float totalComputeTime = 0; + + window->initloop([&]() { + double now = glfwGetTime(); + float duration = (float) (now - start); + + // limit solver update to 24fps + if (duration >= 1.f / framerate) { + start = now; + //solver->update(duration); + solver->update(1.f / framerate); + totalComputeTime += glfwGetTime() - start; + + std::string filename = "particles_"; + filename.append(std::to_string(++frame)); + filename.append(".tga"); +// window->saveImage(filename); +// particlesWriter.writeData(solver, filename); + if (frame >= 960) { + exit(0); + } + } + }); +// solver->update(0.1f); + + std::cout << "Average compute time: " << totalComputeTime / frame << " seconds" << std::endl; + + delete window; + delete solver; + + return 0; +} \ No newline at end of file diff --git 
a/nuparu/include/AntTweakBar/AntTweakBar.h b/nuparu/include/AntTweakBar/AntTweakBar.h new file mode 100644 index 00000000..a1cf277d --- /dev/null +++ b/nuparu/include/AntTweakBar/AntTweakBar.h @@ -0,0 +1,378 @@ +// ---------------------------------------------------------------------------- +// +// @file AntTweakBar.h +// +// @brief AntTweakBar is a light and intuitive graphical user interface +// that can be readily integrated into OpenGL and DirectX +// applications in order to interactively tweak parameters. +// +// @author Philippe Decaudin +// +// @doc http://anttweakbar.sourceforge.net/doc +// +// @license This file is part of the AntTweakBar library. +// AntTweakBar is a free software released under the zlib license. +// For conditions of distribution and use, see License.txt +// +// ---------------------------------------------------------------------------- + + +#if !defined TW_INCLUDED +#define TW_INCLUDED + +#include + +#define TW_VERSION 116 // Version Mmm : M=Major mm=minor (e.g., 102 is version 1.02) + + +#ifdef __cplusplus +# if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4995 4530) +# include +# pragma warning(pop) +# else +# include +# endif + extern "C" { +#endif // __cplusplus + + +// ---------------------------------------------------------------------------- +// OS specific definitions +// ---------------------------------------------------------------------------- + +#if (defined(_WIN32) || defined(_WIN64)) && !defined(TW_STATIC) +# define TW_CALL __stdcall +# define TW_CDECL_CALL __cdecl +# define TW_EXPORT_API __declspec(dllexport) +# define TW_IMPORT_API __declspec(dllimport) +#else +# define TW_CALL +# define TW_CDECL_CALL +# define TW_EXPORT_API +# define TW_IMPORT_API +#endif + +#if defined TW_EXPORTS +# define TW_API TW_EXPORT_API +#elif defined TW_STATIC +# define TW_API +# if defined(_MSC_VER) && !defined(TW_NO_LIB_PRAGMA) +# ifdef _WIN64 +# pragma comment(lib, "AntTweakBarStatic64") +# else +# pragma 
comment(lib, "AntTweakBarStatic") +# endif +# endif +#else +# define TW_API TW_IMPORT_API +# if defined(_MSC_VER) && !defined(TW_NO_LIB_PRAGMA) +# ifdef _WIN64 +# pragma comment(lib, "AntTweakBar64") +# else +# pragma comment(lib, "AntTweakBar") +# endif +# endif +#endif + + +// ---------------------------------------------------------------------------- +// Bar functions and definitions +// ---------------------------------------------------------------------------- + +typedef struct CTwBar TwBar; // structure CTwBar is not exposed. + +TW_API TwBar * TW_CALL TwNewBar(const char *barName); +TW_API int TW_CALL TwDeleteBar(TwBar *bar); +TW_API int TW_CALL TwDeleteAllBars(); +TW_API int TW_CALL TwSetTopBar(const TwBar *bar); +TW_API TwBar * TW_CALL TwGetTopBar(); +TW_API int TW_CALL TwSetBottomBar(const TwBar *bar); +TW_API TwBar * TW_CALL TwGetBottomBar(); +TW_API const char * TW_CALL TwGetBarName(const TwBar *bar); +TW_API int TW_CALL TwGetBarCount(); +TW_API TwBar * TW_CALL TwGetBarByIndex(int barIndex); +TW_API TwBar * TW_CALL TwGetBarByName(const char *barName); +TW_API int TW_CALL TwRefreshBar(TwBar *bar); + +// ---------------------------------------------------------------------------- +// Var functions and definitions +// ---------------------------------------------------------------------------- + +typedef enum ETwType +{ + TW_TYPE_UNDEF = 0, +#ifdef __cplusplus + TW_TYPE_BOOLCPP = 1, +#endif // __cplusplus + TW_TYPE_BOOL8 = 2, + TW_TYPE_BOOL16, + TW_TYPE_BOOL32, + TW_TYPE_CHAR, + TW_TYPE_INT8, + TW_TYPE_UINT8, + TW_TYPE_INT16, + TW_TYPE_UINT16, + TW_TYPE_INT32, + TW_TYPE_UINT32, + TW_TYPE_FLOAT, + TW_TYPE_DOUBLE, + TW_TYPE_COLOR32, // 32 bits color. Order is RGBA if API is OpenGL or Direct3D10, and inversed if API is Direct3D9 (can be modified by defining 'colorOrder=...', see doc) + TW_TYPE_COLOR3F, // 3 floats color. Order is RGB. + TW_TYPE_COLOR4F, // 4 floats color. Order is RGBA. 
+ TW_TYPE_CDSTRING, // Null-terminated C Dynamic String (pointer to an array of char dynamically allocated with malloc/realloc/strdup) +#ifdef __cplusplus +# if defined(_MSC_VER) && (_MSC_VER == 1600) + TW_TYPE_STDSTRING = (0x2ffe0000+sizeof(std::string)), // VS2010 C++ STL string (std::string) +# else + TW_TYPE_STDSTRING = (0x2fff0000+sizeof(std::string)), // C++ STL string (std::string) +# endif +#endif // __cplusplus + TW_TYPE_QUAT4F = TW_TYPE_CDSTRING+2, // 4 floats encoding a quaternion {qx,qy,qz,qs} + TW_TYPE_QUAT4D, // 4 doubles encoding a quaternion {qx,qy,qz,qs} + TW_TYPE_DIR3F, // direction vector represented by 3 floats + TW_TYPE_DIR3D // direction vector represented by 3 doubles +} TwType; +#define TW_TYPE_CSSTRING(n) ((TwType)(0x30000000+((n)&0xfffffff))) // Null-terminated C Static String of size n (defined as char[n], with n<2^28) + +typedef void (TW_CALL * TwSetVarCallback)(const void *value, void *clientData); +typedef void (TW_CALL * TwGetVarCallback)(void *value, void *clientData); +typedef void (TW_CALL * TwButtonCallback)(void *clientData); + +TW_API int TW_CALL TwAddVarRW(TwBar *bar, const char *name, TwType type, void *var, const char *def); +TW_API int TW_CALL TwAddVarRO(TwBar *bar, const char *name, TwType type, const void *var, const char *def); +TW_API int TW_CALL TwAddVarCB(TwBar *bar, const char *name, TwType type, TwSetVarCallback setCallback, TwGetVarCallback getCallback, void *clientData, const char *def); +TW_API int TW_CALL TwAddButton(TwBar *bar, const char *name, TwButtonCallback callback, void *clientData, const char *def); +TW_API int TW_CALL TwAddSeparator(TwBar *bar, const char *name, const char *def); +TW_API int TW_CALL TwRemoveVar(TwBar *bar, const char *name); +TW_API int TW_CALL TwRemoveAllVars(TwBar *bar); + +typedef struct CTwEnumVal +{ + int Value; + const char * Label; +} TwEnumVal; +typedef struct CTwStructMember +{ + const char * Name; + TwType Type; + size_t Offset; + const char * DefString; +} TwStructMember; 
+typedef void (TW_CALL * TwSummaryCallback)(char *summaryString, size_t summaryMaxLength, const void *value, void *clientData); + +TW_API int TW_CALL TwDefine(const char *def); +TW_API TwType TW_CALL TwDefineEnum(const char *name, const TwEnumVal *enumValues, unsigned int nbValues); +TW_API TwType TW_CALL TwDefineEnumFromString(const char *name, const char *enumString); +TW_API TwType TW_CALL TwDefineStruct(const char *name, const TwStructMember *structMembers, unsigned int nbMembers, size_t structSize, TwSummaryCallback summaryCallback, void *summaryClientData); + +typedef void (TW_CALL * TwCopyCDStringToClient)(char **destinationClientStringPtr, const char *sourceString); +TW_API void TW_CALL TwCopyCDStringToClientFunc(TwCopyCDStringToClient copyCDStringFunc); +TW_API void TW_CALL TwCopyCDStringToLibrary(char **destinationLibraryStringPtr, const char *sourceClientString); +#ifdef __cplusplus +typedef void (TW_CALL * TwCopyStdStringToClient)(std::string& destinationClientString, const std::string& sourceString); +TW_API void TW_CALL TwCopyStdStringToClientFunc(TwCopyStdStringToClient copyStdStringToClientFunc); +TW_API void TW_CALL TwCopyStdStringToLibrary(std::string& destinationLibraryString, const std::string& sourceClientString); +#endif // __cplusplus + +typedef enum ETwParamValueType +{ + TW_PARAM_INT32, + TW_PARAM_FLOAT, + TW_PARAM_DOUBLE, + TW_PARAM_CSTRING // Null-terminated array of char (ie, c-string) +} TwParamValueType; +TW_API int TW_CALL TwGetParam(TwBar *bar, const char *varName, const char *paramName, TwParamValueType paramValueType, unsigned int outValueMaxCount, void *outValues); +TW_API int TW_CALL TwSetParam(TwBar *bar, const char *varName, const char *paramName, TwParamValueType paramValueType, unsigned int inValueCount, const void *inValues); + + +// ---------------------------------------------------------------------------- +// Management functions and definitions +// 
---------------------------------------------------------------------------- + +typedef enum ETwGraphAPI +{ + TW_OPENGL = 1, + TW_DIRECT3D9 = 2, + TW_DIRECT3D10 = 3, + TW_DIRECT3D11 = 4, + TW_OPENGL_CORE = 5 +} TwGraphAPI; + +TW_API int TW_CALL TwInit(TwGraphAPI graphAPI, void *device); +TW_API int TW_CALL TwTerminate(); + +TW_API int TW_CALL TwDraw(); +TW_API int TW_CALL TwWindowSize(int width, int height); + +TW_API int TW_CALL TwSetCurrentWindow(int windowID); // multi-windows support +TW_API int TW_CALL TwGetCurrentWindow(); +TW_API int TW_CALL TwWindowExists(int windowID); + +typedef enum ETwKeyModifier +{ + TW_KMOD_NONE = 0x0000, // same codes as SDL keysym.mod + TW_KMOD_SHIFT = 0x0003, + TW_KMOD_CTRL = 0x00c0, + TW_KMOD_ALT = 0x0100, + TW_KMOD_META = 0x0c00 +} TwKeyModifier; +typedef enum EKeySpecial +{ + TW_KEY_BACKSPACE = '\b', + TW_KEY_TAB = '\t', + TW_KEY_CLEAR = 0x0c, + TW_KEY_RETURN = '\r', + TW_KEY_PAUSE = 0x13, + TW_KEY_ESCAPE = 0x1b, + TW_KEY_SPACE = ' ', + TW_KEY_DELETE = 0x7f, + TW_KEY_UP = 273, // same codes and order as SDL 1.2 keysym.sym + TW_KEY_DOWN, + TW_KEY_RIGHT, + TW_KEY_LEFT, + TW_KEY_INSERT, + TW_KEY_HOME, + TW_KEY_END, + TW_KEY_PAGE_UP, + TW_KEY_PAGE_DOWN, + TW_KEY_F1, + TW_KEY_F2, + TW_KEY_F3, + TW_KEY_F4, + TW_KEY_F5, + TW_KEY_F6, + TW_KEY_F7, + TW_KEY_F8, + TW_KEY_F9, + TW_KEY_F10, + TW_KEY_F11, + TW_KEY_F12, + TW_KEY_F13, + TW_KEY_F14, + TW_KEY_F15, + TW_KEY_LAST +} TwKeySpecial; + +TW_API int TW_CALL TwKeyPressed(int key, int modifiers); +TW_API int TW_CALL TwKeyTest(int key, int modifiers); + +typedef enum ETwMouseAction +{ + TW_MOUSE_RELEASED, + TW_MOUSE_PRESSED +} TwMouseAction; +typedef enum ETwMouseButtonID +{ + TW_MOUSE_LEFT = 1, // same code as SDL_BUTTON_LEFT + TW_MOUSE_MIDDLE = 2, // same code as SDL_BUTTON_MIDDLE + TW_MOUSE_RIGHT = 3 // same code as SDL_BUTTON_RIGHT +} TwMouseButtonID; + +TW_API int TW_CALL TwMouseButton(TwMouseAction action, TwMouseButtonID button); +TW_API int TW_CALL TwMouseMotion(int mouseX, int 
mouseY); +TW_API int TW_CALL TwMouseWheel(int pos); + +TW_API const char * TW_CALL TwGetLastError(); +typedef void (TW_CALL * TwErrorHandler)(const char *errorMessage); +TW_API void TW_CALL TwHandleErrors(TwErrorHandler errorHandler); + + +// ---------------------------------------------------------------------------- +// Helper functions to translate events from some common window management +// frameworks to AntTweakBar. +// They call TwKeyPressed, TwMouse* and TwWindowSize for you (implemented in +// files TwEventWin.c TwEventSDL*.c TwEventGLFW.c TwEventGLUT.c) +// ---------------------------------------------------------------------------- + +// For Windows message proc +#ifndef _W64 // Microsoft specific (detection of 64 bits portability issues) +# define _W64 +#endif // _W64 +#ifdef _WIN64 + TW_API int TW_CALL TwEventWin(void *wnd, unsigned int msg, unsigned __int64 _W64 wParam, __int64 _W64 lParam); +#else + TW_API int TW_CALL TwEventWin(void *wnd, unsigned int msg, unsigned int _W64 wParam, int _W64 lParam); +#endif +#define TwEventWin32 TwEventWin // For compatibility with AntTweakBar versions prior to 1.11 + +// For libSDL event loop +TW_API int TW_CALL TwEventSDL(const void *sdlEvent, unsigned char sdlMajorVersion, unsigned char sdlMinorVersion); + +// For GLFW event callbacks +// You should define GLFW_CDECL before including AntTweakBar.h if your version of GLFW uses cdecl calling convensions +#ifdef GLFW_CDECL + TW_API int TW_CDECL_CALL TwEventMouseButtonGLFWcdecl(int glfwButton, int glfwAction); + TW_API int TW_CDECL_CALL TwEventKeyGLFWcdecl(int glfwKey, int glfwAction); + TW_API int TW_CDECL_CALL TwEventCharGLFWcdecl(int glfwChar, int glfwAction); + TW_API int TW_CDECL_CALL TwEventMousePosGLFWcdecl(int mouseX, int mouseY); + TW_API int TW_CDECL_CALL TwEventMouseWheelGLFWcdecl(int wheelPos); +# define TwEventMouseButtonGLFW TwEventMouseButtonGLFWcdecl +# define TwEventKeyGLFW TwEventKeyGLFWcdecl +# define TwEventCharGLFW TwEventCharGLFWcdecl +# define 
TwEventMousePosGLFW TwEventMousePosGLFWcdecl +# define TwEventMouseWheelGLFW TwEventMouseWheelGLFWcdecl +#else + TW_API int TW_CALL TwEventMouseButtonGLFW(int glfwButton, int glfwAction); + TW_API int TW_CALL TwEventKeyGLFW(int glfwKey, int glfwAction); + TW_API int TW_CALL TwEventCharGLFW(int glfwChar, int glfwAction); +# define TwEventMousePosGLFW TwMouseMotion +# define TwEventMouseWheelGLFW TwMouseWheel +#endif + +// For GLUT event callbacks (Windows calling convention for GLUT callbacks is cdecl) +#if defined(_WIN32) || defined(_WIN64) +# define TW_GLUT_CALL TW_CDECL_CALL +#else +# define TW_GLUT_CALL +#endif +TW_API int TW_GLUT_CALL TwEventMouseButtonGLUT(int glutButton, int glutState, int mouseX, int mouseY); +TW_API int TW_GLUT_CALL TwEventMouseMotionGLUT(int mouseX, int mouseY); +TW_API int TW_GLUT_CALL TwEventKeyboardGLUT(unsigned char glutKey, int mouseX, int mouseY); +TW_API int TW_GLUT_CALL TwEventSpecialGLUT(int glutKey, int mouseX, int mouseY); +TW_API int TW_CALL TwGLUTModifiersFunc(int (TW_CALL *glutGetModifiersFunc)(void)); +typedef void (TW_GLUT_CALL *GLUTmousebuttonfun)(int glutButton, int glutState, int mouseX, int mouseY); +typedef void (TW_GLUT_CALL *GLUTmousemotionfun)(int mouseX, int mouseY); +typedef void (TW_GLUT_CALL *GLUTkeyboardfun)(unsigned char glutKey, int mouseX, int mouseY); +typedef void (TW_GLUT_CALL *GLUTspecialfun)(int glutKey, int mouseX, int mouseY); + +// For SFML event loop +TW_API int TW_CALL TwEventSFML(const void *sfmlEvent, unsigned char sfmlMajorVersion, unsigned char sfmlMinorVersion); + +// For X11 event loop +#if defined(_UNIX) + TW_API int TW_CDECL_CALL TwEventX11(void *xevent); +#endif + +// ---------------------------------------------------------------------------- +// Make sure the types have the right sizes +// ---------------------------------------------------------------------------- + +#define TW_COMPILE_TIME_ASSERT(name, x) typedef int TW_DUMMY_ ## name[(x) * 2 - 1] + +TW_COMPILE_TIME_ASSERT(TW_CHAR, 
sizeof(char) == 1); +TW_COMPILE_TIME_ASSERT(TW_SHORT, sizeof(short) == 2); +TW_COMPILE_TIME_ASSERT(TW_INT, sizeof(int) == 4); +TW_COMPILE_TIME_ASSERT(TW_FLOAT, sizeof(float) == 4); +TW_COMPILE_TIME_ASSERT(TW_DOUBLE, sizeof(double) == 8); + +// Check pointer size on Windows +#if !defined(_WIN64) && defined(_WIN32) + // If the following assert failed, the platform is not 32-bit and _WIN64 is not defined. + // When targeting 64-bit Windows platform, _WIN64 must be defined. + TW_COMPILE_TIME_ASSERT(TW_PTR32, sizeof(void*) == 4); +#elif defined(_WIN64) + // If the following assert failed, _WIN64 is defined but the targeted platform is not 64-bit. + TW_COMPILE_TIME_ASSERT(TW_PTR64, sizeof(void*) == 8); +#endif + +// --------------------------------------------------------------------------- + + +#ifdef __cplusplus + } // extern "C" +#endif // __cplusplus + + +#endif // !defined TW_INCLUDED diff --git a/nuparu/include/Eigen/Array b/nuparu/include/Eigen/Array deleted file mode 100644 index 3d004fb6..00000000 --- a/nuparu/include/Eigen/Array +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef EIGEN_ARRAY_MODULE_H -#define EIGEN_ARRAY_MODULE_H - -// include Core first to handle Eigen2 support macros -#include "Core" - -#ifndef EIGEN2_SUPPORT - #error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core. -#endif - -#endif // EIGEN_ARRAY_MODULE_H diff --git a/nuparu/include/Eigen/COPYING.BSD b/nuparu/include/Eigen/COPYING.BSD deleted file mode 100644 index 11971ffe..00000000 --- a/nuparu/include/Eigen/COPYING.BSD +++ /dev/null @@ -1,26 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ \ No newline at end of file diff --git a/nuparu/include/Eigen/COPYING.GPL b/nuparu/include/Eigen/COPYING.GPL deleted file mode 100644 index 94a9ed02..00000000 --- a/nuparu/include/Eigen/COPYING.GPL +++ /dev/null @@ -1,674 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. 
By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. 
- - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. 
- - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. 
- - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. 
The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. 
- - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. 
This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. 
For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. 
Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. 
- - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. 
Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. 
- - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<http://www.gnu.org/philosophy/why-not-lgpl.html>. 
diff --git a/nuparu/include/Eigen/COPYING.LGPL b/nuparu/include/Eigen/COPYING.LGPL deleted file mode 100644 index 4362b491..00000000 --- a/nuparu/include/Eigen/COPYING.LGPL +++ /dev/null @@ -1,502 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1, February 1999 - - Copyright (C) 1991, 1999 Free Software Foundation, Inc. - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -[This is the first released version of the Lesser GPL. It also counts - as the successor of the GNU Library Public License, version 2, hence - the version number 2.1.] - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -Licenses are intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. - - This license, the Lesser General Public License, applies to some -specially designated software packages--typically libraries--of the -Free Software Foundation and other authors who decide to use it. You -can use it too, but we suggest you first think carefully about whether -this license or the ordinary General Public License is the better -strategy to use in any particular case, based on the explanations below. - - When we speak of free software, we are referring to freedom of use, -not price. Our General Public Licenses are designed to make sure that -you have the freedom to distribute copies of free software (and charge -for this service if you wish); that you receive source code or can get -it if you want it; that you can change the software and use pieces of -it in new free programs; and that you are informed that you can do -these things. - - To protect your rights, we need to make restrictions that forbid -distributors to deny you these rights or to ask you to surrender these -rights. 
These restrictions translate to certain responsibilities for -you if you distribute copies of the library or if you modify it. - - For example, if you distribute copies of the library, whether gratis -or for a fee, you must give the recipients all the rights that we gave -you. You must make sure that they, too, receive or can get the source -code. If you link other code with the library, you must provide -complete object files to the recipients, so that they can relink them -with the library after making changes to the library and recompiling -it. And you must show them these terms so they know their rights. - - We protect your rights with a two-step method: (1) we copyright the -library, and (2) we offer you this license, which gives you legal -permission to copy, distribute and/or modify the library. - - To protect each distributor, we want to make it very clear that -there is no warranty for the free library. Also, if the library is -modified by someone else and passed on, the recipients should know -that what they have is not the original version, so that the original -author's reputation will not be affected by problems that might be -introduced by others. - - Finally, software patents pose a constant threat to the existence of -any free program. We wish to make sure that a company cannot -effectively restrict the users of a free program by obtaining a -restrictive license from a patent holder. Therefore, we insist that -any patent license obtained for a version of the library must be -consistent with the full freedom of use specified in this license. - - Most GNU software, including some libraries, is covered by the -ordinary GNU General Public License. This license, the GNU Lesser -General Public License, applies to certain designated libraries, and -is quite different from the ordinary General Public License. We use -this license for certain libraries in order to permit linking those -libraries into non-free programs. 
- - When a program is linked with a library, whether statically or using -a shared library, the combination of the two is legally speaking a -combined work, a derivative of the original library. The ordinary -General Public License therefore permits such linking only if the -entire combination fits its criteria of freedom. The Lesser General -Public License permits more lax criteria for linking other code with -the library. - - We call this license the "Lesser" General Public License because it -does Less to protect the user's freedom than the ordinary General -Public License. It also provides other free software developers Less -of an advantage over competing non-free programs. These disadvantages -are the reason we use the ordinary General Public License for many -libraries. However, the Lesser license provides advantages in certain -special circumstances. - - For example, on rare occasions, there may be a special need to -encourage the widest possible use of a certain library, so that it becomes -a de-facto standard. To achieve this, non-free programs must be -allowed to use the library. A more frequent case is that a free -library does the same job as widely used non-free libraries. In this -case, there is little to gain by limiting the free library to free -software only, so we use the Lesser General Public License. - - In other cases, permission to use a particular library in non-free -programs enables a greater number of people to use a large body of -free software. For example, permission to use the GNU C Library in -non-free programs enables many more people to use the whole GNU -operating system, as well as its variant, the GNU/Linux operating -system. - - Although the Lesser General Public License is Less protective of the -users' freedom, it does ensure that the user of a program that is -linked with the Library has the freedom and the wherewithal to run -that program using a modified version of the Library. 
- - The precise terms and conditions for copying, distribution and -modification follow. Pay close attention to the difference between a -"work based on the library" and a "work that uses the library". The -former contains code derived from the library, whereas the latter must -be combined with the library in order to run. - - GNU LESSER GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License Agreement applies to any software library or other -program which contains a notice placed by the copyright holder or -other authorized party saying it may be distributed under the terms of -this Lesser General Public License (also called "this License"). -Each licensee is addressed as "you". - - A "library" means a collection of software functions and/or data -prepared so as to be conveniently linked with application programs -(which use some of those functions and data) to form executables. - - The "Library", below, refers to any such software library or work -which has been distributed under these terms. A "work based on the -Library" means either the Library or any derivative work under -copyright law: that is to say, a work containing the Library or a -portion of it, either verbatim or with modifications and/or translated -straightforwardly into another language. (Hereinafter, translation is -included without limitation in the term "modification".) - - "Source code" for a work means the preferred form of the work for -making modifications to it. For a library, complete source code means -all the source code for all modules it contains, plus any associated -interface definition files, plus the scripts used to control compilation -and installation of the library. - - Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. 
The act of -running a program using the Library is not restricted, and output from -such a program is covered only if its contents constitute a work based -on the Library (independent of the use of the Library in a tool for -writing it). Whether that is true depends on what the Library does -and what the program that uses the Library does. - - 1. You may copy and distribute verbatim copies of the Library's -complete source code as you receive it, in any medium, provided that -you conspicuously and appropriately publish on each copy an -appropriate copyright notice and disclaimer of warranty; keep intact -all the notices that refer to this License and to the absence of any -warranty; and distribute a copy of this License along with the -Library. - - You may charge a fee for the physical act of transferring a copy, -and you may at your option offer warranty protection in exchange for a -fee. - - 2. You may modify your copy or copies of the Library or any portion -of it, thus forming a work based on the Library, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) The modified work must itself be a software library. - - b) You must cause the files modified to carry prominent notices - stating that you changed the files and the date of any change. - - c) You must cause the whole of the work to be licensed at no - charge to all third parties under the terms of this License. - - d) If a facility in the modified Library refers to a function or a - table of data to be supplied by an application program that uses - the facility, other than as an argument passed when the facility - is invoked, then you must make a good faith effort to ensure that, - in the event an application does not supply such function or - table, the facility still operates, and performs whatever part of - its purpose remains meaningful. 
- - (For example, a function in a library to compute square roots has - a purpose that is entirely well-defined independent of the - application. Therefore, Subsection 2d requires that any - application-supplied function or table used by this function must - be optional: if the application does not supply it, the square - root function must still compute square roots.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Library, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Library, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote -it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Library. - -In addition, mere aggregation of another work not based on the Library -with the Library (or with a work based on the Library) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may opt to apply the terms of the ordinary GNU General Public -License instead of this License to a given copy of the Library. To do -this, you must alter all the notices that refer to this License, so -that they refer to the ordinary GNU General Public License, version 2, -instead of to this License. (If a newer version than version 2 of the -ordinary GNU General Public License has appeared, then you can specify -that version instead if you wish.) Do not make any other change in -these notices. 
- - Once this change is made in a given copy, it is irreversible for -that copy, so the ordinary GNU General Public License applies to all -subsequent copies and derivative works made from that copy. - - This option is useful when you wish to copy part of the code of -the Library into a program that is not a library. - - 4. You may copy and distribute the Library (or a portion or -derivative of it, under Section 2) in object code or executable form -under the terms of Sections 1 and 2 above provided that you accompany -it with the complete corresponding machine-readable source code, which -must be distributed under the terms of Sections 1 and 2 above on a -medium customarily used for software interchange. - - If distribution of object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the -source code from the same place satisfies the requirement to -distribute the source code, even though third parties are not -compelled to copy the source along with the object code. - - 5. A program that contains no derivative of any portion of the -Library, but is designed to work with the Library by being compiled or -linked with it, is called a "work that uses the Library". Such a -work, in isolation, is not a derivative work of the Library, and -therefore falls outside the scope of this License. - - However, linking a "work that uses the Library" with the Library -creates an executable that is a derivative of the Library (because it -contains portions of the Library), rather than a "work that uses the -library". The executable is therefore covered by this License. -Section 6 states terms for distribution of such executables. - - When a "work that uses the Library" uses material from a header file -that is part of the Library, the object code for the work may be a -derivative work of the Library even though the source code is not. 
-Whether this is true is especially significant if the work can be -linked without the Library, or if the work is itself a library. The -threshold for this to be true is not precisely defined by law. - - If such an object file uses only numerical parameters, data -structure layouts and accessors, and small macros and small inline -functions (ten lines or less in length), then the use of the object -file is unrestricted, regardless of whether it is legally a derivative -work. (Executables containing this object code plus portions of the -Library will still fall under Section 6.) - - Otherwise, if the work is a derivative of the Library, you may -distribute the object code for the work under the terms of Section 6. -Any executables containing that work also fall under Section 6, -whether or not they are linked directly with the Library itself. - - 6. As an exception to the Sections above, you may also combine or -link a "work that uses the Library" with the Library to produce a -work containing portions of the Library, and distribute that work -under terms of your choice, provided that the terms permit -modification of the work for the customer's own use and reverse -engineering for debugging such modifications. - - You must give prominent notice with each copy of the work that the -Library is used in it and that the Library and its use are covered by -this License. You must supply a copy of this License. If the work -during execution displays copyright notices, you must include the -copyright notice for the Library among them, as well as a reference -directing the user to the copy of this License. 
Also, you must do one -of these things: - - a) Accompany the work with the complete corresponding - machine-readable source code for the Library including whatever - changes were used in the work (which must be distributed under - Sections 1 and 2 above); and, if the work is an executable linked - with the Library, with the complete machine-readable "work that - uses the Library", as object code and/or source code, so that the - user can modify the Library and then relink to produce a modified - executable containing the modified Library. (It is understood - that the user who changes the contents of definitions files in the - Library will not necessarily be able to recompile the application - to use the modified definitions.) - - b) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (1) uses at run time a - copy of the library already present on the user's computer system, - rather than copying library functions into the executable, and (2) - will operate properly with a modified version of the library, if - the user installs one, as long as the modified version is - interface-compatible with the version that the work was made with. - - c) Accompany the work with a written offer, valid for at - least three years, to give the same user the materials - specified in Subsection 6a, above, for a charge no more - than the cost of performing this distribution. - - d) If distribution of the work is made by offering access to copy - from a designated place, offer equivalent access to copy the above - specified materials from the same place. - - e) Verify that the user has already received a copy of these - materials or that you have already sent this user a copy. - - For an executable, the required form of the "work that uses the -Library" must include any data and utility programs needed for -reproducing the executable from it. 
However, as a special exception, -the materials to be distributed need not include anything that is -normally distributed (in either source or binary form) with the major -components (compiler, kernel, and so on) of the operating system on -which the executable runs, unless that component itself accompanies -the executable. - - It may happen that this requirement contradicts the license -restrictions of other proprietary libraries that do not normally -accompany the operating system. Such a contradiction means you cannot -use both them and the Library together in an executable that you -distribute. - - 7. You may place library facilities that are a work based on the -Library side-by-side in a single library together with other library -facilities not covered by this License, and distribute such a combined -library, provided that the separate distribution of the work based on -the Library and of the other library facilities is otherwise -permitted, and provided that you do these two things: - - a) Accompany the combined library with a copy of the same work - based on the Library, uncombined with any other library - facilities. This must be distributed under the terms of the - Sections above. - - b) Give prominent notice with the combined library of the fact - that part of it is a work based on the Library, and explaining - where to find the accompanying uncombined form of the same work. - - 8. You may not copy, modify, sublicense, link with, or distribute -the Library except as expressly provided under this License. Any -attempt otherwise to copy, modify, sublicense, link with, or -distribute the Library is void, and will automatically terminate your -rights under this License. However, parties who have received copies, -or rights, from you under this License will not have their licenses -terminated so long as such parties remain in full compliance. - - 9. You are not required to accept this License, since you have not -signed it. 
However, nothing else grants you permission to modify or -distribute the Library or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Library (or any work based on the -Library), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Library or works based on it. - - 10. Each time you redistribute the Library (or any work based on the -Library), the recipient automatically receives a license from the -original licensor to copy, distribute, link with or modify the Library -subject to these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties with -this License. - - 11. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Library at all. For example, if a patent -license would not permit royalty-free redistribution of the Library by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Library. - -If any portion of this section is held invalid or unenforceable under any -particular circumstance, the balance of the section is intended to apply, -and the section as a whole is intended to apply in other circumstances. 
- -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 12. If the distribution and/or use of the Library is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Library under this License may add -an explicit geographical distribution limitation excluding those countries, -so that distribution is permitted only in or among countries not thus -excluded. In such case, this License incorporates the limitation as if -written in the body of this License. - - 13. The Free Software Foundation may publish revised and/or new -versions of the Lesser General Public License from time to time. -Such new versions will be similar in spirit to the present version, -but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Library -specifies a version number of this License which applies to it and -"any later version", you have the option of following the terms and -conditions either of that version or of any later version published by -the Free Software Foundation. If the Library does not specify a -license version number, you may choose any version ever published by -the Free Software Foundation. - - 14. 
If you wish to incorporate parts of the Library into other free -programs whose distribution conditions are incompatible with these, -write to the author to ask for permission. For software which is -copyrighted by the Free Software Foundation, write to the Free -Software Foundation; we sometimes make exceptions for this. Our -decision will be guided by the two goals of preserving the free status -of all derivatives of our free software and of promoting the sharing -and reuse of software generally. - - NO WARRANTY - - 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO -WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY -KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE -LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME -THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN -WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY -AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU -FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR -CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE -LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A -FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF -SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. 
- - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Libraries - - If you develop a new library, and you want it to be of the greatest -possible use to the public, we recommend making it free software that -everyone can redistribute and change. You can do so by permitting -redistribution under these terms (or, alternatively, under the terms of the -ordinary General Public License). - - To apply these terms, attach the following notices to the library. It is -safest to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. - - <one line to give the library's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -Also add information on how to contact you by electronic and paper mail. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the library, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - library `Frob' (a library for tweaking knobs) written by James Random Hacker. - - <signature of Ty Coon>, 1 April 1990 - Ty Coon, President of Vice - -That's all there is to it! 
diff --git a/nuparu/include/Eigen/COPYING.MINPACK b/nuparu/include/Eigen/COPYING.MINPACK deleted file mode 100644 index ae7984da..00000000 --- a/nuparu/include/Eigen/COPYING.MINPACK +++ /dev/null @@ -1,52 +0,0 @@ -Minpack Copyright Notice (1999) University of Chicago. All rights reserved - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the -following conditions are met: - -1. Redistributions of source code must retain the above -copyright notice, this list of conditions and the following -disclaimer. - -2. Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following -disclaimer in the documentation and/or other materials -provided with the distribution. - -3. The end-user documentation included with the -redistribution, if any, must include the following -acknowledgment: - - "This product includes software developed by the - University of Chicago, as Operator of Argonne National - Laboratory. - -Alternately, this acknowledgment may appear in the software -itself, if and wherever such third-party acknowledgments -normally appear. - -4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" -WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE -UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND -THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE -OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY -OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR -USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF -THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) -DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION -UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL -BE CORRECTED. - -5. LIMITATION OF LIABILITY. 
IN NO EVENT WILL THE COPYRIGHT -HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF -ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, -INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF -ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF -PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER -SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT -(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, -EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE -POSSIBILITY OF SUCH LOSS OR DAMAGES. - diff --git a/nuparu/include/Eigen/COPYING.MPL2 b/nuparu/include/Eigen/COPYING.MPL2 deleted file mode 100644 index 14e2f777..00000000 --- a/nuparu/include/Eigen/COPYING.MPL2 +++ /dev/null @@ -1,373 +0,0 @@ -Mozilla Public License Version 2.0 -================================== - -1. Definitions --------------- - -1.1. "Contributor" - means each individual or legal entity that creates, contributes to - the creation of, or owns Covered Software. - -1.2. "Contributor Version" - means the combination of the Contributions of others (if any) used - by a Contributor and that particular Contributor's Contribution. - -1.3. "Contribution" - means Covered Software of a particular Contributor. - -1.4. "Covered Software" - means Source Code Form to which the initial Contributor has attached - the notice in Exhibit A, the Executable Form of such Source Code - Form, and Modifications of such Source Code Form, in each case - including portions thereof. - -1.5. "Incompatible With Secondary Licenses" - means - - (a) that the initial Contributor has attached the notice described - in Exhibit B to the Covered Software; or - - (b) that the Covered Software was made available under the terms of - version 1.1 or earlier of the License, but not also under the - terms of a Secondary License. - -1.6. "Executable Form" - means any form of the work other than Source Code Form. - -1.7. 
"Larger Work" - means a work that combines Covered Software with other material, in - a separate file or files, that is not Covered Software. - -1.8. "License" - means this document. - -1.9. "Licensable" - means having the right to grant, to the maximum extent possible, - whether at the time of the initial grant or subsequently, any and - all of the rights conveyed by this License. - -1.10. "Modifications" - means any of the following: - - (a) any file in Source Code Form that results from an addition to, - deletion from, or modification of the contents of Covered - Software; or - - (b) any new file in Source Code Form that contains any Covered - Software. - -1.11. "Patent Claims" of a Contributor - means any patent claim(s), including without limitation, method, - process, and apparatus claims, in any patent Licensable by such - Contributor that would be infringed, but for the grant of the - License, by the making, using, selling, offering for sale, having - made, import, or transfer of either its Contributions or its - Contributor Version. - -1.12. "Secondary License" - means either the GNU General Public License, Version 2.0, the GNU - Lesser General Public License, Version 2.1, the GNU Affero General - Public License, Version 3.0, or any later versions of those - licenses. - -1.13. "Source Code Form" - means the form of the work preferred for making modifications. - -1.14. "You" (or "Your") - means an individual or a legal entity exercising rights under this - License. For legal entities, "You" includes any entity that - controls, is controlled by, or is under common control with You. For - purposes of this definition, "control" means (a) the power, direct - or indirect, to cause the direction or management of such entity, - whether by contract or otherwise, or (b) ownership of more than - fifty percent (50%) of the outstanding shares or beneficial - ownership of such entity. - -2. License Grants and Conditions --------------------------------- - -2.1. 
Grants - -Each Contributor hereby grants You a world-wide, royalty-free, -non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or - as part of a Larger Work; and - -(b) under Patent Claims of such Contributor to make, use, sell, offer - for sale, have made, import, and otherwise transfer either its - Contributions or its Contributor Version. - -2.2. Effective Date - -The licenses granted in Section 2.1 with respect to any Contribution -become effective for each Contribution on the date the Contributor first -distributes such Contribution. - -2.3. Limitations on Grant Scope - -The licenses granted in this Section 2 are the only rights granted under -this License. No additional rights or licenses will be implied from the -distribution or licensing of Covered Software under this License. -Notwithstanding Section 2.1(b) above, no patent license is granted by a -Contributor: - -(a) for any code that a Contributor has removed from Covered Software; - or - -(b) for infringements caused by: (i) Your and any other third party's - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or - -(c) under Patent Claims infringed by Covered Software in the absence of - its Contributions. - -This License does not grant any rights in the trademarks, service marks, -or logos of any Contributor (except as may be necessary to comply with -the notice requirements in Section 3.4). - -2.4. 
Subsequent Licenses - -No Contributor makes additional grants as a result of Your choice to -distribute the Covered Software under a subsequent version of this -License (see Section 10.2) or under the terms of a Secondary License (if -permitted under the terms of Section 3.3). - -2.5. Representation - -Each Contributor represents that the Contributor believes its -Contributions are its original creation(s) or it has sufficient rights -to grant the rights to its Contributions conveyed by this License. - -2.6. Fair Use - -This License is not intended to limit any rights You have under -applicable copyright doctrines of fair use, fair dealing, or other -equivalents. - -2.7. Conditions - -Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted -in Section 2.1. - -3. Responsibilities -------------------- - -3.1. Distribution of Source Form - -All distribution of Covered Software in Source Code Form, including any -Modifications that You create or to which You contribute, must be under -the terms of this License. You must inform recipients that the Source -Code Form of the Covered Software is governed by the terms of this -License, and how they can obtain a copy of this License. You may not -attempt to alter or restrict the recipients' rights in the Source Code -Form. - -3.2. 
Distribution of Executable Form - -If You distribute Covered Software in Executable Form then: - -(a) such Covered Software must also be made available in Source Code - Form, as described in Section 3.1, and You must inform recipients of - the Executable Form how they can obtain a copy of such Source Code - Form by reasonable means in a timely manner, at a charge no more - than the cost of distribution to the recipient; and - -(b) You may distribute such Executable Form under the terms of this - License, or sublicense it under different terms, provided that the - license for the Executable Form does not attempt to limit or alter - the recipients' rights in the Source Code Form under this License. - -3.3. Distribution of a Larger Work - -You may create and distribute a Larger Work under terms of Your choice, -provided that You also comply with the requirements of this License for -the Covered Software. If the Larger Work is a combination of Covered -Software with a work governed by one or more Secondary Licenses, and the -Covered Software is not Incompatible With Secondary Licenses, this -License permits You to additionally distribute such Covered Software -under the terms of such Secondary License(s), so that the recipient of -the Larger Work may, at their option, further distribute the Covered -Software under the terms of either this License or such Secondary -License(s). - -3.4. Notices - -You may not remove or alter the substance of any license notices -(including copyright notices, patent notices, disclaimers of warranty, -or limitations of liability) contained within the Source Code Form of -the Covered Software, except that You may alter any license notices to -the extent required to remedy known factual inaccuracies. - -3.5. Application of Additional Terms - -You may choose to offer, and to charge a fee for, warranty, support, -indemnity or liability obligations to one or more recipients of Covered -Software. 
However, You may do so only on Your own behalf, and not on -behalf of any Contributor. You must make it absolutely clear that any -such warranty, support, indemnity, or liability obligation is offered by -You alone, and You hereby agree to indemnify every Contributor for any -liability incurred by such Contributor as a result of warranty, support, -indemnity or liability terms You offer. You may include additional -disclaimers of warranty and limitations of liability specific to any -jurisdiction. - -4. Inability to Comply Due to Statute or Regulation ---------------------------------------------------- - -If it is impossible for You to comply with any of the terms of this -License with respect to some or all of the Covered Software due to -statute, judicial order, or regulation then You must: (a) comply with -the terms of this License to the maximum extent possible; and (b) -describe the limitations and the code they affect. Such description must -be placed in a text file included with all distributions of the Covered -Software under this License. Except to the extent prohibited by statute -or regulation, such description must be sufficiently detailed for a -recipient of ordinary skill to be able to understand it. - -5. Termination --------------- - -5.1. The rights granted under this License will terminate automatically -if You fail to comply with any of its terms. However, if You become -compliant, then the rights granted under this License from a particular -Contributor are reinstated (a) provisionally, unless and until such -Contributor explicitly and finally terminates Your grants, and (b) on an -ongoing basis, if such Contributor fails to notify You of the -non-compliance by some reasonable means prior to 60 days after You have -come back into compliance. 
Moreover, Your grants from a particular -Contributor are reinstated on an ongoing basis if such Contributor -notifies You of the non-compliance by some reasonable means, this is the -first time You have received notice of non-compliance with this License -from such Contributor, and You become compliant prior to 30 days after -Your receipt of the notice. - -5.2. If You initiate litigation against any entity by asserting a patent -infringement claim (excluding declaratory judgment actions, -counter-claims, and cross-claims) alleging that a Contributor Version -directly or indirectly infringes any patent, then the rights granted to -You by any and all Contributors for the Covered Software under Section -2.1 of this License shall terminate. - -5.3. In the event of termination under Sections 5.1 or 5.2 above, all -end user license agreements (excluding distributors and resellers) which -have been validly granted by You or Your distributors under this License -prior to termination shall survive termination. - -************************************************************************ -* * -* 6. Disclaimer of Warranty * -* ------------------------- * -* * -* Covered Software is provided under this License on an "as is" * -* basis, without warranty of any kind, either expressed, implied, or * -* statutory, including, without limitation, warranties that the * -* Covered Software is free of defects, merchantable, fit for a * -* particular purpose or non-infringing. The entire risk as to the * -* quality and performance of the Covered Software is with You. * -* Should any Covered Software prove defective in any respect, You * -* (not any Contributor) assume the cost of any necessary servicing, * -* repair, or correction. This disclaimer of warranty constitutes an * -* essential part of this License. No use of any Covered Software is * -* authorized under this License except under this disclaimer. 
* -* * -************************************************************************ - -************************************************************************ -* * -* 7. Limitation of Liability * -* -------------------------- * -* * -* Under no circumstances and under no legal theory, whether tort * -* (including negligence), contract, or otherwise, shall any * -* Contributor, or anyone who distributes Covered Software as * -* permitted above, be liable to You for any direct, indirect, * -* special, incidental, or consequential damages of any character * -* including, without limitation, damages for lost profits, loss of * -* goodwill, work stoppage, computer failure or malfunction, or any * -* and all other commercial damages or losses, even if such party * -* shall have been informed of the possibility of such damages. This * -* limitation of liability shall not apply to liability for death or * -* personal injury resulting from such party's negligence to the * -* extent applicable law prohibits such limitation. Some * -* jurisdictions do not allow the exclusion or limitation of * -* incidental or consequential damages, so this exclusion and * -* limitation may not apply to You. * -* * -************************************************************************ - -8. Litigation -------------- - -Any litigation relating to this License may be brought only in the -courts of a jurisdiction where the defendant maintains its principal -place of business and such litigation shall be governed by laws of that -jurisdiction, without reference to its conflict-of-law provisions. -Nothing in this Section shall prevent a party's ability to bring -cross-claims or counter-claims. - -9. Miscellaneous ----------------- - -This License represents the complete agreement concerning the subject -matter hereof. If any provision of this License is held to be -unenforceable, such provision shall be reformed only to the extent -necessary to make it enforceable. 
Any law or regulation which provides -that the language of a contract shall be construed against the drafter -shall not be used to construe this License against a Contributor. - -10. Versions of the License ---------------------------- - -10.1. New Versions - -Mozilla Foundation is the license steward. Except as provided in Section -10.3, no one other than the license steward has the right to modify or -publish new versions of this License. Each version will be given a -distinguishing version number. - -10.2. Effect of New Versions - -You may distribute the Covered Software under the terms of the version -of the License under which You originally received the Covered Software, -or under the terms of any subsequent version published by the license -steward. - -10.3. Modified Versions - -If you create software not governed by this License, and you want to -create a new license for such software, you may create and use a -modified version of this License if you rename the license and remove -any references to the name of the license steward (except to note that -such modified license differs from this License). - -10.4. Distributing Source Code Form that is Incompatible With Secondary -Licenses - -If You choose to distribute Source Code Form that is Incompatible With -Secondary Licenses under the terms of this version of the License, the -notice described in Exhibit B of this License must be attached. - -Exhibit A - Source Code Form License Notice -------------------------------------------- - - This Source Code Form is subject to the terms of the Mozilla Public - License, v. 2.0. If a copy of the MPL was not distributed with this - file, You can obtain one at http://mozilla.org/MPL/2.0/. - -If it is not possible or desirable to put the notice in a particular -file, then You may include the notice in a location (such as a LICENSE -file in a relevant directory) where a recipient would be likely to look -for such a notice. 
- -You may add additional accurate notices of copyright ownership. - -Exhibit B - "Incompatible With Secondary Licenses" Notice ---------------------------------------------------------- - - This Source Code Form is "Incompatible With Secondary Licenses", as - defined by the Mozilla Public License, v. 2.0. diff --git a/nuparu/include/Eigen/COPYING.README b/nuparu/include/Eigen/COPYING.README deleted file mode 100644 index de5b6321..00000000 --- a/nuparu/include/Eigen/COPYING.README +++ /dev/null @@ -1,18 +0,0 @@ -Eigen is primarily MPL2 licensed. See COPYING.MPL2 and these links: - http://www.mozilla.org/MPL/2.0/ - http://www.mozilla.org/MPL/2.0/FAQ.html - -Some files contain third-party code under BSD or LGPL licenses, whence the other -COPYING.* files here. - -All the LGPL code is either LGPL 2.1-only, or LGPL 2.1-or-later. -For this reason, the COPYING.LGPL file contains the LGPL 2.1 text. - -If you want to guarantee that the Eigen code that you are #including is licensed -under the MPL2 and possibly more permissive licenses (like BSD), #define this -preprocessor symbol: - EIGEN_MPL2_ONLY -For example, with most compilers, you could add this to your project CXXFLAGS: - -DEIGEN_MPL2_ONLY -This will cause a compilation error to be generated if you #include any code that is -LGPL licensed. diff --git a/nuparu/include/Eigen/Cholesky b/nuparu/include/Eigen/Cholesky index f727f5d8..705a04cc 100644 --- a/nuparu/include/Eigen/Cholesky +++ b/nuparu/include/Eigen/Cholesky @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_CHOLESKY_MODULE_H #define EIGEN_CHOLESKY_MODULE_H @@ -10,16 +17,17 @@ * * * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices. 
- * Those decompositions are accessible via the following MatrixBase methods: - * - MatrixBase::llt(), + * Those decompositions are also accessible via the following methods: + * - MatrixBase::llt() * - MatrixBase::ldlt() + * - SelfAdjointView::llt() + * - SelfAdjointView::ldlt() * * \code * #include * \endcode */ -#include "src/misc/Solve.h" #include "src/Cholesky/LLT.h" #include "src/Cholesky/LDLT.h" #ifdef EIGEN_USE_LAPACKE diff --git a/nuparu/include/Eigen/CholmodSupport b/nuparu/include/Eigen/CholmodSupport index 745b884e..83e2c1da 100644 --- a/nuparu/include/Eigen/CholmodSupport +++ b/nuparu/include/Eigen/CholmodSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_CHOLMODSUPPORT_MODULE_H #define EIGEN_CHOLMODSUPPORT_MODULE_H @@ -33,12 +40,8 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/CholmodSupport/CholmodSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_CHOLMODSUPPORT_MODULE_H diff --git a/nuparu/include/Eigen/Core b/nuparu/include/Eigen/Core index 9131cc3f..63602f4c 100644 --- a/nuparu/include/Eigen/Core +++ b/nuparu/include/Eigen/Core @@ -14,6 +14,48 @@ // first thing Eigen does: stop the compiler from committing suicide #include "src/Core/util/DisableStupidWarnings.h" +// Handle NVCC/CUDA +#ifdef __CUDACC__ + // Do not try asserts on CUDA! + #ifndef EIGEN_NO_DEBUG + #define EIGEN_NO_DEBUG + #endif + + #ifdef EIGEN_INTERNAL_DEBUGGING + #undef EIGEN_INTERNAL_DEBUGGING + #endif + + // Do not try to vectorize on CUDA! 
+ #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif + + #ifdef EIGEN_EXCEPTIONS + #undef EIGEN_EXCEPTIONS + #endif + + // All functions callable from CUDA code must be qualified with __device__ + #define EIGEN_DEVICE_FUNC __host__ __device__ + +#else + #define EIGEN_DEVICE_FUNC + +#endif + +#if defined(__CUDA_ARCH__) + #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC; +#else + #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; +#endif + +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) + #define EIGEN_EXCEPTIONS +#endif + +#ifdef EIGEN_EXCEPTIONS + #include +#endif + // then include this file where all our macros are defined. It's really important to do it first because // it's where we do all the alignment settings (platform detection and honoring the user's will if he // defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization. @@ -21,7 +63,7 @@ // Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3) // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. -#if defined(__MINGW32__) && EIGEN_GNUC_AT_LEAST(4,6) +#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) #pragma GCC optimize ("-fno-ipa-cp-clone") #endif @@ -31,26 +73,26 @@ // and inclusion of their respective header files #include "src/Core/util/MKL_support.h" -// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into -// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks -#if !EIGEN_ALIGN +// if alignment is disabled, then disable vectorization. 
Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into +// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks +#if EIGEN_MAX_ALIGN_BYTES==0 #ifndef EIGEN_DONT_VECTORIZE #define EIGEN_DONT_VECTORIZE #endif #endif -#ifdef _MSC_VER +#if EIGEN_COMP_MSVC #include // for _aligned_malloc -- need it regardless of whether vectorization is enabled - #if (_MSC_VER >= 1500) // 2008 or later + #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later // Remember that usage of defined() in a #define is undefined by the standard. // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. - #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64) + #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER #endif #endif #else // Remember that usage of defined() in a #define is undefined by the standard - #if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) ) + #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC #endif #endif @@ -82,6 +124,19 @@ #ifdef __SSE4_2__ #define EIGEN_VECTORIZE_SSE4_2 #endif + #ifdef __AVX__ + #define EIGEN_VECTORIZE_AVX + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #ifdef __AVX2__ + #define EIGEN_VECTORIZE_AVX2 + #endif + #ifdef __FMA__ + #define EIGEN_VECTORIZE_FMA + #endif // include files @@ -95,7 +150,7 @@ extern "C" { // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. // Doing so triggers some issues with ICC. 
However old gcc versions seems to not have this file, thus: - #ifdef __INTEL_COMPILER + #if EIGEN_COMP_ICC >= 1110 #include #else #include @@ -112,8 +167,20 @@ #ifdef EIGEN_VECTORIZE_SSE4_2 #include #endif + #ifdef EIGEN_VECTORIZE_AVX + #include + #endif #endif } // end extern "C" + #elif defined __VSX__ + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_VSX + #include + // We need to #undef all these ugly tokens defined in + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel #elif defined __ALTIVEC__ #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_ALTIVEC @@ -123,13 +190,18 @@ #undef bool #undef vector #undef pixel - #elif defined __ARM_NEON__ + #elif (defined __ARM_NEON) || (defined __ARM_NEON__) #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_NEON #include #endif #endif +#if defined __CUDACC__ + #define EIGEN_VECTORIZE_CUDA + #include +#endif + #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE) #define EIGEN_HAS_OPENMP #endif @@ -139,7 +211,7 @@ #endif // MSVC for windows mobile does not have the errno.h file -#if !(defined(_MSC_VER) && defined(_WIN32_WCE)) && !defined(__ARMCC_VERSION) +#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM #define EIGEN_HAS_ERRNO #endif @@ -165,23 +237,17 @@ #endif // required for __cpuid, needs to be included after cmath -#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) +#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE #include #endif -#if defined(_CPPUNWIND) || defined(__EXCEPTIONS) - #define EIGEN_EXCEPTIONS -#endif - -#ifdef EIGEN_EXCEPTIONS - #include -#endif - /** \brief Namespace containing all symbols from the %Eigen library. 
*/ namespace Eigen { inline static const char *SimdInstructionSetsInUse(void) { -#if defined(EIGEN_VECTORIZE_SSE4_2) +#if defined(EIGEN_VECTORIZE_AVX) + return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_2) return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; #elif defined(EIGEN_VECTORIZE_SSE4_1) return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; @@ -193,6 +259,8 @@ inline static const char *SimdInstructionSetsInUse(void) { return "SSE, SSE2"; #elif defined(EIGEN_VECTORIZE_ALTIVEC) return "AltiVec"; +#elif defined(EIGEN_VECTORIZE_VSX) + return "VSX"; #elif defined(EIGEN_VECTORIZE_NEON) return "ARM NEON"; #else @@ -202,34 +270,9 @@ inline static const char *SimdInstructionSetsInUse(void) { } // end namespace Eigen -#define STAGE10_FULL_EIGEN2_API 10 -#define STAGE20_RESOLVE_API_CONFLICTS 20 -#define STAGE30_FULL_EIGEN3_API 30 -#define STAGE40_FULL_EIGEN3_STRICTNESS 40 -#define STAGE99_NO_EIGEN2_SUPPORT 99 - -#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS -#elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API -#elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS -#elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API -#elif defined EIGEN2_SUPPORT - // default to stage 3, that's what it's always meant - #define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API - #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API -#else - #define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT -#endif - -#ifdef EIGEN2_SUPPORT -#undef minor +#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined 
EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT +// This will generate an error message: +#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information #endif // we use size_t frequently and we'll never remember to prepend it with std:: everytime just to @@ -249,45 +292,75 @@ using std::ptrdiff_t; */ #include "src/Core/util/Constants.h" -#include "src/Core/util/ForwardDeclarations.h" #include "src/Core/util/Meta.h" +#include "src/Core/util/ForwardDeclarations.h" #include "src/Core/util/StaticAssert.h" #include "src/Core/util/XprHelper.h" #include "src/Core/util/Memory.h" #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" +#include "src/Core/SpecialFunctions.h" #include "src/Core/GenericPacketMath.h" -#if defined EIGEN_VECTORIZE_SSE +#if defined EIGEN_VECTORIZE_AVX + // Use AVX for floats and doubles, SSE for integers + #include "src/Core/arch/SSE/PacketMath.h" + #include "src/Core/arch/SSE/Complex.h" + #include "src/Core/arch/SSE/MathFunctions.h" + #include "src/Core/arch/AVX/PacketMath.h" + #include "src/Core/arch/AVX/MathFunctions.h" + #include "src/Core/arch/AVX/Complex.h" + #include "src/Core/arch/AVX/TypeCasting.h" +#elif defined EIGEN_VECTORIZE_SSE #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/SSE/Complex.h" -#elif defined EIGEN_VECTORIZE_ALTIVEC + #include "src/Core/arch/SSE/TypeCasting.h" +#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) #include "src/Core/arch/AltiVec/PacketMath.h" + #include "src/Core/arch/AltiVec/MathFunctions.h" #include "src/Core/arch/AltiVec/Complex.h" #elif defined EIGEN_VECTORIZE_NEON #include "src/Core/arch/NEON/PacketMath.h" + #include "src/Core/arch/NEON/MathFunctions.h" #include "src/Core/arch/NEON/Complex.h" #endif +#if defined EIGEN_VECTORIZE_CUDA + #include "src/Core/arch/CUDA/PacketMath.h" + #include 
"src/Core/arch/CUDA/MathFunctions.h" +#endif + #include "src/Core/arch/Default/Settings.h" -#include "src/Core/Functors.h" +#include "src/Core/functors/BinaryFunctors.h" +#include "src/Core/functors/UnaryFunctors.h" +#include "src/Core/functors/NullaryFunctors.h" +#include "src/Core/functors/StlFunctors.h" +#include "src/Core/functors/AssignmentFunctors.h" + #include "src/Core/DenseCoeffsBase.h" #include "src/Core/DenseBase.h" #include "src/Core/MatrixBase.h" #include "src/Core/EigenBase.h" +#include "src/Core/Product.h" +#include "src/Core/CoreEvaluators.h" +#include "src/Core/AssignEvaluator.h" + #ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874 // at least confirmed with Doxygen 1.5.5 and 1.5.6 #include "src/Core/Assign.h" #endif +#include "src/Core/ArrayBase.h" #include "src/Core/util/BlasUtil.h" #include "src/Core/DenseStorage.h" #include "src/Core/NestByValue.h" -#include "src/Core/ForceAlignedAccess.h" + +// #include "src/Core/ForceAlignedAccess.h" + #include "src/Core/ReturnByValue.h" #include "src/Core/NoAlias.h" #include "src/Core/PlainObjectBase.h" @@ -300,32 +373,33 @@ using std::ptrdiff_t; #include "src/Core/SelfCwiseBinaryOp.h" #include "src/Core/Dot.h" #include "src/Core/StableNorm.h" -#include "src/Core/MapBase.h" #include "src/Core/Stride.h" +#include "src/Core/MapBase.h" #include "src/Core/Map.h" +#include "src/Core/Ref.h" #include "src/Core/Block.h" #include "src/Core/VectorBlock.h" -#include "src/Core/Ref.h" #include "src/Core/Transpose.h" #include "src/Core/DiagonalMatrix.h" #include "src/Core/Diagonal.h" #include "src/Core/DiagonalProduct.h" -#include "src/Core/PermutationMatrix.h" -#include "src/Core/Transpositions.h" #include "src/Core/Redux.h" #include "src/Core/Visitor.h" #include "src/Core/Fuzzy.h" #include "src/Core/IO.h" #include "src/Core/Swap.h" #include "src/Core/CommaInitializer.h" -#include "src/Core/Flagged.h" -#include "src/Core/ProductBase.h" #include "src/Core/GeneralProduct.h" +#include 
"src/Core/Solve.h" +#include "src/Core/Inverse.h" +#include "src/Core/SolverBase.h" +#include "src/Core/PermutationMatrix.h" +#include "src/Core/Transpositions.h" #include "src/Core/TriangularMatrix.h" #include "src/Core/SelfAdjointView.h" #include "src/Core/products/GeneralBlockPanelKernel.h" #include "src/Core/products/Parallelizer.h" -#include "src/Core/products/CoeffBasedProduct.h" +#include "src/Core/ProductEvaluators.h" #include "src/Core/products/GeneralMatrixVector.h" #include "src/Core/products/GeneralMatrixMatrix.h" #include "src/Core/SolveTriangular.h" @@ -347,7 +421,6 @@ using std::ptrdiff_t; #include "src/Core/Random.h" #include "src/Core/Replicate.h" #include "src/Core/Reverse.h" -#include "src/Core/ArrayBase.h" #include "src/Core/ArrayWrapper.h" #ifdef EIGEN_USE_BLAS @@ -369,8 +442,4 @@ using std::ptrdiff_t; #include "src/Core/util/ReenableStupidWarnings.h" -#ifdef EIGEN2_SUPPORT -#include "Eigen2Support" -#endif - #endif // EIGEN_CORE_H diff --git a/nuparu/include/Eigen/Eigen b/nuparu/include/Eigen/Eigen index 19b40ea4..654c8dc6 100644 --- a/nuparu/include/Eigen/Eigen +++ b/nuparu/include/Eigen/Eigen @@ -1,2 +1,2 @@ #include "Dense" -//#include "Sparse" +#include "Sparse" diff --git a/nuparu/include/Eigen/Eigen2Support b/nuparu/include/Eigen/Eigen2Support deleted file mode 100644 index 36156d29..00000000 --- a/nuparu/include/Eigen/Eigen2Support +++ /dev/null @@ -1,82 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN2SUPPORT_H -#define EIGEN2SUPPORT_H - -#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H)) -#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header -#endif - -#include "src/Core/util/DisableStupidWarnings.h" - -/** \ingroup Support_modules - * \defgroup Eigen2Support_Module Eigen2 support module - * This module provides a couple of deprecated functions improving the compatibility with Eigen2. - * - * To use it, define EIGEN2_SUPPORT before including any Eigen header - * \code - * #define EIGEN2_SUPPORT - * \endcode - * - */ - -#include "src/Eigen2Support/Macros.h" -#include "src/Eigen2Support/Memory.h" -#include "src/Eigen2Support/Meta.h" -#include "src/Eigen2Support/Lazy.h" -#include "src/Eigen2Support/Cwise.h" -#include "src/Eigen2Support/CwiseOperators.h" -#include "src/Eigen2Support/TriangularSolver.h" -#include "src/Eigen2Support/Block.h" -#include "src/Eigen2Support/VectorBlock.h" -#include "src/Eigen2Support/Minor.h" -#include "src/Eigen2Support/MathFunctions.h" - - -#include "src/Core/util/ReenableStupidWarnings.h" - -// Eigen2 used to include iostream -#include - -#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \ -using Eigen::Matrix##SizeSuffix##TypeSuffix; \ -using Eigen::Vector##SizeSuffix##TypeSuffix; \ -using Eigen::RowVector##SizeSuffix##TypeSuffix; - -#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \ - -#define EIGEN_USING_MATRIX_TYPEDEFS \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd) - -#define USING_PART_OF_NAMESPACE_EIGEN \ 
-EIGEN_USING_MATRIX_TYPEDEFS \ -using Eigen::Matrix; \ -using Eigen::MatrixBase; \ -using Eigen::ei_random; \ -using Eigen::ei_real; \ -using Eigen::ei_imag; \ -using Eigen::ei_conj; \ -using Eigen::ei_abs; \ -using Eigen::ei_abs2; \ -using Eigen::ei_sqrt; \ -using Eigen::ei_exp; \ -using Eigen::ei_log; \ -using Eigen::ei_sin; \ -using Eigen::ei_cos; - -#endif // EIGEN2SUPPORT_H diff --git a/nuparu/include/Eigen/Eigenvalues b/nuparu/include/Eigen/Eigenvalues index 53c5a73a..ea93eb30 100644 --- a/nuparu/include/Eigen/Eigenvalues +++ b/nuparu/include/Eigen/Eigenvalues @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_EIGENVALUES_MODULE_H #define EIGEN_EIGENVALUES_MODULE_H diff --git a/nuparu/include/Eigen/Geometry b/nuparu/include/Eigen/Geometry index efd9d450..06b736e3 100644 --- a/nuparu/include/Eigen/Geometry +++ b/nuparu/include/Eigen/Geometry @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_GEOMETRY_MODULE_H #define EIGEN_GEOMETRY_MODULE_H @@ -9,10 +16,6 @@ #include "LU" #include -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - /** \defgroup Geometry_Module Geometry module * * @@ -33,27 +36,23 @@ #include "src/Geometry/OrthoMethods.h" #include "src/Geometry/EulerAngles.h" -#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS - #include "src/Geometry/Homogeneous.h" - #include "src/Geometry/RotationBase.h" - #include "src/Geometry/Rotation2D.h" - #include "src/Geometry/Quaternion.h" - #include "src/Geometry/AngleAxis.h" - #include "src/Geometry/Transform.h" - #include "src/Geometry/Translation.h" - #include "src/Geometry/Scaling.h" - #include "src/Geometry/Hyperplane.h" - #include "src/Geometry/ParametrizedLine.h" - #include "src/Geometry/AlignedBox.h" - #include "src/Geometry/Umeyama.h" - - #if defined EIGEN_VECTORIZE_SSE - #include "src/Geometry/arch/Geometry_SSE.h" - #endif -#endif - -#ifdef EIGEN2_SUPPORT -#include "src/Eigen2Support/Geometry/All.h" +#include "src/Geometry/Homogeneous.h" +#include "src/Geometry/RotationBase.h" +#include "src/Geometry/Rotation2D.h" +#include "src/Geometry/Quaternion.h" +#include "src/Geometry/AngleAxis.h" +#include "src/Geometry/Transform.h" +#include "src/Geometry/Translation.h" +#include "src/Geometry/Scaling.h" +#include "src/Geometry/Hyperplane.h" +#include "src/Geometry/ParametrizedLine.h" +#include "src/Geometry/AlignedBox.h" +#include "src/Geometry/Umeyama.h" + +// Use the SSE optimized version whenever possible. 
At the moment the +// SSE version doesn't compile when AVX is enabled +#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX +#include "src/Geometry/arch/Geometry_SSE.h" #endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/nuparu/include/Eigen/Householder b/nuparu/include/Eigen/Householder index 6e348db5..89cd81b1 100644 --- a/nuparu/include/Eigen/Householder +++ b/nuparu/include/Eigen/Householder @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_HOUSEHOLDER_MODULE_H #define EIGEN_HOUSEHOLDER_MODULE_H diff --git a/nuparu/include/Eigen/IterativeLinearSolvers b/nuparu/include/Eigen/IterativeLinearSolvers index 0f4159dc..957d5750 100644 --- a/nuparu/include/Eigen/IterativeLinearSolvers +++ b/nuparu/include/Eigen/IterativeLinearSolvers @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H #define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H @@ -12,28 +19,29 @@ * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse. * Those solvers are accessible via the following classes: * - ConjugateGradient for selfadjoint (hermitian) matrices, + * - LeastSquaresConjugateGradient for rectangular least-square problems, * - BiCGSTAB for general square matrices. 
* * These iterative solvers are associated with some preconditioners: * - IdentityPreconditioner - not really useful - * - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices. - * - IncompleteILUT - incomplete LU factorization with dual thresholding + * - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices. + * - IncompleteLUT - incomplete LU factorization with dual thresholding * * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport. * - * \code - * #include - * \endcode + \code + #include + \endcode */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - +#include "src/IterativeLinearSolvers/SolveWithGuess.h" #include "src/IterativeLinearSolvers/IterativeSolverBase.h" #include "src/IterativeLinearSolvers/BasicPreconditioners.h" #include "src/IterativeLinearSolvers/ConjugateGradient.h" +#include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" #include "src/IterativeLinearSolvers/BiCGSTAB.h" #include "src/IterativeLinearSolvers/IncompleteLUT.h" +#include "src/IterativeLinearSolvers/IncompleteCholesky.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/nuparu/include/Eigen/Jacobi b/nuparu/include/Eigen/Jacobi index ba8a4dc3..17c1d785 100644 --- a/nuparu/include/Eigen/Jacobi +++ b/nuparu/include/Eigen/Jacobi @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_JACOBI_MODULE_H #define EIGEN_JACOBI_MODULE_H diff --git a/nuparu/include/Eigen/LU b/nuparu/include/Eigen/LU index db579550..2d70c92d 100644 --- a/nuparu/include/Eigen/LU +++ b/nuparu/include/Eigen/LU @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_LU_MODULE_H #define EIGEN_LU_MODULE_H @@ -16,7 +23,6 @@ * \endcode */ -#include "src/misc/Solve.h" #include "src/misc/Kernel.h" #include "src/misc/Image.h" #include "src/LU/FullPivLU.h" @@ -25,16 +31,14 @@ #include "src/LU/PartialPivLU_MKL.h" #endif #include "src/LU/Determinant.h" -#include "src/LU/Inverse.h" +#include "src/LU/InverseImpl.h" -#if defined EIGEN_VECTORIZE_SSE +// Use the SSE optimized version whenever possible. At the moment the +// SSE version doesn't compile when AVX is enabled +#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX #include "src/LU/arch/Inverse_SSE.h" #endif -#ifdef EIGEN2_SUPPORT - #include "src/Eigen2Support/LU.h" -#endif - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_LU_MODULE_H diff --git a/nuparu/include/Eigen/LeastSquares b/nuparu/include/Eigen/LeastSquares deleted file mode 100644 index 35137c25..00000000 --- a/nuparu/include/Eigen/LeastSquares +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef EIGEN_REGRESSION_MODULE_H -#define EIGEN_REGRESSION_MODULE_H - -#ifndef EIGEN2_SUPPORT -#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT) -#endif - -// exclude from normal eigen3-only documentation -#ifdef EIGEN2_SUPPORT - -#include "Core" - -#include "src/Core/util/DisableStupidWarnings.h" - -#include "Eigenvalues" -#include "Geometry" - -/** \defgroup LeastSquares_Module LeastSquares module - * This module provides linear regression and 
related features. - * - * \code - * #include - * \endcode - */ - -#include "src/Eigen2Support/LeastSquares.h" - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN2_SUPPORT - -#endif // EIGEN_REGRESSION_MODULE_H diff --git a/nuparu/include/Eigen/MetisSupport b/nuparu/include/Eigen/MetisSupport index 6a113f7a..85c41bf3 100644 --- a/nuparu/include/Eigen/MetisSupport +++ b/nuparu/include/Eigen/MetisSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_METISSUPPORT_MODULE_H #define EIGEN_METISSUPPORT_MODULE_H diff --git a/nuparu/include/Eigen/OrderingMethods b/nuparu/include/Eigen/OrderingMethods index 7c0f1fff..d8ea3619 100644 --- a/nuparu/include/Eigen/OrderingMethods +++ b/nuparu/include/Eigen/OrderingMethods @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_ORDERINGMETHODS_MODULE_H #define EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/nuparu/include/Eigen/PaStiXSupport b/nuparu/include/Eigen/PaStiXSupport index 7c616ee5..3411dfac 100644 --- a/nuparu/include/Eigen/PaStiXSupport +++ b/nuparu/include/Eigen/PaStiXSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_PASTIXSUPPORT_MODULE_H #define EIGEN_PASTIXSUPPORT_MODULE_H @@ -35,12 +42,8 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/PaStiXSupport/PaStiXSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_PASTIXSUPPORT_MODULE_H diff --git a/nuparu/include/Eigen/PardisoSupport b/nuparu/include/Eigen/PardisoSupport index 99330ce7..340edf51 100644 --- a/nuparu/include/Eigen/PardisoSupport +++ b/nuparu/include/Eigen/PardisoSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_PARDISOSUPPORT_MODULE_H #define EIGEN_PARDISOSUPPORT_MODULE_H @@ -7,8 +14,6 @@ #include -#include - /** \ingroup Support_modules * \defgroup PardisoSupport_Module PardisoSupport module * diff --git a/nuparu/include/Eigen/QR b/nuparu/include/Eigen/QR index ac5b0269..f74f365f 100644 --- a/nuparu/include/Eigen/QR +++ b/nuparu/include/Eigen/QR @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_QR_MODULE_H #define EIGEN_QR_MODULE_H @@ -15,14 +22,15 @@ * * This module provides various QR decompositions * This module also provides some MatrixBase methods, including: - * - MatrixBase::qr(), + * - MatrixBase::householderQr() + * - MatrixBase::colPivHouseholderQr() + * - MatrixBase::fullPivHouseholderQr() * * \code * #include * \endcode */ -#include "src/misc/Solve.h" #include "src/QR/HouseholderQR.h" #include "src/QR/FullPivHouseholderQR.h" #include "src/QR/ColPivHouseholderQR.h" @@ -31,15 +39,7 @@ #include "src/QR/ColPivHouseholderQR_MKL.h" #endif -#ifdef EIGEN2_SUPPORT -#include "src/Eigen2Support/QR.h" -#endif - #include "src/Core/util/ReenableStupidWarnings.h" -#ifdef EIGEN2_SUPPORT -#include "Eigenvalues" -#endif - #endif // EIGEN_QR_MODULE_H /* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/nuparu/include/Eigen/QtAlignedMalloc b/nuparu/include/Eigen/QtAlignedMalloc index 46f7d83b..4044d5ac 100644 --- a/nuparu/include/Eigen/QtAlignedMalloc +++ b/nuparu/include/Eigen/QtAlignedMalloc @@ -1,3 +1,9 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_QTMALLOC_MODULE_H #define EIGEN_QTMALLOC_MODULE_H diff --git a/nuparu/include/Eigen/SPQRSupport b/nuparu/include/Eigen/SPQRSupport index 77016442..f9489dcd 100644 --- a/nuparu/include/Eigen/SPQRSupport +++ b/nuparu/include/Eigen/SPQRSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_SPQRSUPPORT_MODULE_H #define EIGEN_SPQRSUPPORT_MODULE_H @@ -21,8 +28,6 @@ * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" #include "src/CholmodSupport/CholmodSupport.h" #include "src/SPQRSupport/SuiteSparseQRSupport.h" diff --git a/nuparu/include/Eigen/SVD b/nuparu/include/Eigen/SVD index fd310017..b353f3f5 100644 --- a/nuparu/include/Eigen/SVD +++ b/nuparu/include/Eigen/SVD @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SVD_MODULE_H #define EIGEN_SVD_MODULE_H @@ -12,24 +19,25 @@ * * * This module provides SVD decomposition for matrices (both real and complex). - * This decomposition is accessible via the following MatrixBase method: + * Two decomposition algorithms are provided: + * - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones. + * - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems. 
+ * These decompositions are accessible via the respective classes and following MatrixBase methods: * - MatrixBase::jacobiSvd() + * - MatrixBase::bdcSvd() * * \code * #include * \endcode */ -#include "src/misc/Solve.h" +#include "src/SVD/UpperBidiagonalization.h" +#include "src/SVD/SVDBase.h" #include "src/SVD/JacobiSVD.h" +#include "src/SVD/BDCSVD.h" #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) #include "src/SVD/JacobiSVD_MKL.h" #endif -#include "src/SVD/UpperBidiagonalization.h" - -#ifdef EIGEN2_SUPPORT -#include "src/Eigen2Support/SVD.h" -#endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/nuparu/include/Eigen/Sparse b/nuparu/include/Eigen/Sparse index 7cc9c091..a2ef7a66 100644 --- a/nuparu/include/Eigen/Sparse +++ b/nuparu/include/Eigen/Sparse @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_SPARSE_MODULE_H #define EIGEN_SPARSE_MODULE_H @@ -11,9 +18,9 @@ * - \ref SparseQR_Module * - \ref IterativeLinearSolvers_Module * - * \code - * #include - * \endcode + \code + #include + \endcode */ #include "SparseCore" diff --git a/nuparu/include/Eigen/SparseCholesky b/nuparu/include/Eigen/SparseCholesky index 9f5056aa..b6a320c4 100644 --- a/nuparu/include/Eigen/SparseCholesky +++ b/nuparu/include/Eigen/SparseCholesky @@ -34,8 +34,6 @@ #error The SparseCholesky module has nothing to offer in MPL2 only mode #endif -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" #include "src/SparseCholesky/SimplicialCholesky.h" #ifndef EIGEN_MPL2_ONLY diff --git a/nuparu/include/Eigen/SparseCore b/nuparu/include/Eigen/SparseCore index 9b5be5e1..76966c4c 100644 --- a/nuparu/include/Eigen/SparseCore +++ b/nuparu/include/Eigen/SparseCore @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPARSECORE_MODULE_H #define EIGEN_SPARSECORE_MODULE_H @@ -14,7 +21,7 @@ /** * \defgroup SparseCore_Module SparseCore module * - * This module provides a sparse matrix representation, and basic associatd matrix manipulations + * This module provides a sparse matrix representation, and basic associated matrix manipulations * and operations. * * See the \ref TutorialSparse "Sparse tutorial" @@ -26,37 +33,35 @@ * This module depends on: Core. */ -namespace Eigen { - -/** The type used to identify a general sparse storage. 
*/ -struct Sparse {}; - -} - #include "src/SparseCore/SparseUtil.h" #include "src/SparseCore/SparseMatrixBase.h" +#include "src/SparseCore/SparseAssign.h" #include "src/SparseCore/CompressedStorage.h" #include "src/SparseCore/AmbiVector.h" +#include "src/SparseCore/SparseCompressedBase.h" #include "src/SparseCore/SparseMatrix.h" +#include "src/SparseCore/SparseMap.h" #include "src/SparseCore/MappedSparseMatrix.h" #include "src/SparseCore/SparseVector.h" -#include "src/SparseCore/SparseBlock.h" -#include "src/SparseCore/SparseTranspose.h" +#include "src/SparseCore/SparseRef.h" #include "src/SparseCore/SparseCwiseUnaryOp.h" #include "src/SparseCore/SparseCwiseBinaryOp.h" +#include "src/SparseCore/SparseTranspose.h" +#include "src/SparseCore/SparseBlock.h" #include "src/SparseCore/SparseDot.h" -#include "src/SparseCore/SparsePermutation.h" #include "src/SparseCore/SparseRedux.h" -#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/SparseView.h" +#include "src/SparseCore/SparseDiagonalProduct.h" #include "src/SparseCore/ConservativeSparseSparseProduct.h" #include "src/SparseCore/SparseSparseProductWithPruning.h" #include "src/SparseCore/SparseProduct.h" #include "src/SparseCore/SparseDenseProduct.h" -#include "src/SparseCore/SparseDiagonalProduct.h" -#include "src/SparseCore/SparseTriangularView.h" #include "src/SparseCore/SparseSelfAdjointView.h" +#include "src/SparseCore/SparseTriangularView.h" #include "src/SparseCore/TriangularSolver.h" -#include "src/SparseCore/SparseView.h" +#include "src/SparseCore/SparsePermutation.h" +#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/SparseSolverBase.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/nuparu/include/Eigen/SparseLU b/nuparu/include/Eigen/SparseLU index 8527a49b..38b38b53 100644 --- a/nuparu/include/Eigen/SparseLU +++ b/nuparu/include/Eigen/SparseLU @@ -20,9 +20,6 @@ * Please, see the documentation of the SparseLU class for more details. 
*/ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - // Ordering interface #include "OrderingMethods" diff --git a/nuparu/include/Eigen/SparseQR b/nuparu/include/Eigen/SparseQR index 4ee42065..a6f3b7f7 100644 --- a/nuparu/include/Eigen/SparseQR +++ b/nuparu/include/Eigen/SparseQR @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPARSEQR_MODULE_H #define EIGEN_SPARSEQR_MODULE_H @@ -21,9 +28,6 @@ * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "OrderingMethods" #include "src/SparseCore/SparseColEtree.h" #include "src/SparseQR/SparseQR.h" diff --git a/nuparu/include/Eigen/StdDeque b/nuparu/include/Eigen/StdDeque index f2723477..be3a7f82 100644 --- a/nuparu/include/Eigen/StdDeque +++ b/nuparu/include/Eigen/StdDeque @@ -14,7 +14,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ #define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) diff --git a/nuparu/include/Eigen/StdList b/nuparu/include/Eigen/StdList index 225c1e18..07ba1297 100644 --- a/nuparu/include/Eigen/StdList +++ b/nuparu/include/Eigen/StdList @@ -13,7 +13,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ #define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) 
diff --git a/nuparu/include/Eigen/StdVector b/nuparu/include/Eigen/StdVector index 6b22627f..fdfc3776 100644 --- a/nuparu/include/Eigen/StdVector +++ b/nuparu/include/Eigen/StdVector @@ -14,7 +14,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ #define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) diff --git a/nuparu/include/Eigen/SuperLUSupport b/nuparu/include/Eigen/SuperLUSupport index 575e14fb..113f58ee 100644 --- a/nuparu/include/Eigen/SuperLUSupport +++ b/nuparu/include/Eigen/SuperLUSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SUPERLUSUPPORT_MODULE_H #define EIGEN_SUPERLUSUPPORT_MODULE_H @@ -36,6 +43,8 @@ namespace Eigen { struct SluMatrix; } * - class SuperLU: a supernodal sequential LU factorization. * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods). * + * \warning This wrapper is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. + * * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting. 
* * \code @@ -48,12 +57,8 @@ namespace Eigen { struct SluMatrix; } * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/SuperLUSupport/SuperLUSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_SUPERLUSUPPORT_MODULE_H diff --git a/nuparu/include/Eigen/UmfPackSupport b/nuparu/include/Eigen/UmfPackSupport index 984f64a8..4a9f46a1 100644 --- a/nuparu/include/Eigen/UmfPackSupport +++ b/nuparu/include/Eigen/UmfPackSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_UMFPACKSUPPORT_MODULE_H #define EIGEN_UMFPACKSUPPORT_MODULE_H @@ -26,9 +33,6 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/UmfPackSupport/UmfPackSupport.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/nuparu/include/Eigen/src/Cholesky/LDLT.h b/nuparu/include/Eigen/src/Cholesky/LDLT.h index d19cb396..6fcae01f 100644 --- a/nuparu/include/Eigen/src/Cholesky/LDLT.h +++ b/nuparu/include/Eigen/src/Cholesky/LDLT.h @@ -16,7 +16,10 @@ namespace Eigen { namespace internal { -template struct LDLT_Traits; + template struct LDLT_Traits; + + // PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef + enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite }; } /** \ingroup Cholesky_Module @@ -40,7 +43,7 @@ template struct LDLT_Traits; * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky * decomposition to determine whether a system of equations has a solution. 
* - * \sa MatrixBase::ldlt(), class LLT + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT */ template class LDLT { @@ -56,7 +59,8 @@ template class LDLT }; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; - typedef typename MatrixType::Index Index; + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + typedef typename MatrixType::StorageIndex StorageIndex; typedef Matrix TmpMatrixType; typedef Transpositions TranspositionType; @@ -69,7 +73,12 @@ template class LDLT * The default constructor is useful in cases in which the user intends to * perform decompositions via LDLT::compute(const MatrixType&). */ - LDLT() : m_matrix(), m_transpositions(), m_isInitialized(false) {} + LDLT() + : m_matrix(), + m_transpositions(), + m_sign(internal::ZeroSign), + m_isInitialized(false) + {} /** \brief Default Constructor with memory preallocation * @@ -77,10 +86,11 @@ template class LDLT * according to the specified problem \a size. * \sa LDLT() */ - LDLT(Index size) + explicit LDLT(Index size) : m_matrix(size, size), m_transpositions(size), m_temporary(size), + m_sign(internal::ZeroSign), m_isInitialized(false) {} @@ -89,13 +99,15 @@ template class LDLT * This calculates the decomposition for the input \a matrix. 
* \sa LDLT(Index size) */ - LDLT(const MatrixType& matrix) + template + explicit LDLT(const EigenBase& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_transpositions(matrix.rows()), m_temporary(matrix.rows()), + m_sign(internal::ZeroSign), m_isInitialized(false) { - compute(matrix); + compute(matrix.derived()); } /** Clear any existing decomposition @@ -139,21 +151,14 @@ template class LDLT inline bool isPositive() const { eigen_assert(m_isInitialized && "LDLT is not initialized."); - return m_sign == 1; - } - - #ifdef EIGEN2_SUPPORT - inline bool isPositiveDefinite() const - { - return isPositive(); + return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign; } - #endif /** \returns true if the matrix is negative (semidefinite) */ inline bool isNegative(void) const { eigen_assert(m_isInitialized && "LDLT is not initialized."); - return m_sign == -1; + return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign; } /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A. @@ -169,31 +174,23 @@ template class LDLT * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function * computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular. 
* - * \sa MatrixBase::ldlt() + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt() */ template - inline const internal::solve_retval + inline const Solve solve(const MatrixBase& b) const { eigen_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_matrix.rows()==b.rows() && "LDLT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); + return Solve(*this, b.derived()); } - #ifdef EIGEN2_SUPPORT - template - bool solve(const MatrixBase& b, ResultType *result) const - { - *result = this->solve(b); - return true; - } - #endif - template bool solveInPlace(MatrixBase &bAndX) const; - LDLT& compute(const MatrixType& matrix); + template + LDLT& compute(const EigenBase& matrix); template LDLT& rankUpdate(const MatrixBase& w, const RealScalar& alpha=1); @@ -223,8 +220,19 @@ template class LDLT eigen_assert(m_isInitialized && "LDLT is not initialized."); return Success; } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } /** \internal * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U. 
@@ -235,7 +243,7 @@ template class LDLT MatrixType m_matrix; TranspositionType m_transpositions; TmpMatrixType m_temporary; - int m_sign; + internal::SignMatrix m_sign; bool m_isInitialized; }; @@ -246,49 +254,32 @@ template struct ldlt_inplace; template<> struct ldlt_inplace { template - static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0) + static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign) { using std::abs; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; + typedef typename TranspositionType::StorageIndex IndexType; eigen_assert(mat.rows()==mat.cols()); const Index size = mat.rows(); if (size <= 1) { transpositions.setIdentity(); - if(sign) - *sign = numext::real(mat.coeff(0,0))>0 ? 1:-1; + if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef; + else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef; + else sign = ZeroSign; return true; } - RealScalar cutoff(0), biggest_in_corner; - for (Index k = 0; k < size; ++k) { // Find largest diagonal element Index index_of_biggest_in_corner; - biggest_in_corner = mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); + mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); index_of_biggest_in_corner += k; - if(k == 0) - { - // The biggest overall is the point of reference to which further diagonals - // are compared; if any diagonal is negligible compared - // to the largest overall, the algorithm bails. - cutoff = abs(NumTraits::epsilon() * biggest_in_corner); - } - - // Finish early if the matrix is not full rank. 
- if(biggest_in_corner < cutoff) - { - for(Index i = k; i < size; i++) transpositions.coeffRef(i) = i; - if(sign) *sign = 0; - break; - } - - transpositions.coeffRef(k) = index_of_biggest_in_corner; + transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner); if(k != index_of_biggest_in_corner) { // apply the transposition while taking care to consider only @@ -297,7 +288,7 @@ template<> struct ldlt_inplace mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k)); mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s)); std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner)); - for(int i=k+1;i struct ldlt_inplace if(k>0) { - temp.head(k) = mat.diagonal().head(k).asDiagonal() * A10.adjoint(); + temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint(); mat.coeffRef(k,k) -= (A10 * temp.head(k)).value(); if(rs>0) A21.noalias() -= A20 * temp.head(k); } - if((rs>0) && (abs(mat.coeffRef(k,k)) > cutoff)) - A21 /= mat.coeffRef(k,k); - if(sign) - { - // LDLT is not guaranteed to work for indefinite matrices, but let's try to get the sign right - int newSign = numext::real(mat.diagonal().coeff(index_of_biggest_in_corner)) > 0; - if(k == 0) - *sign = newSign; - else if(*sign != newSign) - *sign = 0; + // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot + // was smaller than the cutoff value. However, since LDLT is not rank-revealing + // we should only make sure that we do not introduce INF or NaN values. + // Remark that LAPACK also uses 0 as the cutoff value. 
+ RealScalar realAkk = numext::real(mat.coeffRef(k,k)); + if((rs>0) && (abs(realAkk) > RealScalar(0))) + A21 /= realAkk; + + if (sign == PositiveSemiDef) { + if (realAkk < 0) sign = Indefinite; + } else if (sign == NegativeSemiDef) { + if (realAkk > 0) sign = Indefinite; + } else if (sign == ZeroSign) { + if (realAkk > 0) sign = PositiveSemiDef; + else if (realAkk < 0) sign = NegativeSemiDef; } } @@ -353,7 +349,6 @@ template<> struct ldlt_inplace using numext::isfinite; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; const Index size = mat.rows(); eigen_assert(mat.cols() == size && w.size()==size); @@ -399,7 +394,7 @@ template<> struct ldlt_inplace template<> struct ldlt_inplace { template - static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0) + static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign) { Transpose matt(mat); return ldlt_inplace::unblocked(matt, transpositions, temp, sign); @@ -417,16 +412,16 @@ template struct LDLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } }; template struct LDLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } - static inline MatrixU getU(const MatrixType& m) { return m; } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } }; } // end namespace internal @@ 
-434,18 +429,22 @@ template struct LDLT_Traits /** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix */ template -LDLT& LDLT::compute(const MatrixType& a) +template +LDLT& LDLT::compute(const EigenBase& a) { + check_template_parameters(); + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); - m_matrix = a; + m_matrix = a.derived(); m_transpositions.resize(size); m_isInitialized = false; m_temporary.resize(size); + m_sign = internal::ZeroSign; - internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, &m_sign); + internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, m_sign); m_isInitialized = true; return *this; @@ -458,8 +457,9 @@ LDLT& LDLT::compute(const MatrixType& a) */ template template -LDLT& LDLT::rankUpdate(const MatrixBase& w, const typename NumTraits::Real& sigma) +LDLT& LDLT::rankUpdate(const MatrixBase& w, const typename LDLT::RealScalar& sigma) { + typedef typename TranspositionType::StorageIndex IndexType; const Index size = w.rows(); if (m_isInitialized) { @@ -471,9 +471,9 @@ LDLT& LDLT::rankUpdate(const MatrixBase=0 ? 1 : -1; + m_sign = sigma>=0 ? 
internal::PositiveSemiDef : internal::NegativeSemiDef; m_isInitialized = true; } @@ -482,48 +482,45 @@ LDLT& LDLT::rankUpdate(const MatrixBase -struct solve_retval, Rhs> - : solve_retval_base, Rhs> +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const { - typedef LDLT<_MatrixType,_UpLo> LDLTType; - EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs) - - template void evalTo(Dest& dst) const + eigen_assert(rhs.rows() == rows()); + // dst = P b + dst = m_transpositions * rhs; + + // dst = L^-1 (P b) + matrixL().solveInPlace(dst); + + // dst = D^-1 (L^-1 P b) + // more precisely, use pseudo-inverse of D (see bug 241) + using std::abs; + const typename Diagonal::RealReturnType vecD(vectorD()); + // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon + // as motivated by LAPACK's xGELSS: + // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); + // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest + // diagonal element is not well justified and leads to numerical issues in some cases. + // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. 
+ RealScalar tolerance = RealScalar(1) / NumTraits::highest(); + + for (Index i = 0; i < vecD.size(); ++i) { - eigen_assert(rhs().rows() == dec().matrixLDLT().rows()); - // dst = P b - dst = dec().transpositionsP() * rhs(); - - // dst = L^-1 (P b) - dec().matrixL().solveInPlace(dst); - - // dst = D^-1 (L^-1 P b) - // more precisely, use pseudo-inverse of D (see bug 241) - using std::abs; - using std::max; - typedef typename LDLTType::MatrixType MatrixType; - typedef typename LDLTType::Scalar Scalar; - typedef typename LDLTType::RealScalar RealScalar; - const Diagonal vectorD = dec().vectorD(); - RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() * NumTraits::epsilon(), - RealScalar(1) / NumTraits::highest()); // motivated by LAPACK's xGELSS - for (Index i = 0; i < vectorD.size(); ++i) { - if(abs(vectorD(i)) > tolerance) - dst.row(i) /= vectorD(i); - else - dst.row(i).setZero(); - } + if(abs(vecD(i)) > tolerance) + dst.row(i) /= vecD(i); + else + dst.row(i).setZero(); + } - // dst = L^-T (D^-1 L^-1 P b) - dec().matrixU().solveInPlace(dst); + // dst = L^-T (D^-1 L^-1 P b) + matrixU().solveInPlace(dst); - // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b - dst = dec().transpositionsP().transpose() * dst; - } -}; + // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b + dst = m_transpositions.transpose() * dst; } +#endif /** \internal use x = ldlt_object.solve(x); * @@ -566,7 +563,7 @@ MatrixType LDLT::reconstructedMatrix() const // L^* P res = matrixU() * res; // D(L^*P) - res = vectorD().asDiagonal() * res; + res = vectorD().real().asDiagonal() * res; // L(DL^*P) res = matrixL() * res; // P^T (LDL^*P) @@ -575,8 +572,10 @@ MatrixType LDLT::reconstructedMatrix() const return res; } +#ifndef __CUDACC__ /** \cholesky_module * \returns the Cholesky decomposition with full pivoting without square root of \c *this + * \sa MatrixBase::ldlt() */ template inline const LDLT::PlainObject, UpLo> @@ -587,6 +586,7 @@ SelfAdjointView::ldlt() const /** \cholesky_module * \returns the 
Cholesky decomposition with full pivoting without square root of \c *this + * \sa SelfAdjointView::ldlt() */ template inline const LDLT::PlainObject> @@ -594,6 +594,7 @@ MatrixBase::ldlt() const { return LDLT(derived()); } +#endif // __CUDACC__ } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Cholesky/LLT.h b/nuparu/include/Eigen/src/Cholesky/LLT.h index 2e6189f7..1f0091f3 100644 --- a/nuparu/include/Eigen/src/Cholesky/LLT.h +++ b/nuparu/include/Eigen/src/Cholesky/LLT.h @@ -41,7 +41,7 @@ template struct LLT_Traits; * Example: \include LLT_example.cpp * Output: \verbinclude LLT_example.out * - * \sa MatrixBase::llt(), class LDLT + * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT */ /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH) * Note that during the decomposition, only the upper triangular part of A is considered. Therefore, @@ -59,7 +59,8 @@ template class LLT }; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; - typedef typename MatrixType::Index Index; + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + typedef typename MatrixType::StorageIndex StorageIndex; enum { PacketSize = internal::packet_traits::size, @@ -83,14 +84,15 @@ template class LLT * according to the specified problem \a size. 
* \sa LLT() */ - LLT(Index size) : m_matrix(size, size), + explicit LLT(Index size) : m_matrix(size, size), m_isInitialized(false) {} - LLT(const MatrixType& matrix) + template + explicit LLT(const EigenBase& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_isInitialized(false) { - compute(matrix); + compute(matrix.derived()); } /** \returns a view of the upper triangular matrix U */ @@ -115,33 +117,23 @@ template class LLT * Example: \include LLT_solve.cpp * Output: \verbinclude LLT_solve.out * - * \sa solveInPlace(), MatrixBase::llt() + * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt() */ template - inline const internal::solve_retval + inline const Solve solve(const MatrixBase& b) const { eigen_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_matrix.rows()==b.rows() && "LLT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); + return Solve(*this, b.derived()); } - #ifdef EIGEN2_SUPPORT - template - bool solve(const MatrixBase& b, ResultType *result) const - { - *result = this->solve(b); - return true; - } - - bool isPositiveDefinite() const { return true; } - #endif - template void solveInPlace(MatrixBase &bAndX) const; - LLT& compute(const MatrixType& matrix); + template + LLT& compute(const EigenBase& matrix); /** \returns the LLT decomposition matrix * @@ -172,8 +164,20 @@ template class LLT template LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1); + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + /** \internal * Used to compute and store L * The strict upper part is not used and even not initialized. 
@@ -188,12 +192,11 @@ namespace internal { template struct llt_inplace; template -static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) +static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) { using std::sqrt; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; typedef typename MatrixType::ColXpr ColXpr; typedef typename internal::remove_all::type ColXprCleaned; typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; @@ -262,10 +265,9 @@ template struct llt_inplace { typedef typename NumTraits::Real RealScalar; template - static typename MatrixType::Index unblocked(MatrixType& mat) + static Index unblocked(MatrixType& mat) { using std::sqrt; - typedef typename MatrixType::Index Index; eigen_assert(mat.rows()==mat.cols()); const Index size = mat.rows(); @@ -283,15 +285,14 @@ template struct llt_inplace return k; mat.coeffRef(k,k) = x = sqrt(x); if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint(); - if (rs>0) A21 *= RealScalar(1)/x; + if (rs>0) A21 /= x; } return -1; } template - static typename MatrixType::Index blocked(MatrixType& m) + static Index blocked(MatrixType& m) { - typedef typename MatrixType::Index Index; eigen_assert(m.rows()==m.cols()); Index size = m.rows(); if(size<32) @@ -322,7 +323,7 @@ template struct llt_inplace } template - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } @@ -333,19 +334,19 @@ template struct llt_inplace typedef typename NumTraits::Real RealScalar; template - static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat) + static 
EIGEN_STRONG_INLINE Index unblocked(MatrixType& mat) { Transpose matt(mat); return llt_inplace::unblocked(matt); } template - static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat) + static EIGEN_STRONG_INLINE Index blocked(MatrixType& mat) { Transpose matt(mat); return llt_inplace::blocked(matt); } template - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) { Transpose matt(mat); return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); @@ -356,8 +357,8 @@ template struct LLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } static bool inplace_decomposition(MatrixType& m) { return llt_inplace::blocked(m)==-1; } }; @@ -366,8 +367,8 @@ template struct LLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } - static inline MatrixU getU(const MatrixType& m) { return m; } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } static bool inplace_decomposition(MatrixType& m) { return llt_inplace::blocked(m)==-1; } }; @@ -382,12 +383,15 @@ template struct LLT_Traits * Output: \verbinclude TutorialLinAlgComputeTwice.out */ template -LLT& LLT::compute(const MatrixType& a) +template +LLT& LLT::compute(const EigenBase& a) { + check_template_parameters(); + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); m_matrix.resize(size, size); - m_matrix = a; + 
m_matrix = a.derived(); m_isInitialized = true; bool ok = Traits::inplace_decomposition(m_matrix); @@ -415,22 +419,16 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c return *this; } - -namespace internal { -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const { - typedef LLT<_MatrixType,UpLo> LLTType; - EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs) - - template void evalTo(Dest& dst) const - { - dst = rhs(); - dec().solveInPlace(dst); - } -}; + dst = rhs; + solveInPlace(dst); } +#endif /** \internal use x = llt_object.solve(x); * @@ -465,8 +463,10 @@ MatrixType LLT::reconstructedMatrix() const return matrixL() * matrixL().adjoint().toDenseMatrix(); } +#ifndef __CUDACC__ /** \cholesky_module * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() */ template inline const LLT::PlainObject> @@ -477,6 +477,7 @@ MatrixBase::llt() const /** \cholesky_module * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() */ template inline const LLT::PlainObject, UpLo> @@ -484,7 +485,8 @@ SelfAdjointView::llt() const { return LLT(m_matrix); } - +#endif // __CUDACC__ + } // end namespace Eigen #endif // EIGEN_LLT_H diff --git a/nuparu/include/Eigen/src/Cholesky/LLT_MKL.h b/nuparu/include/Eigen/src/Cholesky/LLT_MKL.h index 64daa445..0d42cb5b 100644 --- a/nuparu/include/Eigen/src/Cholesky/LLT_MKL.h +++ b/nuparu/include/Eigen/src/Cholesky/LLT_MKL.h @@ -46,7 +46,7 @@ template struct mkl_llt; template<> struct mkl_llt \ { \ template \ - static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \ + static inline Index potrf(MatrixType& m, char uplo) \ { \ lapack_int matrix_order; \ lapack_int size, lda, info, StorageOrder; \ @@ -60,30 +60,30 @@ template<> struct mkl_llt \ lda = m.outerStride(); \ \ info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, 
(MKLTYPE*)a, lda ); \ - info = (info==0) ? Success : NumericalIssue; \ + info = (info==0) ? -1 : info>0 ? info-1 : size; \ return info; \ } \ }; \ template<> struct llt_inplace \ { \ template \ - static typename MatrixType::Index blocked(MatrixType& m) \ + static Index blocked(MatrixType& m) \ { \ return mkl_llt::potrf(m, 'L'); \ } \ template \ - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \ }; \ template<> struct llt_inplace \ { \ template \ - static typename MatrixType::Index blocked(MatrixType& m) \ + static Index blocked(MatrixType& m) \ { \ return mkl_llt::potrf(m, 'U'); \ } \ template \ - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ { \ Transpose matt(mat); \ return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); \ diff --git a/nuparu/include/Eigen/src/CholmodSupport/CholmodSupport.h b/nuparu/include/Eigen/src/CholmodSupport/CholmodSupport.h index 783324b0..06421d5e 100644 --- a/nuparu/include/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/nuparu/include/Eigen/src/CholmodSupport/CholmodSupport.h @@ -48,8 +48,8 @@ void cholmod_configure_matrix(CholmodType& mat) /** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object. * Note that the data are shared. 
*/ -template -cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) +template +cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat) { cholmod_sparse res; res.nzmax = mat.nonZeros(); @@ -58,10 +58,12 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) res.p = mat.outerIndexPtr(); res.i = mat.innerIndexPtr(); res.x = mat.valuePtr(); + res.z = 0; res.sorted = 1; if(mat.isCompressed()) { res.packed = 1; + res.nz = 0; } else { @@ -72,11 +74,11 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) res.dtype = 0; res.stype = -1; - if (internal::is_same<_Index,int>::value) + if (internal::is_same<_StorageIndex,int>::value) { res.itype = CHOLMOD_INT; } - else if (internal::is_same<_Index,UF_long>::value) + else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value) { res.itype = CHOLMOD_LONG; } @@ -103,7 +105,7 @@ const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& /** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix. * The data are not copied but shared. */ template -cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) +cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) { cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived()); @@ -136,12 +138,12 @@ cholmod_dense viewAsCholmod(MatrixBase& mat) /** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix. * The data are not copied but shared. 
*/ -template -MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) +template +MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) { - return MappedSparseMatrix - (cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], - static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); + return MappedSparseMatrix + (cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], + static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); } enum CholmodMode { @@ -155,26 +157,35 @@ enum CholmodMode { * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT */ template -class CholmodBase : internal::noncopyable +class CholmodBase : public SparseSolverBase { + protected: + typedef SparseSolverBase Base; + using Base::derived; + using Base::m_isInitialized; public: typedef _MatrixType MatrixType; enum { UpLo = _UpLo }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef MatrixType CholMatrixType; - typedef typename MatrixType::Index Index; + typedef typename MatrixType::StorageIndex StorageIndex; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: CholmodBase() - : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) + : m_cholmodFactor(0), m_info(Success) { + m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); cholmod_start(&m_cholmod); } - CholmodBase(const MatrixType& matrix) - : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) + explicit CholmodBase(const MatrixType& matrix) + : m_cholmodFactor(0), m_info(Success) { m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); cholmod_start(&m_cholmod); @@ -188,11 +199,8 @@ class CholmodBase : internal::noncopyable cholmod_finish(&m_cholmod); } - inline Index cols() const { return m_cholmodFactor->n; } - inline Index rows() const { return m_cholmodFactor->n; } - - Derived& derived() { return *static_cast(this); } - const Derived& derived() const { return *static_cast(this); } + 
inline StorageIndex cols() const { return internal::convert_index(m_cholmodFactor->n); } + inline StorageIndex rows() const { return internal::convert_index(m_cholmodFactor->n); } /** \brief Reports whether previous computation was successful. * @@ -213,35 +221,7 @@ class CholmodBase : internal::noncopyable return derived(); } - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::solve_retval - solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval - solve(const SparseMatrixBase& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval(*this, b.derived()); - } - - /** Performs a symbolic decomposition on the sparcity of \a matrix. + /** Performs a symbolic decomposition on the sparsity pattern of \a matrix. * * This function is particularly useful when solving for several problems having the same structure. * @@ -265,7 +245,7 @@ class CholmodBase : internal::noncopyable /** Performs a numeric decomposition of \a matrix * - * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed. + * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed. 
* * \sa analyzePattern() */ @@ -287,7 +267,7 @@ class CholmodBase : internal::noncopyable #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal */ template - void _solve(const MatrixBase &b, MatrixBase &dest) const + void _solve_impl(const MatrixBase &b, MatrixBase &dest) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); const Index size = m_cholmodFactor->n; @@ -301,15 +281,16 @@ class CholmodBase : internal::noncopyable if(!x_cd) { this->m_info = NumericalIssue; + return; } - // TODO optimize this copy by swapping when possible (be carreful with alignment, etc.) + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) dest = Matrix::Map(reinterpret_cast(x_cd->x),b.rows(),b.cols()); cholmod_free_dense(&x_cd, &m_cholmod); } /** \internal */ template - void _solve(const SparseMatrix &b, SparseMatrix &dest) const + void _solve_impl(const SparseMatrix &b, SparseMatrix &dest) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); const Index size = m_cholmodFactor->n; @@ -322,8 +303,9 @@ class CholmodBase : internal::noncopyable if(!x_cs) { this->m_info = NumericalIssue; + return; } - // TODO optimize this copy by swapping when possible (be carreful with alignment, etc.) + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) 
dest = viewAsEigen(*x_cs); cholmod_free_sparse(&x_cs, &m_cholmod); } @@ -354,7 +336,6 @@ class CholmodBase : internal::noncopyable cholmod_factor* m_cholmodFactor; RealScalar m_shiftOffset[2]; mutable ComputationInfo m_info; - bool m_isInitialized; int m_factorizationIsOk; int m_analysisIsOk; }; @@ -365,14 +346,16 @@ class CholmodBase : internal::noncopyable * * This class allows to solve for A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization * using the Cholmod library. - * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Thefore, it has little practical interest. - * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices + * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical interest. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices * X and B can be either dense or sparse. * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT @@ -392,7 +375,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl CholmodSimplicialLLT(const MatrixType& matrix) : Base() { init(); - compute(matrix); + this->compute(matrix); } ~CholmodSimplicialLLT() {} @@ -412,14 +395,16 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl * * This class allows to solve for A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization * using the Cholmod library. 
- * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Thefore, it has little practical interest. - * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices + * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical interest. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices * X and B can be either dense or sparse. * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT @@ -439,7 +424,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp CholmodSimplicialLDLT(const MatrixType& matrix) : Base() { init(); - compute(matrix); + this->compute(matrix); } ~CholmodSimplicialLDLT() {} @@ -458,13 +443,15 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp * This class allows to solve for A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization * using the Cholmod library. * This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM. - * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices * X and B can be either dense or sparse. * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. 
* + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * * \sa \ref TutorialSparseDirectSolvers @@ -484,7 +471,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper CholmodSupernodalLLT(const MatrixType& matrix) : Base() { init(); - compute(matrix); + this->compute(matrix); } ~CholmodSupernodalLLT() {} @@ -501,7 +488,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper * \brief A general Cholesky factorization and solver based on Cholmod * * This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization - * using the Cholmod library. The sparse matrix A must be selfajoint and positive definite. The vectors or matrices + * using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices * X and B can be either dense or sparse. * * This variant permits to change the underlying Cholesky method at runtime. @@ -512,6 +499,8 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. 
* * \sa \ref TutorialSparseDirectSolvers @@ -531,7 +520,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom CholmodDecomposition(const MatrixType& matrix) : Base() { init(); - compute(matrix); + this->compute(matrix); } ~CholmodDecomposition() {} @@ -569,36 +558,6 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom } }; -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_CHOLMODSUPPORT_H diff --git a/nuparu/include/Eigen/src/Core/Array.h b/nuparu/include/Eigen/src/Core/Array.h index 497efff6..e38eda72 100644 --- a/nuparu/include/Eigen/src/Core/Array.h +++ b/nuparu/include/Eigen/src/Core/Array.h @@ -24,6 +24,9 @@ namespace Eigen { * API for the %Matrix class provides easy access to linear-algebra * operations. * + * See documentation of class Matrix for detailed information on the template parameters + * storage layout. + * * This class can be extended with the help of the plugin mechanism described on the page * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. * @@ -69,11 +72,27 @@ class Array * the usage of 'using'. This should be done only for operator=. */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array& operator=(const EigenBase &other) { return Base::operator=(other); } + /** Set all the entries to \a value. 
+ * \sa DenseBase::setConstant(), DenseBase::fill() + */ + /* This overload is needed because the usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const Scalar &value) + { + Base::setConstant(value); + return *this; + } + /** Copies the value of the expression \a other into \c *this with automatic resizing. * * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), @@ -84,7 +103,8 @@ class Array * remain row-vectors and vectors remain vectors. */ template - EIGEN_STRONG_INLINE Array& operator=(const ArrayBase& other) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const DenseBase& other) { return Base::_set(other); } @@ -92,11 +112,12 @@ class Array /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array& operator=(const Array& other) { return Base::_set(other); } - + /** Default constructor. * * For fixed-size matrices, does nothing. @@ -107,6 +128,7 @@ class Array * * \sa resize(Index,Index) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array() : Base() { Base::_check_template_params(); @@ -116,6 +138,7 @@ class Array #ifndef EIGEN_PARSED_BY_DOXYGEN // FIXME is it still needed ?? /** \internal */ + EIGEN_DEVICE_FUNC Array(internal::constructor_without_unaligned_array_assert) : Base(internal::constructor_without_unaligned_array_assert()) { @@ -124,41 +147,64 @@ class Array } #endif - /** Constructs a vector or row-vector with given dimension. \only_for_vectors - * - * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, - * it is redundant to pass the dimension here, so it makes more sense to use the default - * constructor Matrix() instead. 
- */ - EIGEN_STRONG_INLINE explicit Array(Index dim) - : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim) +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + Array(Array&& other) + : Base(std::move(other)) { Base::_check_template_params(); - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array) - eigen_assert(dim >= 0); - eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim); - EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) + Base::_set_noalias(other); } + EIGEN_DEVICE_FUNC + Array& operator=(Array&& other) + { + other.swap(*this); + return *this; + } +#endif #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Array(const T& x) + { + Base::_check_template_params(); + Base::template _init1(x); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1) { Base::_check_template_params(); this->template _init2(val0, val1); } #else - /** constructs an uninitialized matrix with \a rows rows and \a cols columns. + /** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC explicit Array(const Scalar *data); + /** Constructs a vector or row-vector with given dimension. \only_for_vectors * - * This is useful for dynamic-size matrices. For fixed-size matrices, + * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass the dimension here, so it makes more sense to use the default + * constructor Array() instead. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Array(Index dim); + /** constructs an initialized 1x1 Array with the given coefficient */ + Array(const Scalar& value); + /** constructs an uninitialized array with \a rows rows and \a cols columns. + * + * This is useful for dynamic-size arrays. 
For fixed-size arrays, * it is redundant to pass these parameters, so one should use the default constructor - * Matrix() instead. */ + * Array() instead. */ Array(Index rows, Index cols); /** constructs an initialized 2D vector with given coefficients */ Array(const Scalar& val0, const Scalar& val1); #endif /** constructs an initialized 3D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2) { Base::_check_template_params(); @@ -168,6 +214,7 @@ class Array m_storage.data()[2] = val2; } /** constructs an initialized 4D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3) { Base::_check_template_params(); @@ -178,51 +225,21 @@ class Array m_storage.data()[3] = val3; } - explicit Array(const Scalar *data); - - /** Constructor copying the value of the expression \a other */ - template - EIGEN_STRONG_INLINE Array(const ArrayBase& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } /** Copy constructor */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Array& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } - /** Copy constructor with in-place evaluation */ - template - EIGEN_STRONG_INLINE Array(const ReturnByValue& other) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - other.evalTo(*this); - } + : Base(other) + { } /** \sa MatrixBase::operator=(const EigenBase&) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const EigenBase &other) - : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - *this = other; - } - - /** 
Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the - * data pointers. - */ - template - void swap(ArrayBase const & other) - { this->_swap(other.derived()); } + : Base(other.derived()) + { } - inline Index innerStride() const { return 1; } - inline Index outerStride() const { return this->innerSize(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } #ifdef EIGEN_ARRAY_PLUGIN #include EIGEN_ARRAY_PLUGIN diff --git a/nuparu/include/Eigen/src/Core/ArrayBase.h b/nuparu/include/Eigen/src/Core/ArrayBase.h index 38852600..b4c24a27 100644 --- a/nuparu/include/Eigen/src/Core/ArrayBase.h +++ b/nuparu/include/Eigen/src/Core/ArrayBase.h @@ -46,16 +46,14 @@ template class ArrayBase typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; - using internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real>::operator*; - typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; typedef DenseBase Base; + using Base::operator*; + using Base::operator/; using Base::RowsAtCompileTime; using Base::ColsAtCompileTime; using Base::SizeAtCompileTime; @@ -64,8 +62,7 @@ template class ArrayBase using Base::MaxSizeAtCompileTime; using Base::IsVectorAtCompileTime; using Base::Flags; - using Base::CoeffReadCost; - + using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -85,22 +82,10 @@ template class ArrayBase #endif // not EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal the plain matrix type corresponding to this expression. 
Note that is not necessarily - * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const - * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either - * PlainObject or const PlainObject&. - */ - typedef Array::Scalar, - internal::traits::RowsAtCompileTime, - internal::traits::ColsAtCompileTime, - AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), - internal::traits::MaxRowsAtCompileTime, - internal::traits::MaxColsAtCompileTime - > PlainObject; - + typedef typename Base::PlainObject PlainObject; /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase @@ -118,40 +103,57 @@ template class ArrayBase /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ + EIGEN_DEVICE_FUNC Derived& operator=(const ArrayBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } - - Derived& operator+=(const Scalar& scalar) - { return *this = derived() + scalar; } - Derived& operator-=(const Scalar& scalar) - { return *this = derived() - scalar; } + + /** Set all the entries to \a value. 
+ * \sa DenseBase::setConstant(), DenseBase::fill() */ + EIGEN_DEVICE_FUNC + Derived& operator=(const Scalar &value) + { Base::setConstant(value); return derived(); } + + EIGEN_DEVICE_FUNC + Derived& operator+=(const Scalar& scalar); + EIGEN_DEVICE_FUNC + Derived& operator-=(const Scalar& scalar); template + EIGEN_DEVICE_FUNC Derived& operator+=(const ArrayBase& other); template + EIGEN_DEVICE_FUNC Derived& operator-=(const ArrayBase& other); template + EIGEN_DEVICE_FUNC Derived& operator*=(const ArrayBase& other); template + EIGEN_DEVICE_FUNC Derived& operator/=(const ArrayBase& other); public: + EIGEN_DEVICE_FUNC ArrayBase& array() { return *this; } + EIGEN_DEVICE_FUNC const ArrayBase& array() const { return *this; } /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array * \sa MatrixBase::array() */ - MatrixWrapper matrix() { return derived(); } - const MatrixWrapper matrix() const { return derived(); } + EIGEN_DEVICE_FUNC + MatrixWrapper matrix() { return MatrixWrapper(derived()); } + EIGEN_DEVICE_FUNC + const MatrixWrapper matrix() const { return MatrixWrapper(derived()); } // template // inline void evalTo(Dest& dst) const { dst = matrix(); } protected: + EIGEN_DEVICE_FUNC ArrayBase() : Base() {} private: @@ -176,8 +178,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator-=(const ArrayBase &other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -190,8 +191,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator+=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -204,8 +204,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator*=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - 
tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::mul_assign_op()); return derived(); } @@ -218,8 +217,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator/=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::div_assign_op()); return derived(); } diff --git a/nuparu/include/Eigen/src/Core/ArrayWrapper.h b/nuparu/include/Eigen/src/Core/ArrayWrapper.h index a791bc35..4e484f29 100644 --- a/nuparu/include/Eigen/src/Core/ArrayWrapper.h +++ b/nuparu/include/Eigen/src/Core/ArrayWrapper.h @@ -29,6 +29,11 @@ struct traits > : public traits::type > { typedef ArrayXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits::type >::Flags, + Flags = Flags0 & ~NestByRefBit + }; }; } @@ -39,6 +44,7 @@ class ArrayWrapper : public ArrayBase > typedef ArrayBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper) + typedef typename internal::remove_all::type NestedExpression; typedef typename internal::conditional< internal::is_lvalue::value, @@ -46,43 +52,56 @@ class ArrayWrapper : public ArrayBase > const Scalar >::type ScalarWithConstIfNotLvalue; - typedef typename internal::nested::type NestedExpressionType; + typedef typename internal::ref_selector::type NestedExpressionType; - inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC + explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return 
m_expression.const_cast_derived().data(); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index rowId, Index colId) const { return m_expression.coeff(rowId, colId); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index rowId, Index colId) { return m_expression.const_cast_derived().coeffRef(rowId, colId); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return m_expression.const_cast_derived().coeffRef(rowId, colId); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { return m_expression.const_cast_derived().coeffRef(index); @@ -113,9 +132,11 @@ class ArrayWrapper : public ArrayBase > } template + EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const { dst = m_expression; } const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC nestedExpression() const { return m_expression; @@ -123,10 +144,12 @@ class ArrayWrapper : public ArrayBase > /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index) */ + EIGEN_DEVICE_FUNC void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); } /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index,Index)*/ - void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); } + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); } protected: NestedExpressionType m_expression; @@ -149,6 +172,11 @@ struct traits > : public traits::type > { typedef MatrixXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits::type >::Flags, + Flags = Flags0 & ~NestByRefBit + }; }; } @@ -159,6 
+187,7 @@ class MatrixWrapper : public MatrixBase > typedef MatrixBase > Base; EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper) + typedef typename internal::remove_all::type NestedExpression; typedef typename internal::conditional< internal::is_lvalue::value, @@ -166,43 +195,56 @@ class MatrixWrapper : public MatrixBase > const Scalar >::type ScalarWithConstIfNotLvalue; - typedef typename internal::nested::type NestedExpressionType; + typedef typename internal::ref_selector::type NestedExpressionType; - inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {} + EIGEN_DEVICE_FUNC + explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index rowId, Index colId) const { return m_expression.coeff(rowId, colId); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index rowId, Index colId) { return m_expression.const_cast_derived().coeffRef(rowId, colId); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return m_expression.derived().coeffRef(rowId, colId); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { return 
m_expression.const_cast_derived().coeffRef(index); @@ -232,6 +274,7 @@ class MatrixWrapper : public MatrixBase > m_expression.const_cast_derived().template writePacket(index, val); } + EIGEN_DEVICE_FUNC const typename internal::remove_all::type& nestedExpression() const { @@ -240,10 +283,12 @@ class MatrixWrapper : public MatrixBase > /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index) */ + EIGEN_DEVICE_FUNC void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); } /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index,Index)*/ - void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); } + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); } protected: NestedExpressionType m_expression; diff --git a/nuparu/include/Eigen/src/Core/Assign.h b/nuparu/include/Eigen/src/Core/Assign.h index 1dccc2f4..53806ba3 100644 --- a/nuparu/include/Eigen/src/Core/Assign.h +++ b/nuparu/include/Eigen/src/Core/Assign.h @@ -14,471 +14,6 @@ namespace Eigen { -namespace internal { - -/*************************************************************************** -* Part 1 : the logic deciding a strategy for traversal and unrolling * -***************************************************************************/ - -template -struct assign_traits -{ -public: - enum { - DstIsAligned = Derived::Flags & AlignedBit, - DstHasDirectAccess = Derived::Flags & DirectAccessBit, - SrcIsAligned = OtherDerived::Flags & AlignedBit, - JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned - }; - -private: - enum { - InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) - : int(Derived::RowsAtCompileTime), - InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? 
int(Derived::MaxSizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime) - : int(Derived::MaxRowsAtCompileTime), - MaxSizeAtCompileTime = Derived::SizeAtCompileTime, - PacketSize = packet_traits::size - }; - - enum { - StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), - MightVectorize = StorageOrdersAgree - && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), - MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 - && int(DstIsAligned) && int(SrcIsAligned), - MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), - MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess - && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), - /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, - so it's only good for large enough sizes. */ - MaySliceVectorize = MightVectorize && DstHasDirectAccess - && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) - /* slice vectorization can be slow, so we only want it if the slices are big, which is - indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block - in a fixed-size matrix */ - }; - -public: - enum { - Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) - : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) - : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) - : int(MayLinearize) ? int(LinearTraversal) - : int(DefaultTraversal), - Vectorized = int(Traversal) == InnerVectorizedTraversal - || int(Traversal) == LinearVectorizedTraversal - || int(Traversal) == SliceVectorizedTraversal - }; - -private: - enum { - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 
int(PacketSize) : 1), - MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), - MayUnrollInner = int(InnerSize) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit) - }; - -public: - enum { - Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) - ? ( - int(MayUnrollCompletely) ? int(CompleteUnrolling) - : int(MayUnrollInner) ? int(InnerUnrolling) - : int(NoUnrolling) - ) - : int(Traversal) == int(LinearVectorizedTraversal) - ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) - : int(Traversal) == int(LinearTraversal) - ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) - : int(NoUnrolling) - }; - -#ifdef EIGEN_DEBUG_ASSIGN - static void debug() - { - EIGEN_DEBUG_VAR(DstIsAligned) - EIGEN_DEBUG_VAR(SrcIsAligned) - EIGEN_DEBUG_VAR(JointAlignment) - EIGEN_DEBUG_VAR(InnerSize) - EIGEN_DEBUG_VAR(InnerMaxSize) - EIGEN_DEBUG_VAR(PacketSize) - EIGEN_DEBUG_VAR(StorageOrdersAgree) - EIGEN_DEBUG_VAR(MightVectorize) - EIGEN_DEBUG_VAR(MayLinearize) - EIGEN_DEBUG_VAR(MayInnerVectorize) - EIGEN_DEBUG_VAR(MayLinearVectorize) - EIGEN_DEBUG_VAR(MaySliceVectorize) - EIGEN_DEBUG_VAR(Traversal) - EIGEN_DEBUG_VAR(UnrollingLimit) - EIGEN_DEBUG_VAR(MayUnrollCompletely) - EIGEN_DEBUG_VAR(MayUnrollInner) - EIGEN_DEBUG_VAR(Unrolling) - } -#endif -}; - -/*************************************************************************** -* Part 2 : meta-unrollers -***************************************************************************/ - -/************************ -*** Default traversal *** -************************/ - -template -struct assign_DefaultTraversal_CompleteUnrolling -{ - enum { - outer = Index / 
Derived1::InnerSizeAtCompileTime, - inner = Index % Derived1::InnerSizeAtCompileTime - }; - - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.copyCoeffByOuterInner(outer, inner, src); - assign_DefaultTraversal_CompleteUnrolling::run(dst, src); - } -}; - -template -struct assign_DefaultTraversal_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -template -struct assign_DefaultTraversal_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) - { - dst.copyCoeffByOuterInner(outer, Index, src); - assign_DefaultTraversal_InnerUnrolling::run(dst, src, outer); - } -}; - -template -struct assign_DefaultTraversal_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} -}; - -/*********************** -*** Linear traversal *** -***********************/ - -template -struct assign_LinearTraversal_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.copyCoeff(Index, src); - assign_LinearTraversal_CompleteUnrolling::run(dst, src); - } -}; - -template -struct assign_LinearTraversal_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -/************************** -*** Inner vectorization *** -**************************/ - -template -struct assign_innervec_CompleteUnrolling -{ - enum { - outer = Index / Derived1::InnerSizeAtCompileTime, - inner = Index % Derived1::InnerSizeAtCompileTime, - JointAlignment = assign_traits::JointAlignment - }; - - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.template copyPacketByOuterInner(outer, inner, src); - assign_innervec_CompleteUnrolling::size, Stop>::run(dst, src); - } -}; - -template -struct assign_innervec_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - 
-template -struct assign_innervec_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) - { - dst.template copyPacketByOuterInner(outer, Index, src); - assign_innervec_InnerUnrolling::size, Stop>::run(dst, src, outer); - } -}; - -template -struct assign_innervec_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} -}; - -/*************************************************************************** -* Part 3 : implementation of all cases -***************************************************************************/ - -template::Traversal, - int Unrolling = assign_traits::Unrolling, - int Version = Specialized> -struct assign_impl; - -/************************ -*** Default traversal *** -************************/ - -template -struct assign_impl -{ - static inline void run(Derived1 &, const Derived2 &) { } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; ++inner) - dst.copyCoeffByOuterInner(outer, inner, src); - } -}; - -template -struct assign_impl -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_DefaultTraversal_CompleteUnrolling - ::run(dst, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - assign_DefaultTraversal_InnerUnrolling - ::run(dst, src, outer); - } -}; - -/*********************** -*** Linear traversal *** -***********************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; 
- static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index size = dst.size(); - for(Index i = 0; i < size; ++i) - dst.copyCoeff(i, src); - } -}; - -template -struct assign_impl -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_LinearTraversal_CompleteUnrolling - ::run(dst, src); - } -}; - -/************************** -*** Inner vectorization *** -**************************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index packetSize = packet_traits::size; - for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; inner+=packetSize) - dst.template copyPacketByOuterInner(outer, inner, src); - } -}; - -template -struct assign_impl -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_innervec_CompleteUnrolling - ::run(dst, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - assign_innervec_InnerUnrolling - ::run(dst, src, outer); - } -}; - -/*************************** -*** Linear vectorization *** -***************************/ - -template -struct unaligned_assign_impl -{ - template - static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {} -}; - -template <> -struct unaligned_assign_impl -{ - // MSVC must not inline this functions. If it does, it fails to optimize the - // packet access path. 
-#ifdef _MSC_VER - template - static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) -#else - template - static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) -#endif - { - for (typename Derived::Index index = start; index < end; ++index) - dst.copyCoeff(index, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index size = dst.size(); - typedef packet_traits PacketTraits; - enum { - packetSize = PacketTraits::size, - dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits::DstIsAligned) , - srcAlignment = assign_traits::JointAlignment - }; - const Index alignedStart = assign_traits::DstIsAligned ? 0 - : internal::first_aligned(&dst.coeffRef(0), size); - const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; - - unaligned_assign_impl::DstIsAligned!=0>::run(src,dst,0,alignedStart); - - for(Index index = alignedStart; index < alignedEnd; index += packetSize) - { - dst.template copyPacket(index, src); - } - - unaligned_assign_impl<>::run(src,dst,alignedEnd,size); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - enum { size = Derived1::SizeAtCompileTime, - packetSize = packet_traits::size, - alignedSize = (size/packetSize)*packetSize }; - - assign_innervec_CompleteUnrolling::run(dst, src); - assign_DefaultTraversal_CompleteUnrolling::run(dst, src); - } -}; - -/************************** -*** Slice vectorization *** -***************************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - typedef packet_traits PacketTraits; - enum 
{ - packetSize = PacketTraits::size, - alignable = PacketTraits::AlignedOnScalar, - dstAlignment = alignable ? Aligned : int(assign_traits::DstIsAligned) , - srcAlignment = assign_traits::JointAlignment - }; - const Index packetAlignedMask = packetSize - 1; - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0; - Index alignedStart = ((!alignable) || assign_traits::DstIsAligned) ? 0 - : internal::first_aligned(&dst.coeffRef(0,0), innerSize); - - for(Index outer = 0; outer < outerSize; ++outer) - { - const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); - // do the non-vectorizable part of the assignment - for(Index inner = 0; inner(outer, inner, src); - - // do the non-vectorizable part of the assignment - for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); - } - } -}; - -} // end namespace internal - -/*************************************************************************** -* Part 4 : implementation of DenseBase methods -***************************************************************************/ - template template EIGEN_STRONG_INLINE Derived& DenseBase @@ -492,90 +27,62 @@ EIGEN_STRONG_INLINE Derived& DenseBase EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived) EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) -#ifdef EIGEN_DEBUG_ASSIGN - internal::assign_traits::debug(); -#endif eigen_assert(rows() == other.rows() && cols() == other.cols()); - internal::assign_impl::Traversal) - : int(InvalidTraversal)>::run(derived(),other.derived()); -#ifndef EIGEN_NO_DEBUG - checkTransposeAliasing(other.derived()); -#endif + internal::call_assignment_no_alias(derived(),other.derived()); + return derived(); } -namespace internal { - -template::Flags) & 
EvalBeforeAssigningBit) != 0, - bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1) - | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". - // revert to || as soon as not needed anymore. - (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1)) - && int(Derived::SizeAtCompileTime) != 1> -struct assign_selector; - -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } - template - static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; } -}; -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); } -}; -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); } - template - static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose dstTrans(dst); other.evalTo(dstTrans); return dst; } -}; -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } -}; - -} // end namespace internal - template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template 
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const MatrixBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const EigenBase& other) { - return internal::assign_selector::evalTo(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const ReturnByValue& other) { - return internal::assign_selector::evalTo(derived(), other.derived()); + other.derived().evalTo(derived()); + return derived(); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/AssignEvaluator.h b/nuparu/include/Eigen/src/Core/AssignEvaluator.h new file mode 100644 index 00000000..9dfffbcc --- /dev/null +++ b/nuparu/include/Eigen/src/Core/AssignEvaluator.h @@ -0,0 +1,810 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_ASSIGN_EVALUATOR_H +#define EIGEN_ASSIGN_EVALUATOR_H + +namespace Eigen { + +// This implementation is based on Assign.h + +namespace internal { + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for traversal and unrolling * +***************************************************************************/ + +// copy_using_evaluator_traits is based on assign_traits + +template +struct copy_using_evaluator_traits +{ + typedef typename DstEvaluator::XprType Dst; + typedef typename Dst::Scalar DstScalar; + // TODO distinguish between linear traversal and inner-traversals + typedef typename find_best_packet::type PacketType; + + enum { + DstFlags = DstEvaluator::Flags, + SrcFlags = SrcEvaluator::Flags, + RequiredAlignment = unpacket_traits::alignment + }; + +public: + enum { + DstAlignment = DstEvaluator::Alignment, + SrcAlignment = SrcEvaluator::Alignment, + DstHasDirectAccess = DstFlags & DirectAccessBit, + JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) + }; + +private: + enum { + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? 
int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + OuterStride = int(outer_stride_at_compile_time::ret), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime, + PacketSize = unpacket_traits::size + }; + + enum { + DstIsRowMajor = DstFlags&RowMajorBit, + SrcIsRowMajor = SrcFlags&RowMajorBit, + StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), + MightVectorize = StorageOrdersAgree + && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) + && (functor_traits::PacketAccess), + MayInnerVectorize = MightVectorize + && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 + && int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0 + && int(JointAlignment)>=int(RequiredAlignment), + MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), + MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess + && ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), + /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. */ + MaySliceVectorize = MightVectorize && DstHasDirectAccess + && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix */ + }; + +public: + enum { + Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) + : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(MayLinearize) ? int(LinearTraversal) + : int(DefaultTraversal), + Vectorized = int(Traversal) == InnerVectorizedTraversal + || int(Traversal) == LinearVectorizedTraversal + || int(Traversal) == SliceVectorizedTraversal + }; + +private: + enum { + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 
int(PacketSize) : 1), + MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic + && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), + MayUnrollInner = int(InnerSize) != Dynamic + && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) + }; + +public: + enum { + Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) + ? ( + int(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) + ) + : int(Traversal) == int(LinearVectorizedTraversal) + ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling) + : int(NoUnrolling) ) + : int(Traversal) == int(LinearTraversal) + ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(NoUnrolling) ) + : int(NoUnrolling) + }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; + std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl; + std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl; + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(DstAlignment) + EIGEN_DEBUG_VAR(SrcAlignment) + EIGEN_DEBUG_VAR(RequiredAlignment) + EIGEN_DEBUG_VAR(JointAlignment) + EIGEN_DEBUG_VAR(InnerSize) + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(StorageOrdersAgree) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearize) + EIGEN_DEBUG_VAR(MayInnerVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; + 
EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(MayUnrollCompletely) + EIGEN_DEBUG_VAR(MayUnrollInner) + std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; + std::cerr << std::endl; + } +#endif +}; + +/*************************************************************************** +* Part 2 : meta-unrollers +***************************************************************************/ + +/************************ +*** Default traversal *** +************************/ + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime + }; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + kernel.assignCoeffByOuterInner(outer, inner); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) + { + kernel.assignCoeffByOuterInner(outer, Index_); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) + { + 
kernel.assignCoeff(Index); + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime, + JointAlignment = Kernel::AssignmentTraits::JointAlignment + }; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + kernel.template assignPacketByOuterInner(outer, inner); + enum { NextIndex = Index + unpacket_traits::size }; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + typedef typename Kernel::PacketType PacketType; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) + { + kernel.template assignPacketByOuterInner(outer, Index_); + enum { NextIndex = Index_ + unpacket_traits::size }; + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases 
+***************************************************************************/ + +// dense_assignment_loop is based on assign_impl + +template +struct dense_assignment_loop; + +/************************ +*** Default traversal *** +************************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel) + { + for(Index outer = 0; outer < kernel.outerSize(); ++outer) { + for(Index inner = 0; inner < kernel.innerSize(); ++inner) { + kernel.assignCoeffByOuterInner(outer, inner); + } + } + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + + const Index outerSize = kernel.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +/*************************** +*** Linear vectorization *** +***************************/ + + +// The goal of unaligned_dense_assignment_loop is simply to factorize the handling +// of the non vectorizable beginning and ending parts + +template +struct unaligned_dense_assignment_loop +{ + // if IsAligned = true, then do nothing + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {} +}; + +template <> +struct unaligned_dense_assignment_loop +{ + // MSVC must not inline this functions. If it does, it fails to optimize the + // packet access path. 
+ // FIXME check which version exhibits this issue +#if EIGEN_COMP_MSVC + template + static EIGEN_DONT_INLINE void run(Kernel &kernel, + Index start, + Index end) +#else + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, + Index start, + Index end) +#endif + { + for (Index index = start; index < end; ++index) + kernel.assignCoeff(index); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index size = kernel.size(); + typedef typename Kernel::Scalar Scalar; + typedef typename Kernel::PacketType PacketType; + enum { + requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment, + packetSize = unpacket_traits::size, + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), + dstAlignment = packet_traits::AlignedOnScalar ? int(requestedAlignment) + : int(Kernel::AssignmentTraits::DstAlignment), + srcAlignment = Kernel::AssignmentTraits::JointAlignment + }; + const Index alignedStart = dstIsAligned ? 
0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size); + const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; + + unaligned_dense_assignment_loop::run(kernel, 0, alignedStart); + + for(Index index = alignedStart; index < alignedEnd; index += packetSize) + kernel.template assignPacket(index); + + unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + + enum { size = DstXprType::SizeAtCompileTime, + packetSize = packet_traits::size, + alignedSize = (size/packetSize)*packetSize }; + + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct dense_assignment_loop +{ + typedef typename Kernel::PacketType PacketType; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index packetSize = unpacket_traits::size; + for(Index outer = 0; outer < outerSize; ++outer) + for(Index inner = 0; inner < innerSize; inner+=packetSize) + kernel.template assignPacketByOuterInner(outer, inner); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + const Index outerSize = kernel.outerSize(); + for(Index outer = 0; outer < outerSize; 
++outer) + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index size = kernel.size(); + for(Index i = 0; i < size; ++i) + kernel.assignCoeff(i); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } +}; + +/************************** +*** Slice vectorization *** +***************************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + { + typedef typename Kernel::Scalar Scalar; + typedef typename Kernel::PacketType PacketType; + enum { + packetSize = unpacket_traits::size, + requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment), + alignable = packet_traits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar), + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), + dstAlignment = alignable ? int(requestedAlignment) + : int(Kernel::AssignmentTraits::DstAlignment) + }; + const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0); + if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0) + { + // the pointer is not aligend-on scalar, so alignment is not possible + return dense_assignment_loop::run(kernel); + } + const Index packetAlignedMask = packetSize - 1; + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; + Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 
0 : internal::first_aligned(dst_ptr, innerSize); + + for(Index outer = 0; outer < outerSize; ++outer) + { + const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); + // do the non-vectorizable part of the assignment + for(Index inner = 0; inner(outer, inner); + + // do the non-vectorizable part of the assignment + for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); + } + } +}; + +/*************************************************************************** +* Part 4 : Generic dense assignment kernel +***************************************************************************/ + +// This class generalize the assignment of a coefficient (or packet) from one dense evaluator +// to another dense writable evaluator. +// It is parametrized by the two evaluators, and the actual assignment functor. +// This abstraction level permits to keep the evaluation loops as simple and as generic as possible. +// One can customize the assignment using this generic dense_assignment_kernel with different +// functors, or by completely overloading it, by-passing a functor. 
+template +class generic_dense_assignment_kernel +{ +protected: + typedef typename DstEvaluatorTypeT::XprType DstXprType; + typedef typename SrcEvaluatorTypeT::XprType SrcXprType; +public: + + typedef DstEvaluatorTypeT DstEvaluatorType; + typedef SrcEvaluatorTypeT SrcEvaluatorType; + typedef typename DstEvaluatorType::Scalar Scalar; + typedef copy_using_evaluator_traits AssignmentTraits; + typedef typename AssignmentTraits::PacketType PacketType; + + + EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) + { + #ifdef EIGEN_DEBUG_ASSIGN + AssignmentTraits::debug(); + #endif + } + + EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } + EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } + EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } + + EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } + EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } + + /// Assign src(row,col) to dst(row,col) through the assignment functor. 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) + { + m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) + { + m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignCoeff(row, col); + } + + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) + { + m_functor.template assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) + { + m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignPacket(row, col); + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) + { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::RowsAtCompileTime) == 1 ? 0 + : int(Traits::ColsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? outer + : inner; + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) + { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::ColsAtCompileTime) == 1 ? 0 + : int(Traits::RowsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? 
inner + : outer; + } + +protected: + DstEvaluatorType& m_dst; + const SrcEvaluatorType& m_src; + const Functor &m_functor; + // TODO find a way to avoid the needs of the original expression + DstXprType& m_dstExpr; +}; + +/*************************************************************************** +* Part 5 : Entry point for dense rectangular assignment +***************************************************************************/ + +template +EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +{ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + typedef evaluator DstEvaluatorType; + typedef evaluator SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + typedef generic_dense_assignment_kernel Kernel; + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + dense_assignment_loop::run(kernel); +} + +template +EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) +{ + call_dense_assignment_loop(dst, src, internal::assign_op()); +} + +/*************************************************************************** +* Part 6 : Generic assignment +***************************************************************************/ + +// Based on the respective shapes of the destination and source, +// the class AssignmentKind determine the kind of assignment mechanism. +// AssignmentKind must define a Kind typedef. 
+template struct AssignmentKind; + +// Assignement kind defined in this file: +struct Dense2Dense {}; +struct EigenBase2EigenBase {}; + +template struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; +template<> struct AssignmentKind { typedef Dense2Dense Kind; }; + +// This is the main assignment class +template< typename DstXprType, typename SrcXprType, typename Functor, + typename Kind = typename AssignmentKind< typename evaluator_traits::Shape , typename evaluator_traits::Shape >::Kind, + typename Scalar = typename DstXprType::Scalar> +struct Assignment; + + +// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition. +// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes thing quite complicated. +// So this intermediate function removes everything related to AssumeAliasing such that Assignment +// does not has to bother about these annoying details. + +template +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} +template +EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} + +// Deal with AssumeAliasing +template +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if::AssumeAliasing==1, void*>::type = 0) +{ + typename plain_matrix_type::type tmp(src); + call_assignment_no_alias(dst, tmp, func); +} + +template +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if::AssumeAliasing==0, void*>::type = 0) +{ + call_assignment_no_alias(dst, src, func); +} + +// by-pass AssumeAliasing +// When there is no aliasing, we require that 'dst' has been properly resized +template class StorageBase, typename Src, typename Func> +EIGEN_DEVICE_FUNC void call_assignment(NoAlias& dst, const Src& src, const Func& func) +{ + 
call_assignment_no_alias(dst.expression(), src, func); +} + + +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) +{ + enum { + NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) + || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1) + ) && int(Dst::SizeAtCompileTime) != 1 + }; + + Index dstRows = NeedToTranspose ? src.cols() : src.rows(); + Index dstCols = NeedToTranspose ? src.rows() : src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + typedef typename internal::conditional, Dst>::type ActualDstTypeCleaned; + typedef typename internal::conditional, Dst&>::type ActualDstType; + ActualDstType actualDst(dst); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); + + Assignment::run(actualDst, src, func); +} +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src) +{ + call_assignment_no_alias(dst, src, internal::assign_op()); +} + +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) +{ + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src) + + Assignment::run(dst, src, func); +} +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) +{ + call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); +} + +// forward declaration +template void check_for_aliasing(const Dst &dst, const Src &src); + +// Generic 
Dense to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment +{ + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + +#ifndef EIGEN_NO_DEBUG + internal::check_for_aliasing(dst, src); +#endif + + call_dense_assignment_loop(dst, src, func); + } +}; + +// Generic assignment through evalTo. +// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment +{ + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.evalTo(dst); + } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_EVALUATOR_H diff --git a/nuparu/include/Eigen/src/Core/Assign_MKL.h b/nuparu/include/Eigen/src/Core/Assign_MKL.h index 7772951b..897187a3 100644 --- a/nuparu/include/Eigen/src/Core/Assign_MKL.h +++ b/nuparu/include/Eigen/src/Core/Assign_MKL.h @@ -1,6 +1,7 @@ /* Copyright (c) 2011, Intel Corporation. All rights reserved. - + Copyright (C) 2015 Gael Guennebaud + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -37,17 +38,13 @@ namespace Eigen { namespace internal { -template struct vml_call -{ enum { IsSupported = 0 }; }; - -template +template class vml_assign_traits { private: enum { DstHasDirectAccess = Dst::Flags & DirectAccessBit, SrcHasDirectAccess = Src::Flags & DirectAccessBit, - StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) : int(Dst::Flags)&RowMajorBit ? 
int(Dst::ColsAtCompileTime) @@ -57,165 +54,118 @@ class vml_assign_traits : int(Dst::MaxRowsAtCompileTime), MaxSizeAtCompileTime = Dst::SizeAtCompileTime, - MightEnableVml = vml_call::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess - && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, + MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, - LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD, - MayEnableVml = MightEnableVml && LargeEnough, - MayLinearize = MayEnableVml && MightLinearize + LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD }; public: enum { - Traversal = MayLinearize ? LinearVectorizedTraversal - : MayEnableVml ? InnerVectorizedTraversal - : DefaultTraversal + EnableVml = MightEnableVml && LargeEnough, + Traversal = MightLinearize ? LinearTraversal : DefaultTraversal }; }; -template::Traversal > -struct vml_assign_impl - : assign_impl,Traversal,Unrolling,BuiltIn> -{ -}; - -template -struct vml_assign_impl -{ - typedef typename Derived1::Scalar Scalar; - typedef typename Derived1::Index Index; - static inline void run(Derived1& dst, const CwiseUnaryOp& src) - { - // in case we want to (or have to) skip VML at runtime we can call: - // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) { - const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : - &(src.nestedExpression().coeffRef(0, outer)); - Scalar *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); - vml_call::run(src.functor(), innerSize, src_ptr, dst_ptr ); - } - } -}; - -template -struct vml_assign_impl -{ - static inline void run(Derived1& dst, const CwiseUnaryOp& src) - { - // in case we want to (or have to) skip VML at runtime we can call: - // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); - vml_call::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() ); - } -}; - -// Macroses - -#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \ - template \ - struct assign_impl, TRAVERSAL, UNROLLING, Specialized> { \ - static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp &src) { \ - vml_assign_impl::run(dst, src); \ - } \ - }; - -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling) - - +#define EIGEN_PP_EXPAND(ARG) ARG #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) -#define EIGEN_MKL_VML_MODE VML_HA +#define EIGEN_VMLMODE_EXPAND_LA , VML_HA #else -#define EIGEN_MKL_VML_MODE VML_LA +#define EIGEN_VMLMODE_EXPAND_LA , VML_LA #endif -#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& 
/*func*/, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \ - } \ +#define EIGEN_VMLMODE_EXPAND__ + +#define EIGEN_VMLMODE_PREFIX_LA vm +#define EIGEN_VMLMODE_PREFIX__ v +#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested> \ + struct Assignment, SrcXprNested>, assign_op, \ + Dense2Dense, typename enable_if::EnableVml,EIGENTYPE>::type> { \ + typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + if(vml_assign_traits::Traversal==LinearTraversal) { \ + VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ + &(src.nestedExpression().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ + }; \ + + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) + + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA) +// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _) + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _) 
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) + +#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested> \ + struct Assignment, SrcXprNested>, assign_op, \ + Dense2Dense, typename enable_if::EnableVml,EIGENTYPE>::type> { \ + typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + VMLTYPE exponent = reinterpret_cast(src.functor().m_exponent); \ + if(vml_assign_traits::Traversal==LinearTraversal) \ + { \ + VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(), exponent, \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ + &(src.nestedExpression().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ }; - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& /*func*/, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ - VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \ - } \ - }; - -#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& func, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - EIGENTYPE exponent = func.m_exponent; \ - MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ - VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \ - (VMLTYPE*)dst, &vmlMode); \ - } \ - }; - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) - - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double) - -#define 
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) - - -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan) -//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt) - -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr) - -// The vm*powx functions are not avaibale in the windows version of MKL. -#ifndef _WIN32 -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16) -#endif + +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA) } // end namespace internal diff --git a/nuparu/include/Eigen/src/Core/BandMatrix.h b/nuparu/include/Eigen/src/Core/BandMatrix.h index ffd7fe8b..87c124fd 100644 --- a/nuparu/include/Eigen/src/Core/BandMatrix.h +++ b/nuparu/include/Eigen/src/Core/BandMatrix.h @@ -32,7 +32,7 @@ class BandMatrixBase : public EigenBase }; typedef typename internal::traits::Scalar Scalar; typedef Matrix DenseMatrixType; - typedef typename 
DenseMatrixType::Index Index; + typedef typename DenseMatrixType::StorageIndex StorageIndex; typedef typename internal::traits::CoefficientsType CoefficientsType; typedef EigenBase Base; @@ -179,7 +179,7 @@ struct traits > { typedef _Scalar Scalar; typedef Dense StorageKind; - typedef DenseIndex Index; + typedef Eigen::Index StorageIndex; enum { CoeffReadCost = NumTraits::ReadCost, RowsAtCompileTime = _Rows, @@ -201,10 +201,10 @@ class BandMatrix : public BandMatrixBase::Scalar Scalar; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; typedef typename internal::traits::CoefficientsType CoefficientsType; - inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) + explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) : m_coeffs(1+supers+subs,cols), m_rows(rows), m_supers(supers), m_subs(subs) { @@ -241,7 +241,7 @@ struct traits::CoeffReadCost, RowsAtCompileTime = _Rows, @@ -264,9 +264,9 @@ class BandMatrixWrapper : public BandMatrixBase::Scalar Scalar; typedef typename internal::traits::CoefficientsType CoefficientsType; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; - inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) + explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) : m_coeffs(coeffs), m_rows(rows), m_supers(supers), m_subs(subs) { @@ -312,9 +312,9 @@ template class TridiagonalMatrix : public BandMatrix { typedef BandMatrix Base; - typedef typename Base::Index Index; + typedef typename Base::StorageIndex StorageIndex; public: - TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {} + explicit TridiagonalMatrix(Index size = Size) : 
Base(size,size,Options&SelfAdjoint?0:1,1) {} inline typename Base::template DiagonalIntReturnType<1>::Type super() { return Base::template diagonal<1>(); } @@ -327,6 +327,25 @@ class TridiagonalMatrix : public BandMatrix +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + } // end namespace internal } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/Block.h b/nuparu/include/Eigen/src/Core/Block.h index 358b3188..3748e259 100644 --- a/nuparu/include/Eigen/src/Core/Block.h +++ b/nuparu/include/Eigen/src/Core/Block.h @@ -21,6 +21,9 @@ namespace Eigen { * \param XprType the type of the expression in which we are taking a block * \param BlockRows the number of rows of the block we are taking at compile time (optional) * \param BlockCols the number of columns of the block we are taking at compile time (optional) + * \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or + * to set of columns of a column major matrix (optional). The parameter allows to determine + * at compile time whether aligned access is possible on the block expression. * * This class represents an expression of either a fixed-size or dynamic-size block. 
It is the return * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block(Index,Index) and @@ -52,7 +55,7 @@ struct traits > : traits::Scalar Scalar; typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; - typedef typename nested::type XprTypeNested; + typedef typename ref_selector::type XprTypeNested; typedef typename remove_reference::type _XprTypeNested; enum{ MatrixRows = traits::RowsAtCompileTime, @@ -65,6 +68,7 @@ struct traits > : traits::MaxColsAtCompileTime), + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0, IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -77,18 +81,16 @@ struct traits > : traits::ret) : int(inner_stride_at_compile_time::ret), - MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) - && (InnerStrideAtCompileTime == 1) - ? PacketAccessBit : 0, - MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0, - FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0, + + // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, - Flags0 = traits::Flags & ( (HereditaryBits & ~RowMajorBit) | - DirectAccessBit | - MaskPacketAccessBit | - MaskAlignedBit), - Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit + Flags = (traits::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit, + // FIXME DirectAccessBit should not be handled by expressions + // + // Alignment is needed by MapBase's assertions + // We can sefely set it to false here. 
Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator + Alignment = 0 }; }; @@ -108,9 +110,12 @@ template class typedef Impl Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Block) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) + + typedef typename internal::remove_all::type NestedExpression; /** Column or Row constructor */ + EIGEN_DEVICE_FUNC inline Block(XprType& xpr, Index i) : Impl(xpr,i) { eigen_assert( (i>=0) && ( @@ -120,25 +125,27 @@ template class /** Fixed-size constructor */ - inline Block(XprType& xpr, Index a_startRow, Index a_startCol) - : Impl(xpr, a_startRow, a_startCol) + EIGEN_DEVICE_FUNC + inline Block(XprType& xpr, Index startRow, Index startCol) + : Impl(xpr, startRow, startCol) { EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE) - eigen_assert(a_startRow >= 0 && BlockRows >= 1 && a_startRow + BlockRows <= xpr.rows() - && a_startCol >= 0 && BlockCols >= 1 && a_startCol + BlockCols <= xpr.cols()); + eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows() + && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols()); } /** Dynamic-size constructor */ + EIGEN_DEVICE_FUNC inline Block(XprType& xpr, - Index a_startRow, Index a_startCol, + Index startRow, Index startCol, Index blockRows, Index blockCols) - : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) { eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows) && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols)); - eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow <= xpr.rows() - blockRows - && a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols); + eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows + && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols); } }; @@ -149,14 +156,15 @@ class 
BlockImpl : public internal::BlockImpl_dense { typedef internal::BlockImpl_dense Impl; - typedef typename XprType::Index Index; + typedef typename XprType::StorageIndex StorageIndex; public: typedef Impl Base; EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) - inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {} - inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {} - inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols) - : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {} + EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {} + EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {} + EIGEN_DEVICE_FUNC + inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) {} }; namespace internal { @@ -172,10 +180,11 @@ template::type& nestedExpression() const + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& nestedExpression() const { return m_xpr; } - Index startRow() const + EIGEN_DEVICE_FUNC + StorageIndex startRow() const { return m_startRow.value(); } - Index startCol() const + EIGEN_DEVICE_FUNC + StorageIndex startCol() const { return m_startCol.value(); } @@ -302,10 +322,10 @@ template m_startRow; - const internal::variable_if_dynamic m_startCol; - const internal::variable_if_dynamic m_blockRows; - const internal::variable_if_dynamic m_blockCols; + const internal::variable_if_dynamic m_startRow; + const internal::variable_if_dynamic m_startCol; + const internal::variable_if_dynamic m_blockRows; + const internal::variable_if_dynamic m_blockCols; }; /** \internal Internal implementation of dense Blocks in the direct access case.*/ @@ -314,6 +334,9 @@ class BlockImpl_dense : public MapBase > { typedef Block BlockType; + enum { + XprTypeIsRowMajor = 
(int(traits::Flags)&RowMajorBit) != 0 + }; public: typedef MapBase Base; @@ -322,10 +345,10 @@ class BlockImpl_dense /** Column or Row constructor */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index i) - : Base(internal::const_cast_ptr(&xpr.coeffRef( - (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0, - (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)), + : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) + || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()), BlockRows==1 ? 1 : xpr.rows(), BlockCols==1 ? 1 : xpr.cols()), m_xpr(xpr) @@ -335,29 +358,34 @@ class BlockImpl_dense /** Fixed-size constructor */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) - : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr) + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)), + m_xpr(xpr) { init(); } /** Dynamic-size constructor */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) - : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols), + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols), m_xpr(xpr) { init(); } - const typename internal::remove_all::type& nestedExpression() const + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& nestedExpression() const { return m_xpr; } /** \sa MapBase::innerStride() */ + EIGEN_DEVICE_FUNC inline Index innerStride() const { return internal::traits::HasSameStorageOrderAsXprType @@ -366,6 +394,7 @@ class BlockImpl_dense } /** \sa MapBase::outerStride() */ + EIGEN_DEVICE_FUNC inline Index 
outerStride() const { return m_outerStride; @@ -379,6 +408,7 @@ class BlockImpl_dense #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal used by allowAligned() */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols) : Base(data, blockRows, blockCols), m_xpr(xpr) { @@ -387,6 +417,7 @@ class BlockImpl_dense #endif protected: + EIGEN_DEVICE_FUNC void init() { m_outerStride = internal::traits::HasSameStorageOrderAsXprType diff --git a/nuparu/include/Eigen/src/Core/BooleanRedux.h b/nuparu/include/Eigen/src/Core/BooleanRedux.h index 6e37e031..8409d874 100644 --- a/nuparu/include/Eigen/src/Core/BooleanRedux.h +++ b/nuparu/include/Eigen/src/Core/BooleanRedux.h @@ -17,9 +17,10 @@ namespace internal { template struct all_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Derived::RowsAtCompileTime, - row = (UnrollCount-1) % Derived::RowsAtCompileTime + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) @@ -29,9 +30,9 @@ struct all_unroller }; template -struct all_unroller +struct all_unroller { - static inline bool run(const Derived &mat) { return mat.coeff(0, 0); } + static inline bool run(const Derived &/*mat*/) { return true; } }; template @@ -43,11 +44,12 @@ struct all_unroller template struct any_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Derived::RowsAtCompileTime, - row = (UnrollCount-1) % Derived::RowsAtCompileTime + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; - + static inline bool run(const Derived &mat) { return any_unroller::run(mat) || mat.coeff(row, col); @@ -55,9 +57,9 @@ struct any_unroller }; template -struct any_unroller +struct any_unroller { - static inline bool run(const Derived &mat) { return mat.coeff(0, 0); } + static inline bool run(const 
Derived & /*mat*/) { return false; } }; template @@ -78,19 +80,19 @@ struct any_unroller template inline bool DenseBase::all() const { + typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && CoeffReadCost != Dynamic - && NumTraits::AddCost != Dynamic - && SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; + Evaluator evaluator(derived()); if(unroll) - return internal::all_unroller::run(derived()); + return internal::all_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if (!coeff(i, j)) return false; + if (!evaluator.coeff(i, j)) return false; return true; } } @@ -102,19 +104,19 @@ inline bool DenseBase::all() const template inline bool DenseBase::any() const { + typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && CoeffReadCost != Dynamic - && NumTraits::AddCost != Dynamic - && SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; + Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller::run(derived()); + return internal::any_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if (coeff(i, j)) return true; + if (evaluator.coeff(i, j)) return true; return false; } } @@ -124,7 +126,7 @@ inline bool DenseBase::any() const * \sa all(), any() */ template -inline typename DenseBase::Index DenseBase::count() const +inline Eigen::Index DenseBase::count() const { return derived().template cast().template cast().sum(); } @@ -136,7 +138,11 @@ inline typename DenseBase::Index DenseBase::count() const template inline bool DenseBase::hasNaN() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return 
derived().array().isNaN().any(); +#else return !((derived().array()==derived().array()).all()); +#endif } /** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values. @@ -146,7 +152,11 @@ inline bool DenseBase::hasNaN() const template inline bool DenseBase::allFinite() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isFinite().all(); +#else return !((derived()-derived()).hasNaN()); +#endif } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/CMakeLists.txt b/nuparu/include/Eigen/src/Core/CMakeLists.txt index 2346fc2b..38c3afde 100644 --- a/nuparu/include/Eigen/src/Core/CMakeLists.txt +++ b/nuparu/include/Eigen/src/Core/CMakeLists.txt @@ -8,3 +8,4 @@ INSTALL(FILES ADD_SUBDIRECTORY(products) ADD_SUBDIRECTORY(util) ADD_SUBDIRECTORY(arch) +ADD_SUBDIRECTORY(functors) diff --git a/nuparu/include/Eigen/src/Core/CommaInitializer.h b/nuparu/include/Eigen/src/Core/CommaInitializer.h index a96867af..89bcd750 100644 --- a/nuparu/include/Eigen/src/Core/CommaInitializer.h +++ b/nuparu/include/Eigen/src/Core/CommaInitializer.h @@ -28,8 +28,8 @@ template struct CommaInitializer { typedef typename XprType::Scalar Scalar; - typedef typename XprType::Index Index; + EIGEN_DEVICE_FUNC inline CommaInitializer(XprType& xpr, const Scalar& s) : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1) { @@ -37,13 +37,27 @@ struct CommaInitializer } template + EIGEN_DEVICE_FUNC inline CommaInitializer(XprType& xpr, const DenseBase& other) : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows()) { m_xpr.block(0, 0, other.rows(), other.cols()) = other; } + /* Copy/Move constructor which transfers ownership. This is crucial in + * absence of return value optimization to avoid assertions during destruction. 
*/ + // FIXME in C++11 mode this could be replaced by a proper RValue constructor + EIGEN_DEVICE_FUNC + inline CommaInitializer(const CommaInitializer& o) + : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) { + // Mark original object as finished. In absence of R-value references we need to const_cast: + const_cast(o).m_row = m_xpr.rows(); + const_cast(o).m_col = m_xpr.cols(); + const_cast(o).m_currentBlockRows = 0; + } + /* inserts a scalar value in the target matrix */ + EIGEN_DEVICE_FUNC CommaInitializer& operator,(const Scalar& s) { if (m_col==m_xpr.cols()) @@ -63,6 +77,7 @@ struct CommaInitializer /* inserts a matrix expression in the target matrix */ template + EIGEN_DEVICE_FUNC CommaInitializer& operator,(const DenseBase& other) { if(other.cols()==0 || other.rows()==0) @@ -88,7 +103,11 @@ struct CommaInitializer return *this; } + EIGEN_DEVICE_FUNC inline ~CommaInitializer() +#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS + EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception) +#endif { eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows() && m_col == m_xpr.cols() @@ -102,9 +121,10 @@ struct CommaInitializer * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished()); * \endcode */ + EIGEN_DEVICE_FUNC inline XprType& finished() { return m_xpr; } - XprType& m_xpr; // target expression + XprType& m_xpr; // target expression Index m_row; // current row id Index m_col; // current col id Index m_currentBlockRows; // current block height diff --git a/nuparu/include/Eigen/src/Core/CoreEvaluators.h b/nuparu/include/Eigen/src/Core/CoreEvaluators.h new file mode 100644 index 00000000..f97dc33d --- /dev/null +++ b/nuparu/include/Eigen/src/Core/CoreEvaluators.h @@ -0,0 +1,1376 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_COREEVALUATORS_H +#define EIGEN_COREEVALUATORS_H + +namespace Eigen { + +namespace internal { + +// This class returns the evaluator kind from the expression storage kind. +// Default assumes index based accessors +template +struct storage_kind_to_evaluator_kind { + typedef IndexBased Kind; +}; + +// This class returns the evaluator shape from the expression storage kind. +// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc. +template struct storage_kind_to_shape; + +template<> struct storage_kind_to_shape { typedef DenseShape Shape; }; +template<> struct storage_kind_to_shape { typedef SolverShape Shape; }; +template<> struct storage_kind_to_shape { typedef PermutationShape Shape; }; +template<> struct storage_kind_to_shape { typedef TranspositionsShape Shape; }; + +// Evaluators have to be specialized with respect to various criteria such as: +// - storage/structure/shape +// - scalar type +// - etc. +// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators. +// We currently distinguish the following kind of evaluators: +// - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) +// - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. 
+// - mapbase_evaluator for Map, Block, Ref +// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) + +template< typename T, + typename LhsKind = typename evaluator_traits::Kind, + typename RhsKind = typename evaluator_traits::Kind, + typename LhsScalar = typename traits::Scalar, + typename RhsScalar = typename traits::Scalar> struct binary_evaluator; + +template< typename T, + typename Kind = typename evaluator_traits::Kind, + typename Scalar = typename T::Scalar> struct unary_evaluator; + +// evaluator_traits contains traits for evaluator + +template +struct evaluator_traits_base +{ + // by default, get evaluator kind and shape from storage + typedef typename storage_kind_to_evaluator_kind::StorageKind>::Kind Kind; + typedef typename storage_kind_to_shape::StorageKind>::Shape Shape; + + // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a + // temporary; 0 if not. + static const int AssumeAliasing = 0; +}; + +// Default evaluator traits +template +struct evaluator_traits : public evaluator_traits_base +{ +}; + + +// By default, we assume a unary expression: +template +struct evaluator : public unary_evaluator +{ + typedef unary_evaluator Base; + EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {} +}; + + +// TODO: Think about const-correctness +template +struct evaluator + : evaluator +{ + EIGEN_DEVICE_FUNC + explicit evaluator(const T& xpr) : evaluator(xpr) {} +}; + +// ---------- base class for all evaluators ---------- + +template +struct evaluator_base : public noncopyable +{ + // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. + typedef traits ExpressionTraits; + + enum { + Alignment = 0 + }; +}; + +// -------------------- Matrix and Array -------------------- +// +// evaluator is a common base class for the +// Matrix and Array evaluators. 
+// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, +// so no need for more sophisticated dispatching. + +template +struct evaluator > + : evaluator_base +{ + typedef PlainObjectBase PlainObjectType; + typedef typename PlainObjectType::Scalar Scalar; + typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = PlainObjectType::IsRowMajor, + IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime, + RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, + ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, + + CoeffReadCost = NumTraits::ReadCost, + Flags = traits::EvaluatorFlags, + Alignment = traits::Alignment + }; + + EIGEN_DEVICE_FUNC evaluator() + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) + : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 
0 : m.outerStride()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + if (IsRowMajor) + return m_data[row * m_outerStride.value() + col]; + else + return m_data[row + col * m_outerStride.value()]; + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_data[index]; + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + if (IsRowMajor) + return const_cast(m_data)[row * m_outerStride.value() + col]; + else + return const_cast(m_data)[row + col * m_outerStride.value()]; + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return const_cast(m_data)[index]; + } + + template + PacketType packet(Index row, Index col) const + { + if (IsRowMajor) + return ploadt(m_data + row * m_outerStride.value() + col); + else + return ploadt(m_data + row + col * m_outerStride.value()); + } + + template + PacketType packet(Index index) const + { + return ploadt(m_data + index); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + if (IsRowMajor) + return pstoret + (const_cast(m_data) + row * m_outerStride.value() + col, x); + else + return pstoret + (const_cast(m_data) + row + col * m_outerStride.value(), x); + } + + template + void writePacket(Index index, const PacketType& x) + { + return pstoret(const_cast(m_data) + index, x); + } + +protected: + const Scalar *m_data; + + // We do not need to know the outer stride for vectors + variable_if_dynamic m_outerStride; +}; + +template +struct evaluator > + : evaluator > > +{ + typedef Matrix XprType; + + EIGEN_DEVICE_FUNC evaluator() {} + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) + { } +}; + +template +struct evaluator > + : evaluator > > +{ + typedef Array XprType; + + EIGEN_DEVICE_FUNC evaluator() {} + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) + { } +}; + +// -------------------- Transpose 
-------------------- + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef Transpose XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags ^ RowMajorBit, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(col, row); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(col, row); + } + + EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(col, row); + } + + template + PacketType packet(Index index) const + { + return m_argImpl.template packet(index); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + m_argImpl.template writePacket(col, row, x); + } + + template + void writePacket(Index index, const PacketType& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + evaluator m_argImpl; +}; + +// -------------------- CwiseNullaryOp -------------------- +// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator. +// Likewise, there is not need to more sophisticated dispatching here. + +template +struct evaluator > + : evaluator_base > +{ + typedef CwiseNullaryOp XprType; + typedef typename internal::remove_all::type PlainObjectTypeCleaned; + + enum { + CoeffReadCost = internal::functor_traits::Cost, + + Flags = (evaluator::Flags + & ( HereditaryBits + | (functor_has_linear_access::ret ? 
LinearAccessBit : 0) + | (functor_traits::PacketAccess ? PacketAccessBit : 0))) + | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), + Alignment = AlignedMax + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) + : m_functor(n.functor()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(row, col); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(index); + } + + template + PacketType packet(Index row, Index col) const + { + return m_functor.template packetOp(row, col); + } + + template + PacketType packet(Index index) const + { + return m_functor.template packetOp(index); + } + +protected: + const NullaryOp m_functor; +}; + +// -------------------- CwiseUnaryOp -------------------- + +template +struct unary_evaluator, IndexBased > + : evaluator_base > +{ + typedef CwiseUnaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = evaluator::Flags + & (HereditaryBits | LinearAccessBit | (functor_traits::PacketAccess ? 
PacketAccessBit : 0)), + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_argImpl.coeff(index)); + } + + template + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_argImpl.template packet(row, col)); + } + + template + PacketType packet(Index index) const + { + return m_functor.packetOp(m_argImpl.template packet(index)); + } + +protected: + const UnaryOp m_functor; + evaluator m_argImpl; +}; + +// -------------------- CwiseBinaryOp -------------------- + +// this is a binary expression +template +struct evaluator > + : public binary_evaluator > +{ + typedef CwiseBinaryOp XprType; + typedef binary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct binary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseBinaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + LhsFlags = evaluator::Flags, + RhsFlags = evaluator::Flags, + SameType = is_same::value, + StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( + HereditaryBits + | (int(LhsFlags) & int(RhsFlags) & + ( (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? 
PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment,evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); + } + + template + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_lhsImpl.template packet(row, col), + m_rhsImpl.template packet(row, col)); + } + + template + PacketType packet(Index index) const + { + return m_functor.packetOp(m_lhsImpl.template packet(index), + m_rhsImpl.template packet(index)); + } + +protected: + const BinaryOp m_functor; + evaluator m_lhsImpl; + evaluator m_rhsImpl; +}; + +// -------------------- CwiseUnaryView -------------------- + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef CwiseUnaryView XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = (evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)), + + Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... 
+ }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_unaryOp(op.functor()), + m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_unaryOp(m_argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_unaryOp(m_argImpl.coeff(index)); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_unaryOp(m_argImpl.coeffRef(row, col)); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_unaryOp(m_argImpl.coeffRef(index)); + } + +protected: + const UnaryOp m_unaryOp; + evaluator m_argImpl; +}; + +// -------------------- Map -------------------- + +// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ? 
+// but that might complicate template specialization +template +struct mapbase_evaluator; + +template +struct mapbase_evaluator : evaluator_base +{ + typedef Derived XprType; + typedef typename XprType::PointerType PointerType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::RowsAtCompileTime, + ColsAtCompileTime = XprType::ColsAtCompileTime, + CoeffReadCost = NumTraits::ReadCost + }; + + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) + : m_data(const_cast(map.data())), + m_xpr(map) + { + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_data[index * m_xpr.innerStride()]; + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_data[index * m_xpr.innerStride()]; + } + + template + PacketType packet(Index row, Index col) const + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); + return internal::ploadt(ptr); + } + + template + PacketType packet(Index index) const + { + return internal::ploadt(m_data + index * m_xpr.innerStride()); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); + return internal::pstoret(ptr, x); + } + + template + void writePacket(Index index, const PacketType& x) + { + internal::pstoret(m_data + index * m_xpr.innerStride(), x); + } + +protected: + 
PointerType m_data; + const XprType& m_xpr; +}; + +template +struct evaluator > + : public mapbase_evaluator, PlainObjectType> +{ + typedef Map XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + + PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit), + LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? 
~int(0) : ~int(LinearAccessBit), + Flags = int( evaluator::Flags) & (LinearAccessMask&PacketAccessMask), + + Alignment = int(MapOptions)&int(AlignedMask) + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) + : mapbase_evaluator(map) + { } +}; + +// -------------------- Ref -------------------- + +template +struct evaluator > + : public mapbase_evaluator, PlainObjectType> +{ + typedef Ref XprType; + + enum { + Flags = evaluator >::Flags, + Alignment = evaluator >::Alignment + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) + : mapbase_evaluator(ref) + { } +}; + +// -------------------- Block -------------------- + +template::ret> struct block_evaluator; + +template +struct evaluator > + : block_evaluator +{ + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime, + + ArgTypeIsRowMajor = (int(evaluator::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ArgTypeIsRowMajor, + HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsArgType + ? 
int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) + && (InnerStrideAtCompileTime == 1) + ? PacketAccessBit : 0, + + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + FlagsRowMajorBit = XprType::Flags&RowMajorBit, + Flags0 = evaluator::Flags & ( (HereditaryBits & ~RowMajorBit) | + DirectAccessBit | + MaskPacketAccessBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, + + PacketAlignment = unpacket_traits::alignment, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) + }; + typedef block_evaluator block_evaluator_type; + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } +}; + +// no direct-access => dispatch to a unary evaluator +template +struct block_evaluator + : unary_evaluator > +{ + typedef Block XprType; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : unary_evaluator(block) + {} +}; + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef Block XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) + : m_argImpl(block.nestedExpression()), + m_startRow(block.startRow()), + m_startCol(block.startCol()) + { } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + RowsAtCompileTime = XprType::RowsAtCompileTime + }; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC 
CoeffReturnType coeff(Index index) const + { + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + template + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(m_startRow.value() + row, m_startCol.value() + col); + } + + template + PacketType packet(Index index) const + { + return packet(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + return m_argImpl.template writePacket(m_startRow.value() + row, m_startCol.value() + col, x); + } + + template + void writePacket(Index index, const PacketType& x) + { + return writePacket(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, + x); + } + +protected: + evaluator m_argImpl; + const variable_if_dynamic m_startRow; + const variable_if_dynamic m_startCol; +}; + +// TODO: This evaluator does not actually use the child evaluator; +// all action is via the data() as returned by the Block expression. + +template +struct block_evaluator + : mapbase_evaluator, + typename Block::PlainObject> +{ + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : mapbase_evaluator(block) + { + // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime + eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned"); + } +}; + + +// -------------------- Select -------------------- +// NOTE shall we introduce a ternary_evaluator? 
+ +// TODO enable vectorization for Select +template +struct evaluator > + : evaluator_base > +{ + typedef Select XprType; + enum { + CoeffReadCost = evaluator::CoeffReadCost + + EIGEN_PLAIN_ENUM_MAX(evaluator::CoeffReadCost, + evaluator::CoeffReadCost), + + Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits, + + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment) + }; + + inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) + : m_conditionImpl(select.conditionMatrix()), + m_thenImpl(select.thenMatrix()), + m_elseImpl(select.elseMatrix()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + if (m_conditionImpl.coeff(row, col)) + return m_thenImpl.coeff(row, col); + else + return m_elseImpl.coeff(row, col); + } + + inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + if (m_conditionImpl.coeff(index)) + return m_thenImpl.coeff(index); + else + return m_elseImpl.coeff(index); + } + +protected: + evaluator m_conditionImpl; + evaluator m_thenImpl; + evaluator m_elseImpl; +}; + + +// -------------------- Replicate -------------------- + +template +struct unary_evaluator > + : evaluator_base > +{ + typedef Replicate XprType; + typedef typename XprType::CoeffReturnType CoeffReturnType; + enum { + Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor + }; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + LinearAccessMask = XprType::IsVectorAtCompileTime ? 
LinearAccessBit : 0, + Flags = (evaluator::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits::Flags & RowMajorBit), + + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) + : m_arg(replicate.nestedExpression()), + m_argImpl(m_arg), + m_rows(replicate.nestedExpression().rows()), + m_cols(replicate.nestedExpression().cols()) + {} + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows.value(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? col + : col % m_cols.value(); + + return m_argImpl.coeff(actual_row, actual_col); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_index = internal::traits::RowsAtCompileTime==1 + ? (ColFactor==1 ? index : index%m_cols.value()) + : (RowFactor==1 ? index : index%m_rows.value()); + + return m_argImpl.coeff(actual_index); + } + + template + PacketType packet(Index row, Index col) const + { + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows.value(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? col + : col % m_cols.value(); + + return m_argImpl.template packet(actual_row, actual_col); + } + + template + PacketType packet(Index index) const + { + const Index actual_index = internal::traits::RowsAtCompileTime==1 + ? (ColFactor==1 ? index : index%m_cols.value()) + : (RowFactor==1 ? 
index : index%m_rows.value()); + + return m_argImpl.template packet(actual_index); + } + +protected: + const ArgTypeNested m_arg; + evaluator m_argImpl; + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + + +// -------------------- PartialReduxExpr -------------------- + +template< typename ArgType, typename MemberOp, int Direction> +struct evaluator > + : evaluator_base > +{ + typedef PartialReduxExpr XprType; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + typedef typename ArgType::Scalar InputScalar; + typedef typename XprType::Scalar Scalar; + enum { + TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) + }; + typedef typename MemberOp::template Cost CostOpType; + enum { + CoeffReadCost = TraversalSize==Dynamic ? HugeCost + : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value), + + Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&(HereditaryBits&(~RowMajorBit))), + + Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) + : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? 
HugeCost : int(CostOpType::value)); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(j)); + else + return m_functor(m_arg.row(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(index)); + else + return m_functor(m_arg.row(index)); + } + +protected: + const ArgTypeNested m_arg; + const MemberOp m_functor; +}; + + +// -------------------- MatrixWrapper and ArrayWrapper -------------------- +// +// evaluator_wrapper_base is a common base class for the +// MatrixWrapper and ArrayWrapper evaluators. + +template +struct evaluator_wrapper_base + : evaluator_base +{ + typedef typename remove_all::type ArgType; + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + + typedef typename ArgType::Scalar Scalar; + typedef typename ArgType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(row, col); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(row, col); + } + + template + PacketType packet(Index index) const + { + return m_argImpl.template packet(index); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + m_argImpl.template 
writePacket(row, col, x); + } + + template + void writePacket(Index index, const PacketType& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + evaluator m_argImpl; +}; + +template +struct unary_evaluator > + : evaluator_wrapper_base > +{ + typedef MatrixWrapper XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) + { } +}; + +template +struct unary_evaluator > + : evaluator_wrapper_base > +{ + typedef ArrayWrapper XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) + { } +}; + + +// -------------------- Reverse -------------------- + +// defined in Reverse.h: +template struct reverse_packet_cond; + +template +struct unary_evaluator > + : evaluator_base > +{ + typedef Reverse XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::IsRowMajor, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + ReversePacket = (Direction == BothDirections) + || ((Direction == Vertical) && IsColMajor) + || ((Direction == Horizontal) && IsRowMajor), + + CoeffReadCost = evaluator::CoeffReadCost, + + // let's enable LinearAccess only with vectorization because of the product overhead + // FIXME enable DirectAccess with negative strides? + Flags0 = evaluator::Flags, + LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) + || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1)) + ? LinearAccessBit : 0, + + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess), + + Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f. 
+ }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse) + : m_argImpl(reverse.nestedExpression()), + m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1), + m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1) + { } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, + ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, + ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); + } + + template + PacketType packet(Index row, Index col) const + { + enum { + PacketSize = unpacket_traits::size, + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 + }; + typedef internal::reverse_packet_cond reverse_packet; + return reverse_packet::run(m_argImpl.template packet( + ReverseRow ? m_rows.value() - row - OffsetRow : row, + ReverseCol ? m_cols.value() - col - OffsetCol : col)); + } + + template + PacketType packet(Index index) const + { + enum { PacketSize = unpacket_traits::size }; + return preverse(m_argImpl.template packet(m_rows.value() * m_cols.value() - index - PacketSize)); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + // FIXME we could factorize some code with packet(i,j) + enum { + PacketSize = unpacket_traits::size, + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 + }; + typedef internal::reverse_packet_cond reverse_packet; + m_argImpl.template writePacket( + ReverseRow ? 
m_rows.value() - row - OffsetRow : row, + ReverseCol ? m_cols.value() - col - OffsetCol : col, + reverse_packet::run(x)); + } + + template + void writePacket(Index index, const PacketType& x) + { + enum { PacketSize = unpacket_traits::size }; + m_argImpl.template writePacket + (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x)); + } + +protected: + evaluator m_argImpl; + + // If we do not reverse rows, then we do not need to know the number of rows; same for columns + // Nonetheless, in this case it is important to set to 1 such that the coeff(index) method works fine for vectors. + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + + +// -------------------- Diagonal -------------------- + +template +struct evaluator > + : evaluator_base > +{ + typedef Diagonal XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + Flags = (unsigned int)evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit, + + Alignment = 0 + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) + : m_argImpl(diagonal.nestedExpression()), + m_index(diagonal.index()) + { } + + typedef typename XprType::Scalar Scalar; + // FIXME having to check whether ArgType is sparse here i not very nice. 
+ typedef typename internal::conditional::value, + typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const + { + return m_argImpl.coeff(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index + rowOffset(), index + colOffset()); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index) + { + return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); + } + +protected: + evaluator m_argImpl; + const internal::variable_if_dynamicindex m_index; + +private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } +}; + + +//---------------------------------------------------------------------- +// deprecated code +//---------------------------------------------------------------------- + +// -------------------- EvalToTemp -------------------- + +// expression class for evaluating nested expression to a temporary + +template class EvalToTemp; + +template +struct traits > + : public traits +{ }; + +template +class EvalToTemp + : public dense_xpr_base >::type +{ + public: + + typedef typename dense_xpr_base::type Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) + + explicit EvalToTemp(const ArgType& arg) + : m_arg(arg) + { } + + const ArgType& arg() const + { + return m_arg; + } + + Index rows() const + { + return m_arg.rows(); + } + + Index cols() const + { + return m_arg.cols(); + } + + private: + const ArgType& m_arg; +}; + +template +struct evaluator > + : public evaluator +{ + typedef EvalToTemp XprType; + typedef typename ArgType::PlainObject PlainObject; + typedef evaluator Base; + + 
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.arg()) + { + ::new (static_cast(this)) Base(m_result); + } + + // This constructor is used when nesting an EvalTo evaluator in another evaluator + EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) + : m_result(arg) + { + ::new (static_cast(this)) Base(m_result); + } + +protected: + PlainObject m_result; +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COREEVALUATORS_H diff --git a/nuparu/include/Eigen/src/Core/CoreIterators.h b/nuparu/include/Eigen/src/Core/CoreIterators.h index 6da4683d..4eb42b93 100644 --- a/nuparu/include/Eigen/src/Core/CoreIterators.h +++ b/nuparu/include/Eigen/src/Core/CoreIterators.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,47 +15,113 @@ namespace Eigen { /* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core */ -/** \ingroup SparseCore_Module - * \class InnerIterator - * \brief An InnerIterator allows to loop over the element of a sparse (or dense) matrix or expression - * - * todo +namespace internal { + +template +class inner_iterator_selector; + +} + +/** \class InnerIterator + * \brief An InnerIterator allows to loop over the element of any matrix expression. + * + * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed. 
+ * + * TODO: add a usage example */ +template +class InnerIterator +{ +protected: + typedef internal::inner_iterator_selector::Kind> IteratorType; + typedef internal::evaluator EvaluatorType; + typedef typename internal::traits::Scalar Scalar; +public: + /** Construct an iterator over the \a outerId -th row or column of \a xpr */ + InnerIterator(const XprType &xpr, const Index &outerId) + : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize()) + {} + + /// \returns the value of the current coefficient. + EIGEN_STRONG_INLINE Scalar value() const { return m_iter.value(); } + /** Increment the iterator \c *this to the next non-zero coefficient. + * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView + */ + EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; } + /// \returns the column or row index of the current coefficient. + EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } + /// \returns the row index of the current coefficient. + EIGEN_STRONG_INLINE Index row() const { return m_iter.row(); } + /// \returns the column index of the current coefficient. + EIGEN_STRONG_INLINE Index col() const { return m_iter.col(); } + /// \returns \c true if the iterator \c *this still references a valid coefficient. 
+ EIGEN_STRONG_INLINE operator bool() const { return m_iter; } + +protected: + EvaluatorType m_eval; + IteratorType m_iter; +private: + // If you get here, then you're not using the right InnerIterator type, e.g.: + // SparseMatrix A; + // SparseMatrix::InnerIterator it(A,0); + template InnerIterator(const EigenBase&,Index outer); +}; + +namespace internal { -// generic version for dense matrix and expressions -template class DenseBase::InnerIterator +// Generic inner iterator implementation for dense objects +template +class inner_iterator_selector { - protected: - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; - - enum { IsRowMajor = (Derived::Flags&RowMajorBit)==RowMajorBit }; - public: - EIGEN_STRONG_INLINE InnerIterator(const Derived& expr, Index outer) - : m_expression(expr), m_inner(0), m_outer(outer), m_end(expr.innerSize()) - {} - - EIGEN_STRONG_INLINE Scalar value() const - { - return (IsRowMajor) ? m_expression.coeff(m_outer, m_inner) - : m_expression.coeff(m_inner, m_outer); - } - - EIGEN_STRONG_INLINE InnerIterator& operator++() { m_inner++; return *this; } - - EIGEN_STRONG_INLINE Index index() const { return m_inner; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? index() : m_outer; } - - EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } - - protected: - const Derived& m_expression; - Index m_inner; - const Index m_outer; - const Index m_end; +protected: + typedef evaluator EvaluatorType; + typedef typename traits::Scalar Scalar; + enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit }; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize) + : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize) + {} + + EIGEN_STRONG_INLINE Scalar value() const + { + return (IsRowMajor) ? 
m_eval.coeff(m_outer, m_inner) + : m_eval.coeff(m_inner, m_outer); + } + + EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; } + + EIGEN_STRONG_INLINE Index index() const { return m_inner; } + inline Index row() const { return IsRowMajor ? m_outer : index(); } + inline Index col() const { return IsRowMajor ? index() : m_outer; } + + EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } + +protected: + const EvaluatorType& m_eval; + Index m_inner; + const Index m_outer; + const Index m_end; }; +// For iterator-based evaluator, inner-iterator is already implemented as +// evaluator<>::InnerIterator +template +class inner_iterator_selector + : public evaluator::InnerIterator +{ +protected: + typedef typename evaluator::InnerIterator Base; + typedef evaluator EvaluatorType; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/) + : Base(eval, outerId) + {} +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_COREITERATORS_H diff --git a/nuparu/include/Eigen/src/Core/CwiseBinaryOp.h b/nuparu/include/Eigen/src/Core/CwiseBinaryOp.h index 586f77aa..e42c3031 100644 --- a/nuparu/include/Eigen/src/Core/CwiseBinaryOp.h +++ b/nuparu/include/Eigen/src/Core/CwiseBinaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -56,72 +56,51 @@ struct traits > typename Rhs::Scalar ) >::type Scalar; - typedef typename promote_storage_type::StorageKind, - typename traits::StorageKind>::ret StorageKind; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; + typedef typename cwise_promote_storage_type::StorageKind, + typename traits::StorageKind, + BinaryOp>::ret StorageKind; + typedef typename promote_index_type::StorageIndex, + typename traits::StorageIndex>::type StorageIndex; typedef typename Lhs::Nested LhsNested; typedef typename Rhs::Nested RhsNested; typedef typename remove_reference::type _LhsNested; typedef typename remove_reference::type _RhsNested; enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - SameType = is_same::value, - StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit), - Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( - HereditaryBits - | (int(LhsFlags) & int(RhsFlags) & - ( AlignedBit - | (StorageOrdersAgree ? LinearAccessBit : 0) - | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) - ) - ) - ), - Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), - CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits::Cost + Flags = _LhsNested::Flags & RowMajorBit }; }; } // end namespace internal -// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor -// that would take two operands of different types. If there were such an example, then this check should be -// moved to the BinaryOp functors, on a per-case basis. 
This would however require a change in the BinaryOp functors, as -// currently they take only one typename Scalar template parameter. -// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. -// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to -// add together a float matrix and a double matrix. -#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ - EIGEN_STATIC_ASSERT((internal::functor_is_product_like::ret \ - ? int(internal::scalar_product_traits::Defined) \ - : int(internal::is_same::value)), \ - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - template class CwiseBinaryOpImpl; -template -class CwiseBinaryOp : internal::no_assignment_operator, +template +class CwiseBinaryOp : public CwiseBinaryOpImpl< - BinaryOp, Lhs, Rhs, - typename internal::promote_storage_type::StorageKind, - typename internal::traits::StorageKind>::ret> + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>, + internal::no_assignment_operator { public: + + typedef typename internal::remove_all::type Lhs; + typedef typename internal::remove_all::type Rhs; typedef typename CwiseBinaryOpImpl< - BinaryOp, Lhs, Rhs, - typename internal::promote_storage_type::StorageKind, - typename internal::traits::StorageKind>::ret>::Base Base; + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) - typedef typename internal::nested::type LhsNested; - typedef typename internal::nested::type RhsNested; + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; typedef typename 
internal::remove_reference::type _LhsNested; typedef typename internal::remove_reference::type _RhsNested; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp()) : m_lhs(aLhs), m_rhs(aRhs), m_functor(func) { @@ -131,6 +110,7 @@ class CwiseBinaryOp : internal::no_assignment_operator, eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { // return the fixed size type if available to enable compile time optimizations if (internal::traits::type>::RowsAtCompileTime==Dynamic) @@ -138,6 +118,7 @@ class CwiseBinaryOp : internal::no_assignment_operator, else return m_lhs.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { // return the fixed size type if available to enable compile time optimizations if (internal::traits::type>::ColsAtCompileTime==Dynamic) @@ -147,10 +128,13 @@ class CwiseBinaryOp : internal::no_assignment_operator, } /** \returns the left hand side nested expression */ + EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; } /** \returns the right hand side nested expression */ + EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; } /** \returns the functor representing the binary operation */ + EIGEN_DEVICE_FUNC const BinaryOp& functor() const { return m_functor; } protected: @@ -159,41 +143,13 @@ class CwiseBinaryOp : internal::no_assignment_operator, const BinaryOp m_functor; }; -template -class CwiseBinaryOpImpl - : public internal::dense_xpr_base >::type +// Generic API dispatcher +template +class CwiseBinaryOpImpl + : public internal::generic_xpr_base >::type { - typedef CwiseBinaryOp Derived; - public: - - typedef typename internal::dense_xpr_base >::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE( Derived ) - - EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const - { - return derived().functor()(derived().lhs().coeff(rowId, colId), - 
derived().rhs().coeff(rowId, colId)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return derived().functor().packetOp(derived().lhs().template packet(rowId, colId), - derived().rhs().template packet(rowId, colId)); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return derived().functor()(derived().lhs().coeff(index), - derived().rhs().coeff(index)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return derived().functor().packetOp(derived().lhs().template packet(index), - derived().rhs().template packet(index)); - } +public: + typedef typename internal::generic_xpr_base >::type Base; }; /** replaces \c *this by \c *this - \a other. @@ -205,8 +161,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator-=(const MatrixBase &other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -219,11 +174,11 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator+=(const MatrixBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } } // end namespace Eigen #endif // EIGEN_CWISE_BINARY_OP_H + diff --git a/nuparu/include/Eigen/src/Core/CwiseNullaryOp.h b/nuparu/include/Eigen/src/Core/CwiseNullaryOp.h index a93bab2d..2bc6933d 100644 --- a/nuparu/include/Eigen/src/Core/CwiseNullaryOp.h +++ b/nuparu/include/Eigen/src/Core/CwiseNullaryOp.h @@ -35,37 +35,35 @@ template struct traits > : traits { enum { - Flags = (traits::Flags - & ( HereditaryBits - | (functor_has_linear_access::ret ? LinearAccessBit : 0) - | (functor_traits::PacketAccess ? PacketAccessBit : 0))) - | (functor_traits::IsRepeatable ? 
0 : EvalBeforeNestingBit), - CoeffReadCost = functor_traits::Cost + Flags = traits::Flags & RowMajorBit }; }; } template -class CwiseNullaryOp : internal::no_assignment_operator, - public internal::dense_xpr_base< CwiseNullaryOp >::type +class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp >::type, internal::no_assignment_operator { public: typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp) - CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp()) - : m_rows(nbRows), m_cols(nbCols), m_functor(func) + EIGEN_DEVICE_FUNC + CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp()) + : m_rows(rows), m_cols(cols), m_functor(func) { - eigen_assert(nbRows >= 0 - && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows) - && nbCols >= 0 - && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols)); + eigen_assert(rows >= 0 + && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) + && cols >= 0 + && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const { return m_functor(rowId, colId); @@ -77,6 +75,7 @@ class CwiseNullaryOp : internal::no_assignment_operator, return m_functor.packetOp(rowId, colId); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const { return m_functor(index); @@ -89,6 +88,7 @@ class CwiseNullaryOp : internal::no_assignment_operator, } /** \returns the functor representing the nullary operation */ + EIGEN_DEVICE_FUNC const NullaryOp& functor() const { return m_functor; } protected: @@ -113,10 +113,10 @@ class CwiseNullaryOp : internal::no_assignment_operator, */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp 
+EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) { - return CwiseNullaryOp(rows, cols, func); + return CwiseNullaryOp(rows, cols, func); } /** \returns an expression of a matrix defined by a custom functor \a func @@ -132,16 +132,19 @@ DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f * * The template parameter \a CustomNullaryOp is the type of the functor. * + * Here is an example with C++11 random generators: \include random_cpp11.cpp + * Output: \verbinclude random_cpp11.out + * * \sa class CwiseNullaryOp */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); - else return CwiseNullaryOp(size, 1, func); + if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); + else return CwiseNullaryOp(size, 1, func); } /** \returns an expression of a matrix defined by a custom functor \a func @@ -155,19 +158,19 @@ DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(const CustomNullaryOp& func) { - return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); + return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); } /** \returns an expression of a constant matrix of value \a value * - * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this DenseBase type. * * This variant is meant to be used for dynamic-size matrix types. 
For fixed-size types, - * it is redundant to pass \a nbRows and \a nbCols as arguments, so Zero() should be used + * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used * instead. * * The template parameter \a CustomNullaryOp is the type of the functor. @@ -176,9 +179,9 @@ DenseBase::NullaryExpr(const CustomNullaryOp& func) */ template EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType -DenseBase::Constant(Index nbRows, Index nbCols, const Scalar& value) +DenseBase::Constant(Index rows, Index cols, const Scalar& value) { - return DenseBase::NullaryExpr(nbRows, nbCols, internal::scalar_constant_op(value)); + return DenseBase::NullaryExpr(rows, cols, internal::scalar_constant_op(value)); } /** \returns an expression of a constant matrix of value \a value @@ -242,7 +245,7 @@ EIGEN_STRONG_INLINE const typename DenseBase::SequentialLinSpacedReturn DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); } /** @@ -255,7 +258,7 @@ DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) - return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); } /** @@ -276,7 +279,7 @@ EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedRetu DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); } /** @@ -289,7 
+292,7 @@ DenseBase::LinSpaced(const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) - return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); } /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ @@ -297,9 +300,10 @@ template bool DenseBase::isApproxToConstant (const Scalar& val, const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if(!internal::isApprox(this->coeff(i, j), val, prec)) + if(!internal::isApprox(self.coeff(i, j), val, prec)) return false; return true; } @@ -353,8 +357,8 @@ PlainObjectBase::setConstant(Index size, const Scalar& val) /** Resizes to the given size, and sets all coefficients in this expression to the given \a value. 
* - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * \param val the value to which all coefficients are set * * Example: \include Matrix_setConstant_int_int.cpp @@ -364,9 +368,9 @@ PlainObjectBase::setConstant(Index size, const Scalar& val) */ template EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setConstant(Index nbRows, Index nbCols, const Scalar& val) +PlainObjectBase::setConstant(Index rows, Index cols, const Scalar& val) { - resize(nbRows, nbCols); + resize(rows, cols); return setConstant(val); } @@ -387,7 +391,7 @@ template EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); + return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); } /** @@ -425,9 +429,9 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, */ template EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType -DenseBase::Zero(Index nbRows, Index nbCols) +DenseBase::Zero(Index rows, Index cols) { - return Constant(nbRows, nbCols, Scalar(0)); + return Constant(rows, cols, Scalar(0)); } /** \returns an expression of a zero vector. @@ -481,9 +485,10 @@ DenseBase::Zero() template bool DenseBase::isZero(const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) return false; return true; } @@ -520,8 +525,8 @@ PlainObjectBase::setZero(Index newSize) /** Resizes to the given size, and sets all coefficients in this expression to zero. 
* - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setZero_int_int.cpp * Output: \verbinclude Matrix_setZero_int_int.out @@ -530,9 +535,9 @@ PlainObjectBase::setZero(Index newSize) */ template EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setZero(Index nbRows, Index nbCols) +PlainObjectBase::setZero(Index rows, Index cols) { - resize(nbRows, nbCols); + resize(rows, cols); return setConstant(Scalar(0)); } @@ -540,7 +545,7 @@ PlainObjectBase::setZero(Index nbRows, Index nbCols) /** \returns an expression of a matrix where all coefficients equal one. * - * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this MatrixBase type. * * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, @@ -554,9 +559,9 @@ PlainObjectBase::setZero(Index nbRows, Index nbCols) */ template EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType -DenseBase::Ones(Index nbRows, Index nbCols) +DenseBase::Ones(Index rows, Index cols) { - return Constant(nbRows, nbCols, Scalar(1)); + return Constant(rows, cols, Scalar(1)); } /** \returns an expression of a vector where all coefficients equal one. @@ -646,8 +651,8 @@ PlainObjectBase::setOnes(Index newSize) /** Resizes to the given size, and sets all coefficients in this expression to one. 
* - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setOnes_int_int.cpp * Output: \verbinclude Matrix_setOnes_int_int.out @@ -656,9 +661,9 @@ PlainObjectBase::setOnes(Index newSize) */ template EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setOnes(Index nbRows, Index nbCols) +PlainObjectBase::setOnes(Index rows, Index cols) { - resize(nbRows, nbCols); + resize(rows, cols); return setConstant(Scalar(1)); } @@ -666,7 +671,7 @@ PlainObjectBase::setOnes(Index nbRows, Index nbCols) /** \returns an expression of the identity matrix (not necessarily square). * - * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this MatrixBase type. * * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, @@ -680,9 +685,9 @@ PlainObjectBase::setOnes(Index nbRows, Index nbCols) */ template EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType -MatrixBase::Identity(Index nbRows, Index nbCols) +MatrixBase::Identity(Index rows, Index cols) { - return DenseBase::NullaryExpr(nbRows, nbCols, internal::scalar_identity_op()); + return DenseBase::NullaryExpr(rows, cols, internal::scalar_identity_op()); } /** \returns an expression of the identity matrix (not necessarily square). 
@@ -716,18 +721,19 @@ template bool MatrixBase::isIdentity (const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) { for(Index i = 0; i < rows(); ++i) { if(i == j) { - if(!internal::isApprox(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isApprox(self.coeff(i, j), static_cast(1), prec)) return false; } else { - if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) return false; } } @@ -740,6 +746,7 @@ namespace internal { template=16)> struct setIdentity_impl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Derived& run(Derived& m) { return m = Derived::Identity(m.rows(), m.cols()); @@ -749,7 +756,7 @@ struct setIdentity_impl template struct setIdentity_impl { - typedef typename Derived::Index Index; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Derived& run(Derived& m) { m.setZero(); @@ -776,8 +783,8 @@ EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() /** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this. 
* - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setIdentity_int_int.cpp * Output: \verbinclude Matrix_setIdentity_int_int.out @@ -785,9 +792,9 @@ EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity() */ template -EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index nbRows, Index nbCols) +EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index rows, Index cols) { - derived().resize(nbRows, nbCols); + derived().resize(rows, cols); return setIdentity(); } diff --git a/nuparu/include/Eigen/src/Core/CwiseUnaryOp.h b/nuparu/include/Eigen/src/Core/CwiseUnaryOp.h index f2de749f..da1d1992 100644 --- a/nuparu/include/Eigen/src/Core/CwiseUnaryOp.h +++ b/nuparu/include/Eigen/src/Core/CwiseUnaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -44,10 +44,7 @@ struct traits > typedef typename XprType::Nested XprTypeNested; typedef typename remove_reference::type _XprTypeNested; enum { - Flags = _XprTypeNested::Flags & ( - HereditaryBits | LinearAccessBit | AlignedBit - | (functor_traits::PacketAccess ? 
PacketAccessBit : 0)), - CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits::Cost + Flags = _XprTypeNested::Flags & RowMajorBit }; }; } @@ -56,28 +53,34 @@ template class CwiseUnaryOpImpl; template -class CwiseUnaryOp : internal::no_assignment_operator, - public CwiseUnaryOpImpl::StorageKind> +class CwiseUnaryOp : public CwiseUnaryOpImpl::StorageKind>, internal::no_assignment_operator { public: typedef typename CwiseUnaryOpImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) + typedef typename internal::remove_all::type NestedExpression; - inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + EIGEN_DEVICE_FUNC + explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) : m_xpr(xpr), m_functor(func) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); } /** \returns the functor representing the unary operation */ + EIGEN_DEVICE_FUNC const UnaryOp& functor() const { return m_functor; } /** \returns the nested expression */ + EIGEN_DEVICE_FUNC const typename internal::remove_all::type& nestedExpression() const { return m_xpr; } /** \returns the nested expression */ + EIGEN_DEVICE_FUNC typename internal::remove_all::type& nestedExpression() { return m_xpr.const_cast_derived(); } @@ -86,39 +89,13 @@ class CwiseUnaryOp : internal::no_assignment_operator, const UnaryOp m_functor; }; -// This is the generic implementation for dense storage. -// It can be used for any expression types implementing the dense concept. 
-template -class CwiseUnaryOpImpl - : public internal::dense_xpr_base >::type +// Generic API dispatcher +template +class CwiseUnaryOpImpl + : public internal::generic_xpr_base >::type { - public: - - typedef CwiseUnaryOp Derived; - typedef typename internal::dense_xpr_base >::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) - - EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const - { - return derived().functor()(derived().nestedExpression().coeff(rowId, colId)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return derived().functor().packetOp(derived().nestedExpression().template packet(rowId, colId)); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return derived().functor()(derived().nestedExpression().coeff(index)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return derived().functor().packetOp(derived().nestedExpression().template packet(index)); - } +public: + typedef typename internal::generic_xpr_base >::type Base; }; } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/CwiseUnaryView.h b/nuparu/include/Eigen/src/Core/CwiseUnaryView.h index b2638d32..72244751 100644 --- a/nuparu/include/Eigen/src/Core/CwiseUnaryView.h +++ b/nuparu/include/Eigen/src/Core/CwiseUnaryView.h @@ -37,8 +37,8 @@ struct traits > typedef typename MatrixType::Nested MatrixTypeNested; typedef typename remove_all::type _MatrixTypeNested; enum { - Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)), - CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits::Cost, + FlagsLvalueBit = is_lvalue::value ? 
LvalueBit : 0, + Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions MatrixTypeInnerStride = inner_stride_at_compile_time::ret, // need to cast the sizeof's from size_t to int explicitly, otherwise: // "error: no integral type can represent all of the enumerator values @@ -62,8 +62,9 @@ class CwiseUnaryView : public CwiseUnaryViewImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) + typedef typename internal::remove_all::type NestedExpression; - inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp()) + explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) : m_matrix(mat), m_functor(func) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView) @@ -83,11 +84,19 @@ class CwiseUnaryView : public CwiseUnaryViewImpl::type m_matrix; + typename internal::ref_selector::type m_matrix; ViewOp m_functor; }; +// Generic API dispatcher +template +class CwiseUnaryViewImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; +}; + template class CwiseUnaryViewImpl : public internal::dense_xpr_base< CwiseUnaryView >::type @@ -100,38 +109,18 @@ class CwiseUnaryViewImpl EIGEN_DENSE_PUBLIC_INTERFACE(Derived) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) - inline Scalar* data() { return &coeffRef(0); } - inline const Scalar* data() const { return &coeff(0); } + EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return 
derived().nestedExpression().outerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); } - - EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const - { - return derived().functor()(derived().nestedExpression().coeff(row, col)); - } - - EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return derived().functor()(derived().nestedExpression().coeff(index)); - } - - EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) - { - return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col)); - } - - EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index)); - } }; } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/DenseBase.h b/nuparu/include/Eigen/src/Core/DenseBase.h index c5800f6c..e181dafa 100644 --- a/nuparu/include/Eigen/src/Core/DenseBase.h +++ b/nuparu/include/Eigen/src/Core/DenseBase.h @@ -40,31 +40,43 @@ static inline void check_DenseIndex_is_signed() { */ template class DenseBase #ifndef EIGEN_PARSED_BY_DOXYGEN - : public internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real> + : public internal::special_scalar_op_base::Scalar, + typename NumTraits::Scalar>::Real, + DenseCoeffsBase > #else : public DenseCoeffsBase #endif // not EIGEN_PARSED_BY_DOXYGEN { public: - using internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real>::operator*; - class InnerIterator; + /** Inner iterator type to iterate over the coefficients of a row or column. + * \sa class InnerIterator + */ + typedef Eigen::InnerIterator InnerIterator; typedef typename internal::traits::StorageKind StorageKind; - /** \brief The type of indices - * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. - * \sa \ref TopicPreprocessorDirectives. 
- */ - typedef typename internal::traits::Index Index; + /** + * \brief The type used to store indices + * \details This typedef is relevant for types that store multiple indices such as + * PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index + * \sa \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase. + */ + typedef typename internal::traits::StorageIndex StorageIndex; + /** The numeric type of the expression's coefficients, e.g. float, double, int or std::complex, etc. */ typedef typename internal::traits::Scalar Scalar; - typedef typename internal::packet_traits::type PacketScalar; + + /** The numeric type of the expression's coefficients, e.g. float, double, int or std::complex, etc. + * + * It is an alias for the Scalar type */ + typedef Scalar value_type; + typedef typename NumTraits::Real RealScalar; + typedef internal::special_scalar_op_base > Base; - typedef DenseCoeffsBase Base; + using Base::operator*; + using Base::operator/; using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -74,16 +86,6 @@ template class DenseBase using Base::colIndexByOuterInner; using Base::coeff; using Base::coeffByOuterInner; - using Base::packet; - using Base::packetByOuterInner; - using Base::writePacket; - using Base::writePacketByOuterInner; - using Base::coeffRef; - using Base::coeffRefByOuterInner; - using Base::copyCoeff; - using Base::copyCoeffByOuterInner; - using Base::copyPacket; - using Base::copyPacketByOuterInner; using Base::operator(); using Base::operator[]; using Base::x; @@ -169,30 +171,54 @@ template class DenseBase InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime) : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime), - CoeffReadCost = internal::traits::CoeffReadCost, - /**< This is a rough measure of how expensive it is to read one coefficient from - * this expression. 
- */ - InnerStrideAtCompileTime = internal::inner_stride_at_compile_time::ret, OuterStrideAtCompileTime = internal::outer_stride_at_compile_time::ret }; + + typedef typename internal::find_best_packet::type PacketScalar; - enum { ThisConstantIsPrivateInPlainObjectBase }; + enum { IsPlainObjectBase = 0 }; + + /** The plain matrix type corresponding to this expression. + * \sa PlainObject */ + typedef Matrix::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainMatrix; + + /** The plain array type corresponding to this expression. + * \sa PlainObject */ + typedef Array::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainArray; + + /** \brief The plain matrix or array type corresponding to this expression. + * + * This is not necessarily exactly the return type of eval(). In the case of plain matrices, + * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed + * that the return type of eval() is either PlainObject or const PlainObject&. + */ + typedef typename internal::conditional::XprKind,MatrixXpr >::value, + PlainMatrix, PlainArray>::type PlainObject; /** \returns the number of nonzero coefficients which is in practice the number * of stored coefficients. */ + EIGEN_DEVICE_FUNC inline Index nonZeros() const { return size(); } - /** \returns true if either the number of rows or the number of columns is equal to 1. - * In other words, this function returns - * \code rows()==1 || cols()==1 \endcode - * \sa rows(), cols(), IsVectorAtCompileTime. */ /** \returns the outer size. * * \note For a vector, this returns just 1. 
For a matrix (non-vector), this is the major dimension * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a * column-major matrix, and the number of rows for a row-major matrix. */ + EIGEN_DEVICE_FUNC Index outerSize() const { return IsVectorAtCompileTime ? 1 @@ -204,6 +230,7 @@ template class DenseBase * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a * column-major matrix, and the number of columns for a row-major matrix. */ + EIGEN_DEVICE_FUNC Index innerSize() const { return IsVectorAtCompileTime ? this->size() @@ -214,6 +241,7 @@ template class DenseBase * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does * nothing else. */ + EIGEN_DEVICE_FUNC void resize(Index newSize) { EIGEN_ONLY_USED_FOR_DEBUG(newSize); @@ -224,22 +252,22 @@ template class DenseBase * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does * nothing else. */ - void resize(Index nbRows, Index nbCols) + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { - EIGEN_ONLY_USED_FOR_DEBUG(nbRows); - EIGEN_ONLY_USED_FOR_DEBUG(nbCols); - eigen_assert(nbRows == this->rows() && nbCols == this->cols() + EIGEN_ONLY_USED_FOR_DEBUG(rows); + EIGEN_ONLY_USED_FOR_DEBUG(cols); + eigen_assert(rows == this->rows() && cols == this->cols() && "DenseBase::resize() does not actually allow to resize."); } #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. 
*/ - typedef CwiseNullaryOp,Derived> SequentialLinSpacedReturnType; + typedef CwiseNullaryOp,PlainObject> SequentialLinSpacedReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ - typedef CwiseNullaryOp,Derived> RandomAccessLinSpacedReturnType; + typedef CwiseNullaryOp,PlainObject> RandomAccessLinSpacedReturnType; /** \internal the return type of MatrixBase::eigenvalues() */ typedef Matrix::Scalar>::Real, internal::traits::ColsAtCompileTime, 1> EigenvaluesReturnType; @@ -247,110 +275,122 @@ template class DenseBase /** Copies \a other into *this. \returns a reference to *this. */ template + EIGEN_DEVICE_FUNC Derived& operator=(const DenseBase& other); /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ + EIGEN_DEVICE_FUNC Derived& operator=(const DenseBase& other); template + EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase &other); template + EIGEN_DEVICE_FUNC Derived& operator+=(const EigenBase &other); template + EIGEN_DEVICE_FUNC Derived& operator-=(const EigenBase &other); template + EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue& func); -#ifndef EIGEN_PARSED_BY_DOXYGEN - /** Copies \a other into *this without evaluating other. \returns a reference to *this. */ + /** \internal + * Copies \a other into *this without evaluating other. \returns a reference to *this. 
+ * \deprecated */ template + EIGEN_DEVICE_FUNC Derived& lazyAssign(const DenseBase& other); -#endif // not EIGEN_PARSED_BY_DOXYGEN + EIGEN_DEVICE_FUNC CommaInitializer operator<< (const Scalar& s); + /** \deprecated it now returns \c *this */ template - const Flagged flagged() const; + EIGEN_DEPRECATED + const Derived& flagged() const + { return derived(); } template + EIGEN_DEVICE_FUNC CommaInitializer operator<< (const DenseBase& other); - Eigen::Transpose transpose(); - typedef typename internal::add_const >::type ConstTransposeReturnType; + typedef Transpose TransposeReturnType; + EIGEN_DEVICE_FUNC + TransposeReturnType transpose(); + typedef typename internal::add_const >::type ConstTransposeReturnType; + EIGEN_DEVICE_FUNC ConstTransposeReturnType transpose() const; + EIGEN_DEVICE_FUNC void transposeInPlace(); -#ifndef EIGEN_NO_DEBUG - protected: - template - void checkTransposeAliasing(const OtherDerived& other) const; - public: -#endif - - static const ConstantReturnType + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index rows, Index cols, const Scalar& value); - static const ConstantReturnType + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index size, const Scalar& value); - static const ConstantReturnType + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(const Scalar& value); - static const SequentialLinSpacedReturnType + EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high); - static const RandomAccessLinSpacedReturnType + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Index size, const Scalar& low, const Scalar& high); - static const SequentialLinSpacedReturnType + EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType LinSpaced(Sequential_t, const Scalar& low, const Scalar& high); - static const RandomAccessLinSpacedReturnType + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType 
LinSpaced(const Scalar& low, const Scalar& high); - template - static const CwiseNullaryOp + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func); - template - static const CwiseNullaryOp + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp NullaryExpr(Index size, const CustomNullaryOp& func); - template - static const CwiseNullaryOp + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp NullaryExpr(const CustomNullaryOp& func); - static const ConstantReturnType Zero(Index rows, Index cols); - static const ConstantReturnType Zero(Index size); - static const ConstantReturnType Zero(); - static const ConstantReturnType Ones(Index rows, Index cols); - static const ConstantReturnType Ones(Index size); - static const ConstantReturnType Ones(); - - void fill(const Scalar& value); - Derived& setConstant(const Scalar& value); - Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high); - Derived& setLinSpaced(const Scalar& low, const Scalar& high); - Derived& setZero(); - Derived& setOnes(); - Derived& setRandom(); - - template + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size); + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(); + + EIGEN_DEVICE_FUNC void fill(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setZero(); + EIGEN_DEVICE_FUNC Derived& setOnes(); + EIGEN_DEVICE_FUNC Derived& setRandom(); + + template EIGEN_DEVICE_FUNC 
bool isApprox(const DenseBase& other, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const RealScalar& other, const RealScalar& prec = NumTraits::dummy_precision()) const; - template + template EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const DenseBase& other, const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isZero(const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isOnes(const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits::dummy_precision()) const; inline bool hasNaN() const; inline bool allFinite() const; + EIGEN_DEVICE_FUNC inline Derived& operator*=(const Scalar& other); + EIGEN_DEVICE_FUNC inline Derived& operator/=(const Scalar& other); typedef typename internal::add_const_on_value_type::type>::type EvalReturnType; @@ -358,7 +398,10 @@ template class DenseBase * * Notice that in the case of a plain matrix or vector (not an expression) this function just returns * a const reference, in order to avoid a useless copy. + * + * \warning Be careful with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink. 
*/ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvalReturnType eval() const { // Even though MSVC does not honor strong inlining when the return type @@ -366,61 +409,68 @@ template class DenseBase // size types on MSVC. return typename internal::eval::type(derived()); } - + /** swaps *this with the expression \a other. * */ template - void swap(const DenseBase& other, - int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase) + EIGEN_DEVICE_FUNC + void swap(const DenseBase& other) { - SwapWrapper(derived()).lazyAssign(other.derived()); + EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); } /** swaps *this with the matrix or array \a other. * */ template + EIGEN_DEVICE_FUNC void swap(PlainObjectBase& other) { - SwapWrapper(derived()).lazyAssign(other.derived()); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.derived(), internal::swap_assign_op()); } + EIGEN_DEVICE_FUNC inline const NestByValue nestByValue() const; + EIGEN_DEVICE_FUNC inline const ForceAlignedAccess forceAlignedAccess() const; + EIGEN_DEVICE_FUNC inline ForceAlignedAccess forceAlignedAccess(); + template EIGEN_DEVICE_FUNC + inline const typename internal::conditional,Derived&>::type forceAlignedAccessIf() const; + template EIGEN_DEVICE_FUNC + inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); - inline const NestByValue nestByValue() const; - inline const ForceAlignedAccess forceAlignedAccess() const; - inline ForceAlignedAccess forceAlignedAccess(); - template inline const typename internal::conditional,Derived&>::type forceAlignedAccessIf() const; - template inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); + EIGEN_DEVICE_FUNC Scalar sum() const; + EIGEN_DEVICE_FUNC Scalar mean() const; + 
EIGEN_DEVICE_FUNC Scalar trace() const; - Scalar sum() const; - Scalar mean() const; - Scalar trace() const; + EIGEN_DEVICE_FUNC Scalar prod() const; - Scalar prod() const; + EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff() const; + EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff() const; - typename internal::traits::Scalar minCoeff() const; - typename internal::traits::Scalar maxCoeff() const; - - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff(IndexType* row, IndexType* col) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff(IndexType* row, IndexType* col) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff(IndexType* index) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff(IndexType* index) const; template - typename internal::result_of::Scalar)>::type - redux(const BinaryOp& func) const; + EIGEN_DEVICE_FUNC + Scalar redux(const BinaryOp& func) const; template + EIGEN_DEVICE_FUNC void visit(Visitor& func) const; inline const WithFormat format(const IOFormat& fmt) const; /** \returns the unique coefficient of a 1x1 expression */ + EIGEN_DEVICE_FUNC CoeffReturnType value() const { EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) @@ -428,8 +478,8 @@ template class DenseBase return derived().coeff(0,0); } - bool all(void) const; - bool any(void) const; + bool all() const; + bool any() const; Index count() const; typedef VectorwiseOp RowwiseReturnType; @@ -437,14 +487,35 @@ template class DenseBase typedef VectorwiseOp ColwiseReturnType; typedef const VectorwiseOp ConstColwiseReturnType; - ConstRowwiseReturnType rowwise() const; - RowwiseReturnType rowwise(); - ConstColwiseReturnType colwise() const; - ColwiseReturnType colwise(); + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_rowwise.cpp + * Output: \verbinclude 
MatrixBase_rowwise.out + * + * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const { + return ConstRowwiseReturnType(derived()); + } + EIGEN_DEVICE_FUNC RowwiseReturnType rowwise(); + + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_colwise.cpp + * Output: \verbinclude MatrixBase_colwise.out + * + * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const { + return ConstColwiseReturnType(derived()); + } + EIGEN_DEVICE_FUNC ColwiseReturnType colwise(); - static const CwiseNullaryOp,Derived> Random(Index rows, Index cols); - static const CwiseNullaryOp,Derived> Random(Index size); - static const CwiseNullaryOp,Derived> Random(); + typedef CwiseNullaryOp,PlainObject> RandomReturnType; + static const RandomReturnType Random(Index rows, Index cols); + static const RandomReturnType Random(Index size); + static const RandomReturnType Random(); template const Select @@ -462,14 +533,33 @@ template class DenseBase template RealScalar lpNorm() const; template + EIGEN_DEVICE_FUNC const Replicate replicate() const; - const Replicate replicate(Index rowFacor,Index colFactor) const; + /** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate_int_int.cpp + * Output: \verbinclude MatrixBase_replicate_int_int.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate + */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC + const Replicate replicate(Index rowFactor, Index colFactor) const + { + return Replicate(derived(), rowFactor, colFactor); + } typedef Reverse ReverseReturnType; typedef const Reverse ConstReverseReturnType; - ReverseReturnType reverse(); - 
ConstReverseReturnType reverse() const; - void reverseInPlace(); + EIGEN_DEVICE_FUNC ReverseReturnType reverse(); + /** This is the const version of reverse(). */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const + { + return ConstReverseReturnType(derived()); + } + EIGEN_DEVICE_FUNC void reverseInPlace(); #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase # include "../plugins/BlockMethods.h" @@ -478,27 +568,18 @@ template class DenseBase # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS -#ifdef EIGEN2_SUPPORT - - Block corner(CornerType type, Index cRows, Index cCols); - const Block corner(CornerType type, Index cRows, Index cCols) const; - template - Block corner(CornerType type); - template - const Block corner(CornerType type) const; - -#endif // EIGEN2_SUPPORT - // disable the use of evalTo for dense objects with a nice compilation error - template inline void evalTo(Dest& ) const + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& ) const { EIGEN_STATIC_ASSERT((internal::is_same::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS); } protected: /** Default constructor. Do nothing. */ - DenseBase() + EIGEN_DEVICE_FUNC DenseBase() { /* Just checks for self-consistency of the flags. 
* Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down @@ -511,9 +592,9 @@ template class DenseBase } private: - explicit DenseBase(int); - DenseBase(int,int); - template explicit DenseBase(const DenseBase&); + EIGEN_DEVICE_FUNC explicit DenseBase(int); + EIGEN_DEVICE_FUNC DenseBase(int,int); + template EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase&); }; } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/DenseCoeffsBase.h b/nuparu/include/Eigen/src/Core/DenseCoeffsBase.h index 3c890f21..820a90e6 100644 --- a/nuparu/include/Eigen/src/Core/DenseCoeffsBase.h +++ b/nuparu/include/Eigen/src/Core/DenseCoeffsBase.h @@ -35,7 +35,6 @@ class DenseCoeffsBase : public EigenBase { public: typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; @@ -61,6 +60,7 @@ class DenseCoeffsBase : public EigenBase using Base::size; using Base::derived; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const { return int(Derived::RowsAtCompileTime) == 1 ? 0 @@ -69,6 +69,7 @@ class DenseCoeffsBase : public EigenBase : inner; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const { return int(Derived::ColsAtCompileTime) == 1 ? 
0 @@ -91,13 +92,15 @@ class DenseCoeffsBase : public EigenBase * * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().coeff(row, col); + && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeff(row,col); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { return coeff(rowIndexByOuterInner(outer, inner), @@ -108,11 +111,12 @@ class DenseCoeffsBase : public EigenBase * * \sa operator()(Index,Index), operator[](Index) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); - return derived().coeff(row, col); + return coeff(row, col); } /** Short version: don't use this function, use @@ -130,11 +134,14 @@ class DenseCoeffsBase : public EigenBase * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); - return derived().coeff(index); + return internal::evaluator(derived()).coeff(index); } @@ -146,15 +153,14 @@ class DenseCoeffsBase : public EigenBase * z() const, w() const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator[](Index index) const { - #ifndef EIGEN2_SUPPORT EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) - #endif eigen_assert(index >= 0 && index < size()); - return derived().coeff(index); + return coeff(index); } /** \returns the coefficient at 
given index. @@ -167,30 +173,35 @@ class DenseCoeffsBase : public EigenBase * z() const, w() const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index index) const { eigen_assert(index >= 0 && index < size()); - return derived().coeff(index); + return coeff(index); } /** equivalent to operator[](0). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType x() const { return (*this)[0]; } /** equivalent to operator[](1). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType y() const { return (*this)[1]; } /** equivalent to operator[](2). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType z() const { return (*this)[2]; } /** equivalent to operator[](3). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType w() const { return (*this)[3]; } @@ -207,9 +218,9 @@ class DenseCoeffsBase : public EigenBase template EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().template packet(row,col); + typedef typename internal::packet_traits::type DefaultPacketType; + eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return internal::evaluator(derived()).template packet(row,col); } @@ -234,8 +245,11 @@ class DenseCoeffsBase : public EigenBase template EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + typedef typename internal::packet_traits::type DefaultPacketType; eigen_internal_assert(index >= 0 && index < size()); - return derived().template packet(index); + return internal::evaluator(derived()).template packet(index); } protected: @@ -278,7 +292,6 @@ class DenseCoeffsBase : public DenseCoeffsBase Base; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; 
typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; @@ -311,13 +324,15 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().coeffRef(row, col); + && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeffRef(row,col); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRefByOuterInner(Index outer, Index inner) { @@ -330,12 +345,13 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() && col >= 0 && col < cols()); - return derived().coeffRef(row, col); + return coeffRef(row, col); } @@ -354,11 +370,14 @@ class DenseCoeffsBase : public DenseCoeffsBase::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); - return derived().coeffRef(index); + return internal::evaluator(derived()).coeffRef(index); } /** \returns a reference to the coefficient at given index. @@ -368,15 +387,14 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); - return derived().coeffRef(index); + return coeffRef(index); } /** \returns a reference to the coefficient at given index. @@ -388,167 +406,37 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); - return derived().coeffRef(index); + return coeffRef(index); } /** equivalent to operator[](0). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& x() { return (*this)[0]; } /** equivalent to operator[](1). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& y() { return (*this)[1]; } /** equivalent to operator[](2). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& z() { return (*this)[2]; } /** equivalent to operator[](3). 
*/ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& w() { return (*this)[3]; } - - /** \internal - * Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility - * to ensure that a packet really starts there. This method is only available on expressions having the - * PacketAccessBit. - * - * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select - * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets - * starting at an address which is a multiple of the packet size. - */ - - template - EIGEN_STRONG_INLINE void writePacket - (Index row, Index col, const typename internal::packet_traits::type& val) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().template writePacket(row,col,val); - } - - - /** \internal */ - template - EIGEN_STRONG_INLINE void writePacketByOuterInner - (Index outer, Index inner, const typename internal::packet_traits::type& val) - { - writePacket(rowIndexByOuterInner(outer, inner), - colIndexByOuterInner(outer, inner), - val); - } - - /** \internal - * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility - * to ensure that a packet really starts there. This method is only available on expressions having the - * PacketAccessBit and the LinearAccessBit. - * - * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select - * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets - * starting at an address which is a multiple of the packet size. 
- */ - template - EIGEN_STRONG_INLINE void writePacket - (Index index, const typename internal::packet_traits::type& val) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().template writePacket(index,val); - } - -#ifndef EIGEN_PARSED_BY_DOXYGEN - - /** \internal Copies the coefficient at position (row,col) of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase& other) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().coeffRef(row, col) = other.derived().coeff(row, col); - } - - /** \internal Copies the coefficient at the given index of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase& other) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().coeffRef(index) = other.derived().coeff(index); - } - - - template - EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase& other) - { - const Index row = rowIndexByOuterInner(outer,inner); - const Index col = colIndexByOuterInner(outer,inner); - // derived() is important here: copyCoeff() may be reimplemented in Derived! - derived().copyCoeff(row, col, other); - } - - /** \internal Copies the packet at position (row,col) of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. 
- * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase& other) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().template writePacket(row, col, - other.derived().template packet(row, col)); - } - - /** \internal Copies the packet at the given index of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase& other) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().template writePacket(index, - other.derived().template packet(index)); - } - - /** \internal */ - template - EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase& other) - { - const Index row = rowIndexByOuterInner(outer,inner); - const Index col = colIndexByOuterInner(outer,inner); - // derived() is important here: copyCoeff() may be reimplemented in Derived! - derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other); - } -#endif - }; /** \brief Base class providing direct read-only coefficient access to matrices and arrays. 
@@ -568,7 +456,6 @@ class DenseCoeffsBase : public DenseCoeffsBase Base; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename NumTraits::Real RealScalar; @@ -581,6 +468,7 @@ class DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase public: typedef DenseCoeffsBase Base; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename NumTraits::Real RealScalar; @@ -652,6 +542,7 @@ class DenseCoeffsBase * * \sa outerStride(), rowStride(), colStride() */ + EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().innerStride(); @@ -662,6 +553,7 @@ class DenseCoeffsBase * * \sa innerStride(), rowStride(), colStride() */ + EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().outerStride(); @@ -677,6 +569,7 @@ class DenseCoeffsBase * * \sa innerStride(), outerStride(), colStride() */ + EIGEN_DEVICE_FUNC inline Index rowStride() const { return Derived::IsRowMajor ? outerStride() : innerStride(); @@ -686,6 +579,7 @@ class DenseCoeffsBase * * \sa innerStride(), outerStride(), rowStride() */ + EIGEN_DEVICE_FUNC inline Index colStride() const { return Derived::IsRowMajor ? 
innerStride() : outerStride(); @@ -694,33 +588,42 @@ class DenseCoeffsBase namespace internal { -template +template struct first_aligned_impl { - static inline typename Derived::Index run(const Derived&) + static inline Index run(const Derived&) { return 0; } }; -template -struct first_aligned_impl +template +struct first_aligned_impl { - static inline typename Derived::Index run(const Derived& m) + static inline Index run(const Derived& m) { - return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size()); + return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size()); } }; -/** \internal \returns the index of the first element of the array that is well aligned for vectorization. +/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization. + * + * \tparam Alignment requested alignment in Bytes. * * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more * documentation. 
*/ +template +static inline Index first_aligned(const DenseBase& m) +{ + enum { ReturnZero = (int(evaluator::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) }; + return first_aligned_impl::run(m.derived()); +} + template -static inline typename Derived::Index first_aligned(const Derived& m) +static inline Index first_default_aligned(const DenseBase& m) { - return first_aligned_impl - - ::run(m); + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type DefaultPacketType; + return internal::first_aligned::alignment),Derived>(m); } template::ret> diff --git a/nuparu/include/Eigen/src/Core/DenseStorage.h b/nuparu/include/Eigen/src/Core/DenseStorage.h index 3e7f9c1b..34048461 100644 --- a/nuparu/include/Eigen/src/Core/DenseStorage.h +++ b/nuparu/include/Eigen/src/Core/DenseStorage.h @@ -3,7 +3,7 @@ // // Copyright (C) 2008 Gael Guennebaud // Copyright (C) 2006-2009 Benoit Jacob -// Copyright (C) 2010 Hauke Heibel +// Copyright (C) 2010-2013 Hauke Heibel // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -24,26 +24,37 @@ namespace internal { struct constructor_without_unaligned_array_assert {}; +template +EIGEN_DEVICE_FUNC +void check_static_allocation_size() +{ + // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit + #if EIGEN_STACK_ALLOCATION_LIMIT + EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + #endif +} + /** \internal * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: * to 16 bytes boundary if the total size is a multiple of 16 bytes. 
*/ template + : compute_default_alignment::value > struct plain_array { T array[Size]; - plain_array() + EIGEN_DEVICE_FUNC + plain_array() { - EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + check_static_allocation_size(); } - plain_array(constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) { - EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + check_static_allocation_size(); } }; @@ -56,41 +67,100 @@ struct plain_array template EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; } #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((reinterpret_cast(eigen_unaligned_array_assert_workaround_gcc47(array)) & sizemask) == 0 \ + eigen_assert((reinterpret_cast(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \ && "this assertion is explained here: " \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ " **** READ THIS WEB PAGE !!! ****"); #else #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((reinterpret_cast(array) & sizemask) == 0 \ + eigen_assert((reinterpret_cast(array) & (sizemask)) == 0 \ && "this assertion is explained here: " \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ " **** READ THIS WEB PAGE !!! 
****"); #endif +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + template struct plain_array { - EIGEN_USER_ALIGN16 T array[Size]; + EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size]; + EIGEN_DEVICE_FUNC plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf); - EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); + check_static_allocation_size(); } + EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) { - EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + check_static_allocation_size(); } }; template struct plain_array { - EIGEN_USER_ALIGN16 T array[1]; - plain_array() {} - plain_array(constructor_without_unaligned_array_assert) {} + T array[1]; + EIGEN_DEVICE_FUNC plain_array() {} + EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {} }; } // end namespace internal @@ -114,33 +184,50 @@ template class DenseSt { internal::plain_array m_data; public: - inline DenseStorage() {} - inline 
DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() {} + EIGEN_DEVICE_FUNC + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()) {} - inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {} - inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); } - static inline DenseIndex rows(void) {return _Rows;} - static inline DenseIndex cols(void) {return _Cols;} - inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {} - inline void resize(DenseIndex,DenseIndex,DenseIndex) {} - inline const T *data() const { return m_data.array; } - inline T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC + DenseStorage(const DenseStorage& other) : m_data(other.m_data) {} + EIGEN_DEVICE_FUNC + DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) m_data = other.m_data; + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols); + EIGEN_UNUSED_VARIABLE(size); + EIGEN_UNUSED_VARIABLE(rows); + EIGEN_UNUSED_VARIABLE(cols); + } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); } + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } }; // null matrix template class DenseStorage { public: - inline DenseStorage() {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) {} - inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {} - inline void swap(DenseStorage& ) {} - static inline DenseIndex 
rows(void) {return _Rows;} - static inline DenseIndex cols(void) {return _Cols;} - inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {} - inline void resize(DenseIndex,DenseIndex,DenseIndex) {} - inline const T *data() const { return 0; } - inline T *data() { return 0; } + EIGEN_DEVICE_FUNC DenseStorage() {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; } + EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {} + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC const T *data() const { return 0; } + EIGEN_DEVICE_FUNC T *data() { return 0; } }; // more specializations for null matrices; these are necessary to resolve ambiguities @@ -157,86 +244,157 @@ template class DenseStorage class DenseStorage { internal::plain_array m_data; - DenseIndex m_rows; - DenseIndex m_cols; + Index m_rows; + Index m_cols; public: - inline DenseStorage() : m_rows(0), m_cols(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {} - inline DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {} - inline void swap(DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& 
other) + { + if (this != &other) + { + m_data = other.m_data; + m_rows = other.m_rows; + m_cols = other.m_cols; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } - inline DenseIndex rows() const {return m_rows;} - inline DenseIndex cols() const {return m_cols;} - inline void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; } - inline void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; } - inline const T *data() const { return m_data.array; } - inline T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC Index rows() const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols() const {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } }; // dynamic-size matrix with fixed-size storage and fixed width template class DenseStorage { internal::plain_array m_data; - DenseIndex m_rows; + Index m_rows; public: - inline DenseStorage() : m_rows(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {} - inline DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {} - inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } - inline DenseIndex rows(void) const 
{return m_rows;} - inline DenseIndex cols(void) const {return _Cols;} - inline void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; } - inline void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; } - inline const T *data() const { return m_data.array; } - inline T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_rows = other.m_rows; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } }; // dynamic-size matrix with fixed-size storage and fixed height template class DenseStorage { internal::plain_array m_data; - DenseIndex m_cols; + Index m_cols; public: - inline DenseStorage() : m_cols(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {} - inline DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {} - inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } - inline DenseIndex rows(void) const {return _Rows;} - 
inline DenseIndex cols(void) const {return m_cols;} - inline void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; } - inline void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; } - inline const T *data() const { return m_data.array; } - inline T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_cols = other.m_cols; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + void conservativeResize(Index, Index, Index cols) { m_cols = cols; } + void resize(Index, Index, Index cols) { m_cols = cols; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } }; // purely dynamic matrix. 
template class DenseStorage { T *m_data; - DenseIndex m_rows; - DenseIndex m_cols; + Index m_rows; + Index m_cols; public: - inline DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0), m_cols(0) {} - inline DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) - : m_data(internal::conditional_aligned_new_auto(size)), m_rows(nbRows), m_cols(nbCols) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } - inline ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); } - inline void swap(DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) + : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows), m_cols(cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*other.m_cols)) + , m_rows(other.m_rows) + , m_cols(other.m_cols) + { + internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) + : m_data(std::move(other.m_data)) + , m_rows(std::move(other.m_rows)) + , m_cols(std::move(other.m_cols)) + { + other.m_data = nullptr; + other.m_rows = 0; + other.m_cols = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) + { + using std::swap; + swap(m_data, other.m_data); + swap(m_rows, other.m_rows); + swap(m_cols, other.m_cols); + 
return *this; + } +#endif + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } - inline DenseIndex rows(void) const {return m_rows;} - inline DenseIndex cols(void) const {return m_cols;} - inline void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + void conservativeResize(Index size, Index rows, Index cols) { m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*m_cols); - m_rows = nbRows; - m_cols = nbCols; + m_rows = rows; + m_cols = cols; } - void resize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) + EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols) { if(size != m_rows*m_cols) { @@ -247,33 +405,70 @@ template class DenseStorage class DenseStorage { T *m_data; - DenseIndex m_cols; + Index m_cols; public: - inline DenseStorage() : m_data(0), m_cols(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} - inline DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto(size)), m_cols(nbCols) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } - inline ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); } - inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } - static inline DenseIndex rows(void) {return _Rows;} - inline DenseIndex cols(void) const {return m_cols;} - inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols) + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), 
m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_cols(cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0); + EIGEN_UNUSED_VARIABLE(rows); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(_Rows*other.m_cols)) + , m_cols(other.m_cols) + { + internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) + : m_data(std::move(other.m_data)) + , m_cols(std::move(other.m_cols)) + { + other.m_data = nullptr; + other.m_cols = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) + { + using std::swap; + swap(m_data, other.m_data); + swap(m_cols, other.m_cols); + return *this; + } +#endif + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols) { m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, _Rows*m_cols); - m_cols = nbCols; + m_cols = cols; } - EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex nbCols) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols) { if(size != _Rows*m_cols) { @@ -284,32 +479,69 @@ template class DenseStorage class DenseStorage { T *m_data; - DenseIndex m_rows; + Index m_rows; public: - inline DenseStorage() : 
m_data(0), m_rows(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} - inline DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto(size)), m_rows(nbRows) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } - inline ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); } - inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } - inline DenseIndex rows(void) const {return m_rows;} - static inline DenseIndex cols(void) {return _Cols;} - inline void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex) + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols); + EIGEN_UNUSED_VARIABLE(cols); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*_Cols)) + , m_rows(other.m_rows) + { + internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) + : m_data(std::move(other.m_data)) + , m_rows(std::move(other.m_rows)) + { + other.m_data = nullptr; + other.m_rows = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) + { + using std::swap; + swap(m_data, other.m_data); + swap(m_rows, other.m_rows); + return *this; + } +#endif + EIGEN_DEVICE_FUNC 
~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + void conservativeResize(Index size, Index rows, Index) { m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*_Cols); - m_rows = nbRows; + m_rows = rows; } - EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex nbRows, DenseIndex) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index) { if(size != m_rows*_Cols) { @@ -320,10 +552,10 @@ template class DenseStorage struct traits > : traits { - typedef typename nested::type MatrixTypeNested; + typedef typename ref_selector::type MatrixTypeNested; typedef typename remove_reference::type _MatrixTypeNested; typedef typename MatrixType::StorageKind StorageKind; enum { @@ -52,8 +52,7 @@ struct traits > MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), MaxColsAtCompileTime = 1, MaskLvalueBit = is_lvalue::value ? LvalueBit : 0, - Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, - CoeffReadCost = _MatrixTypeNested::CoeffReadCost, + Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions MatrixTypeOuterStride = outer_stride_at_compile_time::ret, InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? 
Dynamic : MatrixTypeOuterStride+1, OuterStrideAtCompileTime = 0 @@ -70,20 +69,28 @@ template class Diagonal typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) - inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + EIGEN_DEVICE_FUNC + explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) + EIGEN_DEVICE_FUNC inline Index rows() const - { return m_index.value()<0 ? (std::min)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min)(m_matrix.rows(),m_matrix.cols()-m_index.value()); } + { + return m_index.value()<0 ? numext::mini(m_matrix.cols(),m_matrix.rows()+m_index.value()) + : numext::mini(m_matrix.rows(),m_matrix.cols()-m_index.value()); + } + EIGEN_DEVICE_FUNC inline Index cols() const { return 1; } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.outerStride() + 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return 0; @@ -95,48 +102,58 @@ template class Diagonal const Scalar >::type ScalarWithConstIfNotLvalue; + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index) { EIGEN_STATIC_ASSERT_LVALUE(MatrixType) return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index) const { return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index) const { return m_matrix.coeff(row+rowOffset(), row+colOffset()); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index idx) { 
EIGEN_STATIC_ASSERT_LVALUE(MatrixType) return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index idx) const { return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index idx) const { return m_matrix.coeff(idx+rowOffset(), idx+colOffset()); } - const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC + inline const typename internal::remove_all::type& nestedExpression() const { return m_matrix; } - int index() const + EIGEN_DEVICE_FUNC + inline Index index() const { return m_index.value(); } @@ -147,10 +164,13 @@ template class Diagonal private: // some compilers may fail to optimize std::max etc in case of compile-time constants... + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; } - // triger a compile time error is someone try to call packet + // trigger a compile-time error if someone try to call packet template typename MatrixType::PacketReturnType packet(Index) const; template typename MatrixType::PacketReturnType packet(Index,Index) const; }; @@ -167,7 +187,7 @@ template inline typename MatrixBase::DiagonalReturnType MatrixBase::diagonal() { - return derived(); + return DiagonalReturnType(derived()); } /** This is the const version of diagonal(). 
*/ @@ -190,18 +210,18 @@ MatrixBase::diagonal() const * * \sa MatrixBase::diagonal(), class Diagonal */ template -inline typename MatrixBase::template DiagonalIndexReturnType::Type +inline typename MatrixBase::DiagonalDynamicIndexReturnType MatrixBase::diagonal(Index index) { - return typename DiagonalIndexReturnType::Type(derived(), index); + return DiagonalDynamicIndexReturnType(derived(), index); } /** This is the const version of diagonal(Index). */ template -inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +inline typename MatrixBase::ConstDiagonalDynamicIndexReturnType MatrixBase::diagonal(Index index) const { - return typename ConstDiagonalIndexReturnType::Type(derived(), index); + return ConstDiagonalDynamicIndexReturnType(derived(), index); } /** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this @@ -216,20 +236,20 @@ MatrixBase::diagonal(Index index) const * * \sa MatrixBase::diagonal(), class Diagonal */ template -template -inline typename MatrixBase::template DiagonalIndexReturnType::Type +template +inline typename MatrixBase::template DiagonalIndexReturnType::Type MatrixBase::diagonal() { - return derived(); + return typename DiagonalIndexReturnType::Type(derived()); } /** This is the const version of diagonal(). 
*/ template -template -inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +template +inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type MatrixBase::diagonal() const { - return derived(); + return typename ConstDiagonalIndexReturnType::Type(derived()); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/DiagonalMatrix.h b/nuparu/include/Eigen/src/Core/DiagonalMatrix.h index e6c220f4..5a9e3abd 100644 --- a/nuparu/include/Eigen/src/Core/DiagonalMatrix.h +++ b/nuparu/include/Eigen/src/Core/DiagonalMatrix.h @@ -22,7 +22,7 @@ class DiagonalBase : public EigenBase typedef typename DiagonalVectorType::Scalar Scalar; typedef typename DiagonalVectorType::RealScalar RealScalar; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, @@ -30,79 +30,62 @@ class DiagonalBase : public EigenBase MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, IsVectorAtCompileTime = 0, - Flags = 0 + Flags = NoPreferredStorageOrderBit }; typedef Matrix DenseMatrixType; typedef DenseMatrixType DenseType; typedef DiagonalMatrix PlainObject; + EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast(this); } + EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } - template - void evalTo(MatrixBase &other) const; - template - void addTo(MatrixBase &other) const - { other.diagonal() += diagonal(); } - template - void subTo(MatrixBase &other) const - { other.diagonal() -= diagonal(); } - + + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } + EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return 
derived().diagonal(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return diagonal().size(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return diagonal().size(); } - /** \returns the diagonal matrix product of \c *this by the matrix \a matrix. - */ template - const DiagonalProduct + EIGEN_DEVICE_FUNC + const Product operator*(const MatrixBase &matrix) const { - return DiagonalProduct(matrix.derived(), derived()); + return Product(derived(),matrix.derived()); } - inline const DiagonalWrapper, const DiagonalVectorType> > + typedef DiagonalWrapper, const DiagonalVectorType> > InverseReturnType; + EIGEN_DEVICE_FUNC + inline const InverseReturnType inverse() const { - return diagonal().cwiseInverse(); + return InverseReturnType(diagonal().cwiseInverse()); } - inline const DiagonalWrapper, const DiagonalVectorType> > + typedef DiagonalWrapper, const DiagonalVectorType> > ScalarMultipleReturnType; + EIGEN_DEVICE_FUNC + inline const ScalarMultipleReturnType operator*(const Scalar& scalar) const { - return diagonal() * scalar; + return ScalarMultipleReturnType(diagonal() * scalar); } - friend inline const DiagonalWrapper, const DiagonalVectorType> > + EIGEN_DEVICE_FUNC + friend inline const ScalarMultipleReturnType operator*(const Scalar& scalar, const DiagonalBase& other) { - return other.diagonal() * scalar; - } - - #ifdef EIGEN2_SUPPORT - template - bool isApprox(const DiagonalBase& other, typename NumTraits::Real precision = NumTraits::dummy_precision()) const - { - return diagonal().isApprox(other.diagonal(), precision); + return ScalarMultipleReturnType(other.diagonal() * scalar); } - template - bool isApprox(const MatrixBase& other, typename NumTraits::Real precision = NumTraits::dummy_precision()) const - { - return toDenseMatrix().isApprox(other, precision); - } - #endif }; -template -template -void DiagonalBase::evalTo(MatrixBase &other) const -{ - other.setZero(); - other.diagonal() = diagonal(); -} #endif /** \class DiagonalMatrix @@ -124,10 +107,9 
@@ struct traits > : traits > { typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType; - typedef Dense StorageKind; - typedef DenseIndex Index; + typedef DiagonalShape StorageKind; enum { - Flags = LvalueBit + Flags = LvalueBit | NoPreferredStorageOrderBit }; }; } @@ -141,7 +123,7 @@ class DiagonalMatrix typedef const DiagonalMatrix& Nested; typedef _Scalar Scalar; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; #endif protected: @@ -151,24 +133,31 @@ class DiagonalMatrix public: /** const version of diagonal(). */ + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return m_diagonal; } /** \returns a reference to the stored vector of diagonal coefficients. */ + EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return m_diagonal; } /** Default constructor without initialization */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix() {} /** Constructs a diagonal matrix with given dimension */ - inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} + EIGEN_DEVICE_FUNC + explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} /** 2D constructor. */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {} /** 3D constructor. */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {} /** Copy constructor. */ template + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const DiagonalBase& other) : m_diagonal(other.diagonal()) {} #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -178,11 +167,13 @@ class DiagonalMatrix /** generic constructor from expression of the diagonal coefficients */ template + EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(const MatrixBase& other) : m_diagonal(other) {} /** Copy operator. 
*/ template + EIGEN_DEVICE_FUNC DiagonalMatrix& operator=(const DiagonalBase& other) { m_diagonal = other.diagonal(); @@ -193,6 +184,7 @@ class DiagonalMatrix /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. */ + EIGEN_DEVICE_FUNC DiagonalMatrix& operator=(const DiagonalMatrix& other) { m_diagonal = other.diagonal(); @@ -201,14 +193,19 @@ class DiagonalMatrix #endif /** Resizes to given size. */ + EIGEN_DEVICE_FUNC inline void resize(Index size) { m_diagonal.resize(size); } /** Sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC inline void setZero() { m_diagonal.setZero(); } /** Resizes and sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC inline void setZero(Index size) { m_diagonal.setZero(size); } /** Sets this matrix to be the identity matrix of the current size. */ + EIGEN_DEVICE_FUNC inline void setIdentity() { m_diagonal.setOnes(); } /** Sets this matrix to be the identity matrix of the given size. 
*/ + EIGEN_DEVICE_FUNC inline void setIdentity(Index size) { m_diagonal.setOnes(size); } }; @@ -232,14 +229,15 @@ struct traits > { typedef _DiagonalVectorType DiagonalVectorType; typedef typename DiagonalVectorType::Scalar Scalar; - typedef typename DiagonalVectorType::Index Index; - typedef typename DiagonalVectorType::StorageKind StorageKind; + typedef typename DiagonalVectorType::StorageIndex StorageIndex; + typedef DiagonalShape StorageKind; + typedef typename traits::XprKind XprKind; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - Flags = traits::Flags & LvalueBit + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + Flags = (traits::Flags & LvalueBit) | NoPreferredStorageOrderBit }; }; } @@ -255,9 +253,11 @@ class DiagonalWrapper #endif /** Constructor from expression of diagonal coefficients to wrap. */ - inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} + EIGEN_DEVICE_FUNC + explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} /** \returns a const reference to the wrapped expression of diagonal coefficients. 
*/ + EIGEN_DEVICE_FUNC const DiagonalVectorType& diagonal() const { return m_diagonal; } protected: @@ -277,7 +277,7 @@ template inline const DiagonalWrapper MatrixBase::asDiagonal() const { - return derived(); + return DiagonalWrapper(derived()); } /** \returns true if *this is approximately equal to a diagonal matrix, @@ -308,6 +308,33 @@ bool MatrixBase::isDiagonal(const RealScalar& prec) const return true; } +namespace internal { + +template<> struct storage_kind_to_shape { typedef DiagonalShape Shape; }; + +struct Diagonal2Dense {}; + +template<> struct AssignmentKind { typedef Diagonal2Dense Kind; }; + +// Diagonal matrix to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment +{ + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + dst.setZero(); + dst.diagonal() = src.diagonal(); + } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + { dst.diagonal() += src.diagonal(); } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + { dst.diagonal() -= src.diagonal(); } +}; + +} // namespace internal + } // end namespace Eigen #endif // EIGEN_DIAGONALMATRIX_H diff --git a/nuparu/include/Eigen/src/Core/DiagonalProduct.h b/nuparu/include/Eigen/src/Core/DiagonalProduct.h index c03a0c2e..d372b938 100644 --- a/nuparu/include/Eigen/src/Core/DiagonalProduct.h +++ b/nuparu/include/Eigen/src/Core/DiagonalProduct.h @@ -13,116 +13,14 @@ namespace Eigen { -namespace internal { -template -struct traits > - : traits -{ - typedef typename scalar_product_traits::ReturnType Scalar; - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - - _StorageOrder = MatrixType::Flags & RowMajorBit ? 
RowMajor : ColMajor, - _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) - ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), - _SameTypes = is_same::value, - // FIXME currently we need same types, but in the future the next rule should be the one - //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), - _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), - _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0, - - Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit,//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit), - CoeffReadCost = NumTraits::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost - }; -}; -} - -template -class DiagonalProduct : internal::no_assignment_operator, - public MatrixBase > -{ - public: - - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct) - - inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal) - : m_matrix(matrix), m_diagonal(diagonal) - { - eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols())); - } - - EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); } - - EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const - { - return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const - { - enum { - StorageOrder = int(MatrixType::Flags) & RowMajorBit ? 
RowMajor : ColMajor - }; - return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const - { - enum { - StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor - }; - const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col; - return packet_impl(row,col,indexInDiagonalVector,typename internal::conditional< - ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft) - ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type()); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const - { - enum { - StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor - }; - return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); - } - - protected: - template - EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const - { - return internal::pmul(m_matrix.template packet(row, col), - internal::pset1(m_diagonal.diagonal().coeff(id))); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const - { - enum { - InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, - DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) - }; - return internal::pmul(m_matrix.template packet(row, col), - m_diagonal.diagonal().template packet(id)); - } - - typename MatrixType::Nested m_matrix; - typename DiagonalType::Nested m_diagonal; -}; - /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal. 
*/ template template -inline const DiagonalProduct +inline const Product MatrixBase::operator*(const DiagonalBase &a_diagonal) const { - return DiagonalProduct(derived(), a_diagonal.derived()); + return Product(derived(),a_diagonal.derived()); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/Dot.h b/nuparu/include/Eigen/src/Core/Dot.h index 9d7651f1..003450f1 100644 --- a/nuparu/include/Eigen/src/Core/Dot.h +++ b/nuparu/include/Eigen/src/Core/Dot.h @@ -29,6 +29,7 @@ template::Scalar,typename traits::Scalar>::ReturnType ResScalar; + EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { return a.template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); @@ -39,6 +40,7 @@ template struct dot_nocheck { typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { return a.transpose().template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); @@ -59,6 +61,7 @@ struct dot_nocheck */ template template +EIGEN_DEVICE_FUNC typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { @@ -73,34 +76,6 @@ MatrixBase::dot(const MatrixBase& other) const return internal::dot_nocheck::run(*this, other); } -#ifdef EIGEN2_SUPPORT -/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable - * (conjugating the second variable). Of course this only makes a difference in the complex case. - * - * This method is only available in EIGEN2_SUPPORT mode. 
- * - * \only_for_vectors - * - * \sa dot() - */ -template -template -typename internal::traits::Scalar -MatrixBase::eigen2_dot(const MatrixBase& other) const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - eigen_assert(size() == other.size()); - - return internal::dot_nocheck::run(other,*this); -} -#endif - - //---------- implementation of L2 norm and related functions ---------- /** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm. @@ -124,7 +99,7 @@ EIGEN_STRONG_INLINE typename NumTraits::Scala template inline typename NumTraits::Scalar>::Real MatrixBase::norm() const { - using std::sqrt; + EIGEN_USING_STD_MATH(sqrt) return sqrt(squaredNorm()); } @@ -138,8 +113,7 @@ template inline const typename MatrixBase::PlainObject MatrixBase::normalized() const { - typedef typename internal::nested::type Nested; - typedef typename internal::remove_reference::type _Nested; + typedef typename internal::nested_eval::type _Nested; _Nested n(derived()); return n / n.norm(); } @@ -164,9 +138,10 @@ template struct lpNorm_selector { typedef typename NumTraits::Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const MatrixBase& m) { - using std::pow; + EIGEN_USING_STD_MATH(pow) return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p); } }; @@ -174,6 +149,7 @@ struct lpNorm_selector template struct lpNorm_selector { + EIGEN_DEVICE_FUNC static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) { return m.cwiseAbs().sum(); @@ -183,6 +159,7 @@ struct lpNorm_selector template struct lpNorm_selector { + EIGEN_DEVICE_FUNC static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) { return m.norm(); @@ -192,6 
+169,7 @@ struct lpNorm_selector template struct lpNorm_selector { + EIGEN_DEVICE_FUNC static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) { return m.cwiseAbs().maxCoeff(); @@ -200,9 +178,11 @@ struct lpNorm_selector } // end namespace internal -/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values - * of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ - * norm, that is the maximum of the absolute values of the coefficients of *this. +/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values + * of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ + * norm, that is the maximum of the absolute values of the coefficients of \c *this. + * + * \note For matrices, this function does not compute the operator-norm. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink. 
* * \sa norm() */ @@ -227,8 +207,8 @@ template bool MatrixBase::isOrthogonal (const MatrixBase& other, const RealScalar& prec) const { - typename internal::nested::type nested(derived()); - typename internal::nested::type otherNested(other.derived()); + typename internal::nested_eval::type nested(derived()); + typename internal::nested_eval::type otherNested(other.derived()); return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm(); } @@ -246,13 +226,13 @@ bool MatrixBase::isOrthogonal template bool MatrixBase::isUnitary(const RealScalar& prec) const { - typename Derived::Nested nested(derived()); + typename internal::nested_eval::type self(derived()); for(Index i = 0; i < cols(); ++i) { - if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast(1), prec)) + if(!internal::isApprox(self.col(i).squaredNorm(), static_cast(1), prec)) return false; for(Index j = 0; j < i; ++j) - if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast(1), prec)) return false; } return true; diff --git a/nuparu/include/Eigen/src/Core/EigenBase.h b/nuparu/include/Eigen/src/Core/EigenBase.h index 2b8dd1b7..79dabda3 100644 --- a/nuparu/include/Eigen/src/Core/EigenBase.h +++ b/nuparu/include/Eigen/src/Core/EigenBase.h @@ -13,7 +13,9 @@ namespace Eigen { -/** Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). +/** \class EigenBase + * + * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). * * In other words, an EigenBase object is an object that can be copied into a MatrixBase. 
* @@ -26,34 +28,52 @@ namespace Eigen { template struct EigenBase { // typedef typename internal::plain_matrix_type::type PlainObject; - + + /** \brief The interface type of indices + * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. + * \deprecated Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead. + * \sa StorageIndex, \ref TopicPreprocessorDirectives. + */ + typedef Eigen::Index Index; + + // FIXME is it needed? typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; /** \returns a reference to the derived object */ + EIGEN_DEVICE_FUNC Derived& derived() { return *static_cast(this); } /** \returns a const reference to the derived object */ + EIGEN_DEVICE_FUNC const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC inline Derived& const_cast_derived() const { return *static_cast(const_cast(this)); } + EIGEN_DEVICE_FUNC inline const Derived& const_derived() const { return *static_cast(this); } /** \returns the number of rows. \sa cols(), RowsAtCompileTime */ + EIGEN_DEVICE_FUNC inline Index rows() const { return derived().rows(); } /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/ + EIGEN_DEVICE_FUNC inline Index cols() const { return derived().cols(); } /** \returns the number of coefficients, which is rows()*cols(). * \sa rows(), cols(), SizeAtCompileTime. 
*/ + EIGEN_DEVICE_FUNC inline Index size() const { return rows() * cols(); } /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */ - template inline void evalTo(Dest& dst) const + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& dst) const { derived().evalTo(dst); } /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */ - template inline void addTo(Dest& dst) const + template + EIGEN_DEVICE_FUNC + inline void addTo(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. @@ -63,7 +83,9 @@ template struct EigenBase } /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */ - template inline void subTo(Dest& dst) const + template + EIGEN_DEVICE_FUNC + inline void subTo(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. @@ -73,7 +95,8 @@ template struct EigenBase } /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */ - template inline void applyThisOnTheRight(Dest& dst) const + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. @@ -81,7 +104,8 @@ template struct EigenBase } /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */ - template inline void applyThisOnTheLeft(Dest& dst) const + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. 
@@ -106,7 +130,7 @@ template template Derived& DenseBase::operator=(const EigenBase &other) { - other.derived().evalTo(derived()); + call_assignment(derived(), other.derived()); return derived(); } @@ -114,7 +138,7 @@ template template Derived& DenseBase::operator+=(const EigenBase &other) { - other.derived().addTo(derived()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -122,40 +146,10 @@ template template Derived& DenseBase::operator-=(const EigenBase &other) { - other.derived().subTo(derived()); - return derived(); -} - -/** replaces \c *this by \c *this * \a other. - * - * \returns a reference to \c *this - */ -template -template -inline Derived& -MatrixBase::operator*=(const EigenBase &other) -{ - other.derived().applyThisOnTheRight(derived()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } -/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=(). - */ -template -template -inline void MatrixBase::applyOnTheRight(const EigenBase &other) -{ - other.derived().applyThisOnTheRight(derived()); -} - -/** replaces \c *this by \c *this * \a other. */ -template -template -inline void MatrixBase::applyOnTheLeft(const EigenBase &other) -{ - other.derived().applyThisOnTheLeft(derived()); -} - } // end namespace Eigen #endif // EIGEN_EIGENBASE_H diff --git a/nuparu/include/Eigen/src/Core/Flagged.h b/nuparu/include/Eigen/src/Core/Flagged.h deleted file mode 100644 index 1f2955fc..00000000 --- a/nuparu/include/Eigen/src/Core/Flagged.h +++ /dev/null @@ -1,140 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_FLAGGED_H -#define EIGEN_FLAGGED_H - -namespace Eigen { - -/** \class Flagged - * \ingroup Core_Module - * - * \brief Expression with modified flags - * - * \param ExpressionType the type of the object of which we are modifying the flags - * \param Added the flags added to the expression - * \param Removed the flags removed from the expression (has priority over Added). - * - * This class represents an expression whose flags have been modified. - * It is the return type of MatrixBase::flagged() - * and most of the time this is the only way it is used. - * - * \sa MatrixBase::flagged() - */ - -namespace internal { -template -struct traits > : traits -{ - enum { Flags = (ExpressionType::Flags | Added) & ~Removed }; -}; -} - -template class Flagged - : public MatrixBase > -{ - public: - - typedef MatrixBase Base; - - EIGEN_DENSE_PUBLIC_INTERFACE(Flagged) - typedef typename internal::conditional::ret, - ExpressionType, const ExpressionType&>::type ExpressionTypeNested; - typedef typename ExpressionType::InnerIterator InnerIterator; - - inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - inline Index outerStride() const { return m_matrix.outerStride(); } - inline Index innerStride() const { return m_matrix.innerStride(); } - - inline CoeffReturnType coeff(Index row, Index col) const - { - return m_matrix.coeff(row, col); - } - - inline CoeffReturnType coeff(Index index) const - { - return m_matrix.coeff(index); - } - - inline const Scalar& coeffRef(Index row, Index col) const - { - return m_matrix.const_cast_derived().coeffRef(row, col); - } - - inline const Scalar& coeffRef(Index index) const - { - return m_matrix.const_cast_derived().coeffRef(index); - } - - inline Scalar& coeffRef(Index row, Index col) - { - return m_matrix.const_cast_derived().coeffRef(row, col); - } - - inline Scalar& coeffRef(Index index) - { - return 
m_matrix.const_cast_derived().coeffRef(index); - } - - template - inline const PacketScalar packet(Index row, Index col) const - { - return m_matrix.template packet(row, col); - } - - template - inline void writePacket(Index row, Index col, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket(row, col, x); - } - - template - inline const PacketScalar packet(Index index) const - { - return m_matrix.template packet(index); - } - - template - inline void writePacket(Index index, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket(index, x); - } - - const ExpressionType& _expression() const { return m_matrix; } - - template - typename ExpressionType::PlainObject solveTriangular(const MatrixBase& other) const; - - template - void solveTriangularInPlace(const MatrixBase& other) const; - - protected: - ExpressionTypeNested m_matrix; -}; - -/** \returns an expression of *this with added and removed flags - * - * This is mostly for internal use. 
- * - * \sa class Flagged - */ -template -template -inline const Flagged -DenseBase::flagged() const -{ - return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_FLAGGED_H diff --git a/nuparu/include/Eigen/src/Core/ForceAlignedAccess.h b/nuparu/include/Eigen/src/Core/ForceAlignedAccess.h index 807c7a29..7b08b45e 100644 --- a/nuparu/include/Eigen/src/Core/ForceAlignedAccess.h +++ b/nuparu/include/Eigen/src/Core/ForceAlignedAccess.h @@ -39,29 +39,29 @@ template class ForceAlignedAccess typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) - inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} - inline Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + 
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -90,7 +90,7 @@ template class ForceAlignedAccess m_expression.const_cast_derived().template writePacket(index, x); } - operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType& m_expression; @@ -127,7 +127,7 @@ template inline typename internal::add_const_on_value_type,Derived&>::type>::type MatrixBase::forceAlignedAccessIf() const { - return derived(); + return derived(); // FIXME This should not work but apparently is never used } /** \returns an expression of *this with forced aligned access if \a Enable is true. @@ -138,7 +138,7 @@ template inline typename internal::conditional,Derived&>::type MatrixBase::forceAlignedAccessIf() { - return derived(); + return derived(); // FIXME This should not work but apparently is never used } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/Functors.h b/nuparu/include/Eigen/src/Core/Functors.h deleted file mode 100644 index 04fb2173..00000000 --- a/nuparu/include/Eigen/src/Core/Functors.h +++ /dev/null @@ -1,985 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_FUNCTORS_H -#define EIGEN_FUNCTORS_H - -namespace Eigen { - -namespace internal { - -// associative functors: - -/** \internal - * \brief Template functor to compute the sum of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, MatrixBase::sum() - */ -template struct scalar_sum_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::padd(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd - }; -}; - -/** \internal - * \brief Template functor to compute the product of two scalars - * - * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() - */ -template struct scalar_product_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasMul && packet_traits::HasMul - }; - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmul(a,b); } - template - EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const - { return internal::predux_mul(a); } -}; -template -struct functor_traits > { - enum { - Cost = (NumTraits::MulCost + NumTraits::MulCost)/2, // rough estimate! 
- PacketAccess = scalar_product_op::Vectorizable - }; -}; - -/** \internal - * \brief Template functor to compute the conjugate product of two scalars - * - * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y) - */ -template struct scalar_conj_product_op { - - enum { - Conj = NumTraits::IsComplex - }; - - typedef typename scalar_product_traits::ReturnType result_type; - - EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const - { return conj_helper().pmul(a,b); } - - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return conj_helper().pmul(a,b); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::MulCost, - PacketAccess = internal::is_same::value && packet_traits::HasMul - }; -}; - -/** \internal - * \brief Template functor to compute the min of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() - */ -template struct scalar_min_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmin(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux_min(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMin - }; -}; - -/** \internal - * \brief Template functor to compute the max of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() - */ -template struct scalar_max_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, 
const Scalar& b) const { using std::max; return (max)(a, b); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmax(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux_max(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMax - }; -}; - -/** \internal - * \brief Template functor to compute the hypot of two scalars - * - * \sa MatrixBase::stableNorm(), class Redux - */ -template struct scalar_hypot_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) -// typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const - { - using std::max; - using std::min; - using std::sqrt; - Scalar p = (max)(_x, _y); - Scalar q = (min)(_x, _y); - Scalar qp = q/p; - return p * sqrt(Scalar(1) + qp*qp); - } -}; -template -struct functor_traits > { - enum { Cost = 5 * NumTraits::MulCost, PacketAccess=0 }; -}; - -/** \internal - * \brief Template functor to compute the pow of two scalars - */ -template struct scalar_binary_pow_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) - inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } -}; -template -struct functor_traits > { - enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; -}; - -// other binary functors: - -/** \internal - * \brief Template functor to compute the difference of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::operator- - */ -template struct scalar_difference_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::psub(a,b); } -}; -template -struct functor_traits > { - enum { - 
Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasSub - }; -}; - -/** \internal - * \brief Template functor to compute the quotient of two scalars - * - * \sa class CwiseBinaryOp, Cwise::operator/() - */ -template struct scalar_quotient_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv - }; - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pdiv(a,b); } -}; -template -struct functor_traits > { - enum { - Cost = (NumTraits::MulCost + NumTraits::MulCost), // rough estimate! - PacketAccess = scalar_quotient_op::Vectorizable - }; -}; - - - -/** \internal - * \brief Template functor to compute the and of two booleans - * - * \sa class CwiseBinaryOp, ArrayBase::operator&& - */ -struct scalar_boolean_and_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op) - EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; } -}; -template<> struct functor_traits { - enum { - Cost = NumTraits::AddCost, - PacketAccess = false - }; -}; - -/** \internal - * \brief Template functor to compute the or of two booleans - * - * \sa class CwiseBinaryOp, ArrayBase::operator|| - */ -struct scalar_boolean_or_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op) - EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; } -}; -template<> struct functor_traits { - enum { - Cost = NumTraits::AddCost, - PacketAccess = false - }; -}; - -// unary functors: - -/** \internal - * \brief Template functor to compute the opposite of a scalar - * - * \sa class CwiseUnaryOp, MatrixBase::operator- - */ -template struct scalar_opposite_op { - 
EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pnegate(a); } -}; -template -struct functor_traits > -{ enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasNegate }; -}; - -/** \internal - * \brief Template functor to compute the absolute value of a scalar - * - * \sa class CwiseUnaryOp, Cwise::abs - */ -template struct scalar_abs_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pabs(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAbs - }; -}; - -/** \internal - * \brief Template functor to compute the squared absolute value of a scalar - * - * \sa class CwiseUnaryOp, Cwise::abs2 - */ -template struct scalar_abs2_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a,a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasAbs2 }; }; - -/** \internal - * \brief Template functor to compute the conjugate of a complex value - * - * \sa class CwiseUnaryOp, MatrixBase::conjugate() - */ -template struct scalar_conjugate_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) 
const { return internal::pconj(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = NumTraits::IsComplex ? NumTraits::AddCost : 0, - PacketAccess = packet_traits::HasConj - }; -}; - -/** \internal - * \brief Template functor to cast a scalar to another type - * - * \sa class CwiseUnaryOp, MatrixBase::cast() - */ -template -struct scalar_cast_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) - typedef NewType result_type; - EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast(a); } -}; -template -struct functor_traits > -{ enum { Cost = is_same::value ? 0 : NumTraits::AddCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the real part of a complex - * - * \sa class CwiseUnaryOp, MatrixBase::real() - */ -template -struct scalar_real_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the imaginary part of a complex - * - * \sa class CwiseUnaryOp, MatrixBase::imag() - */ -template -struct scalar_imag_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the real part of a complex as a reference - * - * \sa class CwiseUnaryOp, MatrixBase::real() - */ -template -struct scalar_real_ref_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast(&a)); } -}; -template -struct functor_traits > -{ enum 
{ Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the imaginary part of a complex as a reference - * - * \sa class CwiseUnaryOp, MatrixBase::imag() - */ -template -struct scalar_imag_ref_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast(&a)); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * - * \brief Template functor to compute the exponential of a scalar - * - * \sa class CwiseUnaryOp, Cwise::exp() - */ -template struct scalar_exp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op) - inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasExp }; }; - -/** \internal - * - * \brief Template functor to compute the logarithm of a scalar - * - * \sa class CwiseUnaryOp, Cwise::log() - */ -template struct scalar_log_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) - inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::plog(a); } -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasLog }; }; - -/** \internal - * \brief Template functor to multiply a scalar by a fixed other one - * - * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/ - */ -/* NOTE why doing the pset1() in packetOp *is* an optimization ? - * indeed it seems better to declare m_other as a Packet and do the pset1() once - * in the constructor. 
However, in practice: - * - GCC does not like m_other as a Packet and generate a load every time it needs it - * - on the other hand GCC is able to moves the pset1() outside the loop :) - * - simpler code ;) - * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y) - */ -template -struct scalar_multiple_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { } - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -template -struct scalar_multiple2_op { - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { } - EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to divide a scalar by a fixed other one - * - * This functor is used to implement the quotient of a matrix by - * a scalar where the scalar type is not necessarily a floating point type. 
- * - * \sa class CwiseUnaryOp, MatrixBase::operator/ - */ -template -struct scalar_quotient1_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pdiv(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; - -// nullary functors - -template -struct scalar_constant_op { - typedef typename packet_traits::type Packet; - EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { } - template - EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; } - template - EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1(m_other); } - const Scalar m_other; -}; -template -struct functor_traits > -// FIXME replace this packet test by a safe one -{ enum { Cost = 1, PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; - -template struct scalar_identity_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) - template - EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? 
Scalar(1) : Scalar(0); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false, IsRepeatable = true }; }; - -template struct linspaced_op_impl; - -// linear access for packet ops: -// 1) initialization -// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0]) -// 2) each step (where size is 1 for coeff access or PacketSize for packet access) -// base += [size*step, ..., size*step] -// -// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp) -// in order to avoid the padd() in operator() ? -template -struct linspaced_op_impl -{ - typedef typename packet_traits::type Packet; - - linspaced_op_impl(const Scalar& low, const Scalar& step) : - m_low(low), m_step(step), - m_packetStep(pset1(packet_traits::size*step)), - m_base(padd(pset1(low), pmul(pset1(step),plset(-packet_traits::size)))) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const - { - m_base = padd(m_base, pset1(m_step)); - return m_low+Scalar(i)*m_step; - } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } - - const Scalar m_low; - const Scalar m_step; - const Packet m_packetStep; - mutable Packet m_base; -}; - -// random access for packet ops: -// 1) each step -// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) -template -struct linspaced_op_impl -{ - typedef typename packet_traits::type Packet; - - linspaced_op_impl(const Scalar& low, const Scalar& step) : - m_low(low), m_step(step), - m_lowPacket(pset1(m_low)), m_stepPacket(pset1(m_step)), m_interPacket(plset(0)) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const - { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1(i),m_interPacket))); } - - const Scalar m_low; - const Scalar m_step; - const Packet m_lowPacket; 
- const Packet m_stepPacket; - const Packet m_interPacket; -}; - -// ----- Linspace functor ---------------------------------------------------------------- - -// Forward declaration (we default to random access which does not really give -// us a speed gain when using packet access but it allows to use the functor in -// nested expressions). -template struct linspaced_op; -template struct functor_traits< linspaced_op > -{ enum { Cost = 1, PacketAccess = packet_traits::HasSetLinear, IsRepeatable = true }; }; -template struct linspaced_op -{ - typedef typename packet_traits::type Packet; - linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } - - // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since - // there row==0 and col is used for the actual iteration. - template - EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const - { - eigen_assert(col==0 || row==0); - return impl(col + row); - } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); } - - // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since - // there row==0 and col is used for the actual iteration. - template - EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const - { - eigen_assert(col==0 || row==0); - return impl.packetOp(col + row); - } - - // This proxy object handles the actual required temporaries, the different - // implementations (random vs. sequential access) as well as the - // correct piping to size 2/4 packet operations. - const linspaced_op_impl impl; -}; - -// all functors allow linear access, except scalar_identity_op. 
So we fix here a quick meta -// to indicate whether a functor allows linear access, just always answering 'yes' except for -// scalar_identity_op. -// FIXME move this to functor_traits adding a functor_default -template struct functor_has_linear_access { enum { ret = 1 }; }; -template struct functor_has_linear_access > { enum { ret = 0 }; }; - -// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication -// where the mixing of different types is handled by scalar_product_traits -// In particular, real * complex is allowed. -// FIXME move this to functor_traits adding a functor_default -template struct functor_is_product_like { enum { ret = 0 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; - - -/** \internal - * \brief Template functor to add a scalar to a fixed other one - * \sa class CwiseUnaryOp, Array::operator+ - */ -/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */ -template -struct scalar_add_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { } - inline scalar_add_op(const Scalar& other) : m_other(other) { } - inline Scalar operator() (const Scalar& a) const { return a + m_other; } - inline const Packet packetOp(const Packet& a) const - { return internal::padd(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** \internal - * \brief Template functor to compute the square root of a scalar - * \sa class CwiseUnaryOp, Cwise::sqrt() - */ -template struct scalar_sqrt_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op) - inline const 
Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } -}; -template -struct functor_traits > -{ enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasSqrt - }; -}; - -/** \internal - * \brief Template functor to compute the cosine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::cos() - */ -template struct scalar_cos_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op) - inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pcos(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasCos - }; -}; - -/** \internal - * \brief Template functor to compute the sine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::sin() - */ -template struct scalar_sin_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op) - inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::psin(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasSin - }; -}; - - -/** \internal - * \brief Template functor to compute the tan of a scalar - * \sa class CwiseUnaryOp, ArrayBase::tan() - */ -template struct scalar_tan_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op) - inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::ptan(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasTan - }; -}; - -/** \internal 
- * \brief Template functor to compute the arc cosine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::acos() - */ -template struct scalar_acos_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op) - inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pacos(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasACos - }; -}; - -/** \internal - * \brief Template functor to compute the arc sine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::asin() - */ -template struct scalar_asin_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op) - inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pasin(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasASin - }; -}; - -/** \internal - * \brief Template functor to raise a scalar to a power - * \sa class CwiseUnaryOp, Cwise::pow - */ -template -struct scalar_pow_op { - // FIXME default copy constructors seems bugged with std::complex<> - inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } - inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} - inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); } - const Scalar m_exponent; -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to compute the quotient between a scalar and array entries. 
- * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_mult_op { - scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} - inline Scalar operator() (const Scalar& a) const { return m_other / a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(m_other),a); } - Scalar m_other; -}; - -/** \internal - * \brief Template functor to compute the inverse of a scalar - * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op) - inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(Scalar(1)),a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; - -/** \internal - * \brief Template functor to compute the square of a scalar - * \sa class CwiseUnaryOp, Cwise::square() - */ -template -struct scalar_square_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op) - inline Scalar operator() (const Scalar& a) const { return a*a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pmul(a,a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -/** \internal - * \brief Template functor to compute the cube of a scalar - * \sa class CwiseUnaryOp, Cwise::cube() - */ -template -struct scalar_cube_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op) - inline Scalar operator() (const Scalar& a) const { return a*a*a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pmul(a,pmul(a,a)); } -}; -template -struct functor_traits > -{ enum { Cost = 2*NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -// default functor traits for STL functors: - -template -struct functor_traits > -{ enum { Cost = 
NumTraits::MulCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; - -#ifdef EIGEN_STDEXT_SUPPORT - -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > > -{ enum { Cost = 0, PacketAccess = false }; }; - 
-template -struct functor_traits > -{ enum { Cost = functor_traits::Cost + functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost + functor_traits::Cost + functor_traits::Cost, PacketAccess = false }; }; - -#endif // EIGEN_STDEXT_SUPPORT - -// allow to add new functors and specializations of functor_traits from outside Eigen. -// this macro is really needed because functor_traits must be specialized after it is declared but before it is used... -#ifdef EIGEN_FUNCTORS_PLUGIN -#include EIGEN_FUNCTORS_PLUGIN -#endif - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_FUNCTORS_H diff --git a/nuparu/include/Eigen/src/Core/Fuzzy.h b/nuparu/include/Eigen/src/Core/Fuzzy.h index fe63bd29..3e403a09 100644 --- a/nuparu/include/Eigen/src/Core/Fuzzy.h +++ b/nuparu/include/Eigen/src/Core/Fuzzy.h @@ -19,18 +19,19 @@ namespace internal template::IsInteger> struct isApprox_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) { - using std::min; - typename internal::nested::type nested(x); - typename internal::nested::type otherNested(y); - return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); + typename internal::nested_eval::type nested(x); + typename internal::nested_eval::type otherNested(y); + return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); } }; template struct isApprox_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&) { return x.matrix() == y.matrix(); @@ -40,6 +41,7 @@ struct isApprox_selector template::IsInteger> struct isMuchSmallerThan_object_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) 
{ return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum(); @@ -49,6 +51,7 @@ struct isMuchSmallerThan_object_selector template struct isMuchSmallerThan_object_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&) { return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); @@ -58,6 +61,7 @@ struct isMuchSmallerThan_object_selector template::IsInteger> struct isMuchSmallerThan_scalar_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec) { return x.cwiseAbs2().sum() <= numext::abs2(prec * y); @@ -67,6 +71,7 @@ struct isMuchSmallerThan_scalar_selector template struct isMuchSmallerThan_scalar_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&) { return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); diff --git a/nuparu/include/Eigen/src/Core/GeneralProduct.h b/nuparu/include/Eigen/src/Core/GeneralProduct.h index 2a59d946..fe8204ac 100644 --- a/nuparu/include/Eigen/src/Core/GeneralProduct.h +++ b/nuparu/include/Eigen/src/Core/GeneralProduct.h @@ -11,29 +11,7 @@ #ifndef EIGEN_GENERAL_PRODUCT_H #define EIGEN_GENERAL_PRODUCT_H -namespace Eigen { - -/** \class GeneralProduct - * \ingroup Core_Module - * - * \brief Expression of the product of two general matrices or vectors - * - * \param LhsNested the type used to store the left-hand side - * \param RhsNested the type used to store the right-hand side - * \param ProductMode the type of the product - * - * This class represents an expression of the product of two general matrices. - * We call a general matrix, a dense matrix with full storage. For instance, - * This excludes triangular, selfadjoint, and sparse matrices. - * It is the return type of the operator* between general matrices. 
Its template - * arguments are determined automatically by ProductReturnType. Therefore, - * GeneralProduct should never be used direclty. To determine the result type of a - * function which involves a matrix product, use ProductReturnType::Type. - * - * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase&) - */ -template::value> -class GeneralProduct; +namespace Eigen { enum { Large = 2, @@ -59,15 +37,14 @@ template struct product_type typedef typename remove_all::type _Lhs; typedef typename remove_all::type _Rhs; enum { - MaxRows = _Lhs::MaxRowsAtCompileTime, - Rows = _Lhs::RowsAtCompileTime, - MaxCols = _Rhs::MaxColsAtCompileTime, - Cols = _Rhs::ColsAtCompileTime, - MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime, - _Rhs::MaxRowsAtCompileTime), - Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, - _Rhs::RowsAtCompileTime), - LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + MaxRows = traits<_Lhs>::MaxRowsAtCompileTime, + Rows = traits<_Lhs>::RowsAtCompileTime, + MaxCols = traits<_Rhs>::MaxColsAtCompileTime, + Cols = traits<_Rhs>::ColsAtCompileTime, + MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime, + traits<_Rhs>::MaxRowsAtCompileTime), + Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime, + traits<_Rhs>::RowsAtCompileTime) }; // the splitting into different lines of code here, introducing the _select enums and the typedef below, @@ -82,7 +59,8 @@ template struct product_type public: enum { - value = selector::ret + value = selector::ret, + ret = selector::ret }; #ifdef EIGEN_DEBUG_PRODUCT static void debug() @@ -98,6 +76,31 @@ template struct product_type #endif }; +// template struct product_tag +// { +// private: +// +// typedef typename remove_all::type _Lhs; +// typedef typename remove_all::type _Rhs; +// enum { +// Rows = _Lhs::RowsAtCompileTime, +// Cols = _Rhs::ColsAtCompileTime, +// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, 
_Rhs::RowsAtCompileTime) +// }; +// +// enum { +// rows_select = Rows==1 ? int(Rows) : int(Large), +// cols_select = Cols==1 ? int(Cols) : int(Large), +// depth_select = Depth==1 ? int(Depth) : int(Large) +// }; +// typedef product_type_selector selector; +// +// public: +// enum { +// ret = selector::ret +// }; +// +// }; /* The following allows to select the kind of product at compile time * based on the three dimensions of the product. @@ -128,54 +131,6 @@ template<> struct product_type_selector { enum } // end namespace internal -/** \class ProductReturnType - * \ingroup Core_Module - * - * \brief Helper class to get the correct and optimized returned type of operator* - * - * \param Lhs the type of the left-hand side - * \param Rhs the type of the right-hand side - * \param ProductMode the type of the product (determined automatically by internal::product_mode) - * - * This class defines the typename Type representing the optimized product expression - * between two matrix expressions. In practice, using ProductReturnType::Type - * is the recommended way to define the result type of a function returning an expression - * which involve a matrix product. The class Product should never be - * used directly. - * - * \sa class Product, MatrixBase::operator*(const MatrixBase&) - */ -template -struct ProductReturnType -{ - // TODO use the nested type to reduce instanciations ???? 
-// typedef typename internal::nested::type LhsNested; -// typedef typename internal::nested::type RhsNested; - - typedef GeneralProduct Type; -}; - -template -struct ProductReturnType -{ - typedef typename internal::nested::type >::type LhsNested; - typedef typename internal::nested::type >::type RhsNested; - typedef CoeffBasedProduct Type; -}; - -template -struct ProductReturnType -{ - typedef typename internal::nested::type >::type LhsNested; - typedef typename internal::nested::type >::type RhsNested; - typedef CoeffBasedProduct Type; -}; - -// this is a workaround for sun CC -template -struct LazyProductReturnType : public ProductReturnType -{}; - /*********************************************************************** * Implementation of Inner Vector Vector Product ***********************************************************************/ @@ -187,119 +142,10 @@ struct LazyProductReturnType : public ProductReturnType with: operator=(Scalar x); -namespace internal { - -template -struct traits > - : traits::ReturnType,1,1> > -{}; - -} - -template -class GeneralProduct - : internal::no_assignment_operator, - public Matrix::ReturnType,1,1> -{ - typedef Matrix::ReturnType,1,1> Base; - public: - GeneralProduct(const Lhs& lhs, const Rhs& rhs) - { - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); - } - - /** Convertion to scalar */ - operator const typename Base::Scalar() const { - return Base::coeff(0,0); - } -}; - /*********************************************************************** * Implementation of Outer Vector Vector Product ***********************************************************************/ -namespace internal { - -// Column major -template -EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&) -{ - 
typedef typename Dest::Index Index; - // FIXME make sure lhs is sequentially stored - // FIXME not very good if rhs is real and lhs complex while alpha is real too - const Index cols = dest.cols(); - for (Index j=0; j -EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) { - typedef typename Dest::Index Index; - // FIXME make sure rhs is sequentially stored - // FIXME not very good if lhs is real and rhs complex while alpha is real too - const Index rows = dest.rows(); - for (Index i=0; i -struct traits > - : traits, Lhs, Rhs> > -{}; - -} - -template -class GeneralProduct - : public ProductBase, Lhs, Rhs> -{ - template struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; - - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - { - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - } - - struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; - struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; - struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; - struct adds { - Scalar m_scale; - adds(const Scalar& s) : m_scale(s) {} - template void operator()(const Dst& dst, const Src& src) const { - dst.const_cast_derived() += m_scale * src; - } - }; - - template - inline void evalTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, set(), IsRowMajor()); - } - - template - inline void addTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, add(), IsRowMajor()); - } - - template - inline void subTo(Dest& dest) const { - 
internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor()); - } - - template void scaleAndAddTo(Dest& dest, const Scalar& alpha) const - { - internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor()); - } -}; - /*********************************************************************** * Implementation of General Matrix Vector Product ***********************************************************************/ @@ -313,60 +159,13 @@ class GeneralProduct */ namespace internal { -template -struct traits > - : traits, Lhs, Rhs> > -{}; - template -struct gemv_selector; +struct gemv_dense_selector; } // end namespace internal -template -class GeneralProduct - : public ProductBase, Lhs, Rhs> -{ - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - typedef typename Lhs::Scalar LhsScalar; - typedef typename Rhs::Scalar RhsScalar; - - GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs) - { -// EIGEN_STATIC_ASSERT((internal::is_same::value), -// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - } - - enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; - typedef typename internal::conditional::type MatrixType; - - template void scaleAndAddTo(Dest& dst, const Scalar& alpha) const - { - eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols()); - internal::gemv_selector::HasUsableDirectAccess)>::run(*this, dst, alpha); - } -}; - namespace internal { -// The vector is on the left => transposition -template -struct gemv_selector -{ - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) - { - Transpose destT(dest); - enum { OtherStorageOrder = StorageOrder == RowMajor ? 
ColMajor : RowMajor }; - gemv_selector - ::run(GeneralProduct,Transpose, GemvProduct> - (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha); - } -}; - template struct gemv_static_vector_if; template @@ -384,7 +183,7 @@ struct gemv_static_vector_if template struct gemv_static_vector_if { - #if EIGEN_ALIGN_STATICALLY + #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 internal::plain_array m_data; EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } #else @@ -397,33 +196,48 @@ struct gemv_static_vector_if internal::plain_array m_data; EIGEN_STRONG_INLINE Scalar* data() { return ForceAlignment - ? reinterpret_cast((reinterpret_cast(m_data.array) & ~(size_t(15))) + 16) + ? reinterpret_cast((reinterpret_cast(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) : m_data.array; } #endif }; -template<> struct gemv_selector +// The vector is on the left => transposition +template +struct gemv_dense_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + Transpose destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? 
ColMajor : RowMajor }; + gemv_dense_selector + ::run(rhs.transpose(), lhs.transpose(), destT, alpha); + } +}; + +template<> struct gemv_dense_selector { - template - static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::Index Index; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::RealScalar RealScalar; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef Map, Aligned> MappedDest; - ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); - ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); + ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); + ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 @@ -435,18 +249,18 @@ template<> struct gemv_selector gemv_static_vector_if 
static_dest; - bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); - bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; - + const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + RhsScalar compatibleAlpha = get_factor::run(actualAlpha); ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), evalToDest ? dest.data() : static_dest.data()); - + if(!evalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = dest.size(); + Index size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif if(!alphaIsCompatible) @@ -458,11 +272,13 @@ template<> struct gemv_selector MappedDest(actualDestPtr, dest.size()) = dest; } + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; general_matrix_vector_product - ::run( + ::run( actualLhs.rows(), actualLhs.cols(), - actualLhs.data(), actualLhs.outerStride(), - actualRhs.data(), actualRhs.innerStride(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), actualDestPtr, 1, compatibleAlpha); @@ -476,34 +292,34 @@ template<> struct gemv_selector } }; -template<> struct gemv_selector +template<> struct gemv_dense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::Index Index; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::_ActualRhsType _ActualRhsType; - typedef typename 
ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; - - typename add_const::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename add_const::type actualRhs = RhsBlasTraits::extract(prod.rhs()); - - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all::type ActualRhsTypeCleaned; + + typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 // on, the other hand it is good for the cache to pack the vector anyways... - DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1 + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 }; - gemv_static_vector_if static_rhs; + gemv_static_vector_if static_rhs; ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), DirectlyUseRhs ? 
const_cast(actualRhs.data()) : static_rhs.data()); @@ -511,45 +327,46 @@ template<> struct gemv_selector if(!DirectlyUseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = actualRhs.size(); + Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map(actualRhsPtr, actualRhs.size()) = actualRhs; + Map(actualRhsPtr, actualRhs.size()) = actualRhs; } + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; general_matrix_vector_product - ::run( + ::run( actualLhs.rows(), actualLhs.cols(), - actualLhs.data(), actualLhs.outerStride(), - actualRhsPtr, 1, + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhsPtr, 1), dest.data(), dest.innerStride(), actualAlpha); } }; -template<> struct gemv_selector +template<> struct gemv_dense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename Dest::Index Index; - // TODO makes sure dest is sequentially stored in memory, otherwise use a temp - const Index size = prod.rhs().rows(); + // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp + typename nested_eval::type actual_rhs(rhs); + const Index size = rhs.rows(); for(Index k=0; k struct gemv_selector +template<> struct gemv_dense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename Dest::Index Index; - // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp - const Index rows = prod.rows(); + typename nested_eval::type actual_rhs(rhs); + const Index rows = dest.rows(); for(Index i=0; i struct gemv_selector * * \sa 
lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() */ +#ifndef __CUDACC__ + template template -inline const typename ProductReturnType::Type +inline const Product MatrixBase::operator*(const MatrixBase &other) const { // A note regarding the function declaration: In MSVC, this function will sometimes @@ -592,9 +411,12 @@ MatrixBase::operator*(const MatrixBase &other) const #ifdef EIGEN_DEBUG_PRODUCT internal::product_type::debug(); #endif - return typename ProductReturnType::Type(derived(), other.derived()); + + return Product(derived(), other.derived()); } +#endif // __CUDACC__ + /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. * * The returned product will behave like any other expressions: the coefficients of the product will be @@ -608,7 +430,7 @@ MatrixBase::operator*(const MatrixBase &other) const */ template template -const typename LazyProductReturnType::Type +const Product MatrixBase::lazyProduct(const MatrixBase &other) const { enum { @@ -627,7 +449,7 @@ MatrixBase::lazyProduct(const MatrixBase &other) const INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) - return typename LazyProductReturnType::Type(derived(), other.derived()); + return Product(derived(), other.derived()); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/GenericPacketMath.h b/nuparu/include/Eigen/src/Core/GenericPacketMath.h index 5f783ebe..8ad51bad 100644 --- a/nuparu/include/Eigen/src/Core/GenericPacketMath.h +++ b/nuparu/include/Eigen/src/Core/GenericPacketMath.h @@ -42,21 +42,27 @@ namespace internal { struct default_packet_traits { enum { + HasHalfPacket = 0, + HasAdd = 1, HasSub = 1, HasMul = 1, HasNegate = 1, HasAbs = 1, + HasArg = 0, HasAbs2 = 1, HasMin = 1, HasMax = 1, HasConj = 1, HasSetLinear = 1, + HasBlend = 0, HasDiv = 0, HasSqrt = 0, + HasRsqrt = 0, HasExp = 0, HasLog 
= 0, + HasLog10 = 0, HasPow = 0, HasSin = 0, @@ -64,17 +70,31 @@ struct default_packet_traits HasTan = 0, HasASin = 0, HasACos = 0, - HasATan = 0 + HasATan = 0, + HasSinh = 0, + HasCosh = 0, + HasTanh = 0, + HasLGamma = 0, + HasErf = 0, + HasErfc = 0, + + HasRound = 0, + HasFloor = 0, + HasCeil = 0, + + HasSign = 0 }; }; template struct packet_traits : default_packet_traits { typedef T type; + typedef T half; enum { Vectorizable = 0, size = 1, - AlignedOnScalar = 0 + AlignedOnScalar = 0, + HasHalfPacket = 0 }; enum { HasAdd = 0, @@ -90,135 +110,250 @@ template struct packet_traits : default_packet_traits }; }; +template struct packet_traits : packet_traits { }; + +template struct type_casting_traits { + enum { + VectorizedCast = 0, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + + +/** \internal \returns static_cast(a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a) { + return static_cast(a); +} +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/) { + return static_cast(a); +} + + /** \internal \returns a + b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) { return a+b; } /** \internal \returns a - b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) { return a-b; } /** \internal \returns -a (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) { return -a; } /** \internal \returns conj(a) (coeff-wise) */ -template inline Packet + +template EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) { return numext::conj(a); } /** \internal \returns a * b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) { return a*b; } /** \internal \returns a / b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC 
inline Packet pdiv(const Packet& a, const Packet& b) { return a/b; } /** \internal \returns the min of \a a and \a b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, - const Packet& b) { using std::min; return (min)(a, b); } + const Packet& b) { return numext::mini(a, b); } /** \internal \returns the max of \a a and \a b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, - const Packet& b) { using std::max; return (max)(a, b); } + const Packet& b) { return numext::maxi(a, b); } /** \internal \returns the absolute value of \a a */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) { using std::abs; return abs(a); } +/** \internal \returns the phase angle of \a a */ +template EIGEN_DEVICE_FUNC inline Packet +parg(const Packet& a) { using numext::arg; return arg(a); } + /** \internal \returns the bitwise and of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) { return a & b; } /** \internal \returns the bitwise or of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) { return a | b; } /** \internal \returns the bitwise xor of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) { return a ^ b; } /** \internal \returns the bitwise andnot of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) { return a & (!b); } /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits::type* from) { return *from; } /** \internal \returns a packet version of \a *from, (un-aligned load) */ -template inline Packet +template 
EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits::type* from) { return *from; } +/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ +template EIGEN_DEVICE_FUNC inline Packet +pset1(const typename unpacket_traits::type& a) { return a; } + +/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */ +template EIGEN_DEVICE_FUNC inline Packet +pload1(const typename unpacket_traits::type *a) { return pset1(*a); } + /** \internal \returns a packet with elements of \a *from duplicated. - * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and - * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]} + * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and + * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]} * Currently, this function is only used for scalar * complex products. - */ -template inline Packet + */ +template EIGEN_DEVICE_FUNC inline Packet ploaddup(const typename unpacket_traits::type* from) { return *from; } -/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ -template inline Packet -pset1(const typename unpacket_traits::type& a) { return a; } +/** \internal \returns a packet with elements of \a *from quadrupled. + * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and + * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]} + * Currently, this function is only used in matrix products. 
+ * For packet-size smaller or equal to 4, this function is equivalent to pload1 + */ +template EIGEN_DEVICE_FUNC inline Packet +ploadquad(const typename unpacket_traits::type* from) +{ return pload1(from); } + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * a2 = pload1(a+2); + * a3 = pload1(a+3); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast2 + */ +template EIGEN_DEVICE_FUNC +inline void pbroadcast4(const typename unpacket_traits::type *a, + Packet& a0, Packet& a1, Packet& a2, Packet& a3) +{ + a0 = pload1(a+0); + a1 = pload1(a+1); + a2 = pload1(a+2); + a3 = pload1(a+3); +} + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast4 + */ +template EIGEN_DEVICE_FUNC +inline void pbroadcast2(const typename unpacket_traits::type *a, + Packet& a0, Packet& a1) +{ + a0 = pload1(a+0); + a1 = pload1(a+1); +} /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). 
*/ -template inline typename packet_traits::type -plset(const Scalar& a) { return a; } +template inline Packet +plset(const typename unpacket_traits::type& a) { return a; } /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ -template inline void pstore(Scalar* to, const Packet& from) +template EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) { (*to) = from; } /** \internal copy the packet \a from to \a *to, (un-aligned store) */ -template inline void pstoreu(Scalar* to, const Packet& from) -{ (*to) = from; } +template EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) +{ (*to) = from; } + + template EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) + { return ploadu(from); } + + template EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) + { pstore(to, from); } /** \internal tries to do cache prefetching of \a addr */ template inline void prefetch(const Scalar* addr) { -#if !defined(_MSC_VER) -__builtin_prefetch(addr); +#ifdef __CUDA_ARCH__ +#if defined(__LP64__) + // 64-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr)); +#else + // 32-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr)); +#endif +#elif !EIGEN_COMP_MSVC + __builtin_prefetch(addr); #endif } /** \internal \returns the first element of a packet */ -template inline typename unpacket_traits::type pfirst(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type pfirst(const Packet& a) { return a; } /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet preduxp(const Packet* vecs) { return vecs[0]; } /** \internal \returns the sum of the elements of \a a*/ -template inline typename unpacket_traits::type predux(const Packet& a) +template 
EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux(const Packet& a) +{ return a; } + +/** \internal \returns the sum of the elements of \a a by block of 4 elements. + * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7} + * For packet-size smaller or equal to 4, this boils down to a noop. + */ +template EIGEN_DEVICE_FUNC inline +typename conditional<(unpacket_traits::size%8)==0,typename unpacket_traits::half,Packet>::type +predux4(const Packet& a) { return a; } /** \internal \returns the product of the elements of \a a*/ -template inline typename unpacket_traits::type predux_mul(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_mul(const Packet& a) { return a; } /** \internal \returns the min of the elements of \a a*/ -template inline typename unpacket_traits::type predux_min(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min(const Packet& a) { return a; } /** \internal \returns the max of the elements of \a a*/ -template inline typename unpacket_traits::type predux_max(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max(const Packet& a) { return a; } /** \internal \returns the reversed elements of \a a*/ -template inline Packet preverse(const Packet& a) +template EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) { return a; } +template +struct protate_impl +{ + // Empty so attempts to use this unimplemented path will fail to compile. + // Only specializations of this template should be used. +}; + +/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention, + * by the given offset, e.g. for offset == 1: + * (packet[3], packet[2], packet[1], packet[0]) becomes (packet[0], packet[3], packet[2], packet[1]) + */ +template EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a) +{ + return offset ? 
protate_impl::run(a) : a; +} /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ -template inline Packet pcplxflip(const Packet& a) +template EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) { // FIXME: uncomment the following in case we drop the internal imag and real functions. // using std::imag; @@ -250,6 +385,22 @@ Packet pasin(const Packet& a) { using std::asin; return asin(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) { using std::acos; return acos(a); } +/** \internal \returns the arc tangent of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet patan(const Packet& a) { using std::atan; return atan(a); } + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet psinh(const Packet& a) { using std::sinh; return sinh(a); } + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); } + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } + /** \internal \returns the exp of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { using std::exp; return exp(a); } @@ -258,10 +409,44 @@ Packet pexp(const Packet& a) { using std::exp; return exp(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { using std::log; return log(a); } +/** \internal \returns the log10 of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog10(const Packet& a) { using std::log10; return log10(a); } + /** \internal \returns the square-root of 
\a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } +/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet prsqrt(const Packet& a) { + return pdiv(pset1(1), psqrt(a)); +} + +/** \internal \returns the rounded value of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pround(const Packet& a) { using numext::round; return round(a); } + +/** \internal \returns the floor of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } + +/** \internal \returns the ceil of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } + +/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } + +/** \internal \returns the erf(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perf(const Packet& a) { using numext::erf; return erf(a); } + +/** \internal \returns the erfc(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ @@ -275,34 +460,45 @@ inline void pstore1(typename unpacket_traits::type* to, const typename u } /** \internal \returns a * b + c (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmadd(const 
Packet& a, const Packet& b, const Packet& c) { return padd(pmul(a, b),c); } /** \internal \returns a packet version of \a *from. - * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */ -template -inline Packet ploadt(const typename unpacket_traits::type* from) + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits::type* from) { - if(LoadMode == Aligned) + if(Alignment >= unpacket_traits::alignment) return pload(from); else return ploadu(from); } /** \internal copy the packet \a from to \a *to. - * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */ -template -inline void pstoret(Scalar* to, const Packet& from) + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) { - if(LoadMode == Aligned) + if(Alignment >= unpacket_traits::alignment) pstore(to, from); else pstoreu(to, from); } +/** \internal \returns a packet version of \a *from. + * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the + * hardware if available to speedup the loading of data that won't be modified + * by the current computation. + */ +template +inline Packet ploadt_ro(const typename unpacket_traits::type* from) +{ + return ploadt(from); +} + /** \internal default implementation of palign() allowing partial specialization */ template struct palign_impl @@ -336,15 +532,46 @@ inline void palign(PacketType& first, const PacketType& second) * Fast complex products (GCC generates a function call which is very slow) ***************************************************************************/ +// Eigen+CUDA does not support complexes. 
+#ifndef __CUDACC__ + template<> inline std::complex pmul(const std::complex& a, const std::complex& b) { return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } template<> inline std::complex pmul(const std::complex& a, const std::complex& b) { return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } +#endif + + +/*************************************************************************** + * PacketBlock, that is a collection of N packets where the number of words + * in the packet is a multiple of N. +***************************************************************************/ +template ::size> struct PacketBlock { + Packet packet[N]; +}; + +template EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock& /*kernel*/) { + // Nothing to do in the scalar case, i.e. a 1x1 matrix. +} + +/*************************************************************************** + * Selector, i.e. vector of N boolean values used to select (i.e. blend) + * words from 2 packets. +***************************************************************************/ +template struct Selector { + bool select[N]; +}; + +template EIGEN_DEVICE_FUNC inline Packet +pblend(const Selector::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) { + return ifPacket.select[0] ? 
thenPacket : elsePacket; +} + } // end namespace internal } // end namespace Eigen #endif // EIGEN_GENERIC_PACKET_MATH_H - diff --git a/nuparu/include/Eigen/src/Core/GlobalFunctions.h b/nuparu/include/Eigen/src/Core/GlobalFunctions.h index 2acf9772..62fec700 100644 --- a/nuparu/include/Eigen/src/Core/GlobalFunctions.h +++ b/nuparu/include/Eigen/src/Core/GlobalFunctions.h @@ -14,8 +14,8 @@ #define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \ template \ inline const Eigen::CwiseUnaryOp, const Derived> \ - NAME(const Eigen::ArrayBase& x) { \ - return x.derived(); \ + (NAME)(const Eigen::ArrayBase& x) { \ + return Eigen::CwiseUnaryOp, const Derived>(x.derived()); \ } #define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \ @@ -30,25 +30,44 @@ { \ static inline typename NAME##_retval >::type run(const Eigen::ArrayBase& x) \ { \ - return x.derived(); \ + return typename NAME##_retval >::type(x.derived()); \ } \ }; - namespace Eigen { EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op) 
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op) template inline const Eigen::CwiseUnaryOp, const Derived> @@ -56,16 +75,46 @@ namespace Eigen return x.derived().pow(exponent); } - template - inline const Eigen::CwiseBinaryOp, const Derived, const Derived> - pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) + /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power. + * + * Example: \include Cwise_array_power_array.cpp + * Output: \verbinclude Cwise_array_power_array.out + * + * \sa ArrayBase::pow() + */ + template + inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> + pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) { - return Eigen::CwiseBinaryOp, const Derived, const Derived>( + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( x.derived(), exponents.derived() ); } + /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power between a scalar and an array of exponents. 
+ * Beaware that the scalar type of the input scalar \a x and the exponents \a exponents must be the same. + * + * Example: \include Cwise_scalar_power_array.cpp + * Output: \verbinclude Cwise_scalar_power_array.out + * + * \sa ArrayBase::pow() + */ + template + inline const Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived> + pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents) + { + typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x); + return Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived>( + constant_x, + exponents.derived() + ); + } + /** * \brief Component-wise division of a scalar by array elements. **/ diff --git a/nuparu/include/Eigen/src/Core/IO.h b/nuparu/include/Eigen/src/Core/IO.h index c8d5f637..9ae37bb5 100644 --- a/nuparu/include/Eigen/src/Core/IO.h +++ b/nuparu/include/Eigen/src/Core/IO.h @@ -49,7 +49,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& */ struct IOFormat { - /** Default contructor, see class IOFormat for the meaning of the parameters */ + /** Default constructor, see class IOFormat for the meaning of the parameters */ IOFormat(int _precision = StreamPrecision, int _flags = 0, const std::string& _coeffSeparator = " ", const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="", @@ -57,6 +57,10 @@ struct IOFormat : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator), rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags) { + // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline + // don't add rowSpacer if columns are not to be aligned + if((flags & DontAlignCols)) + return; int i = int(matSuffix.length())-1; while (i>=0 && matSuffix[i]!='\n') { @@ -160,7 +164,6 @@ std::ostream & print_matrix(std::ostream & s, const 
Derived& _m, const IOFormat& typename Derived::Nested m = _m; typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; Index width = 0; @@ -185,21 +188,22 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& explicit_precision = fmt.precision; } + std::streamsize old_precision = 0; + if(explicit_precision) old_precision = s.precision(explicit_precision); + bool align_cols = !(fmt.flags & DontAlignCols); if(align_cols) { // compute the largest width - for(Index j = 1; j < m.cols(); ++j) + for(Index j = 0; j < m.cols(); ++j) for(Index i = 0; i < m.rows(); ++i) { std::stringstream sstr; - if(explicit_precision) sstr.precision(explicit_precision); + sstr.copyfmt(s); sstr << m.coeff(i,j); width = std::max(width, Index(sstr.str().length())); } } - std::streamsize old_precision = 0; - if(explicit_precision) old_precision = s.precision(explicit_precision); s << fmt.matPrefix; for(Index i = 0; i < m.rows(); ++i) { diff --git a/nuparu/include/Eigen/src/Core/Inverse.h b/nuparu/include/Eigen/src/Core/Inverse.h new file mode 100644 index 00000000..f3ec8499 --- /dev/null +++ b/nuparu/include/Eigen/src/Core/Inverse.h @@ -0,0 +1,117 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_INVERSE_H +#define EIGEN_INVERSE_H + +namespace Eigen { + +template class InverseImpl; + +namespace internal { + +template +struct traits > + : traits +{ + typedef typename XprType::PlainObject PlainObject; + typedef traits BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit + }; +}; + +} // end namespace internal + +/** \class Inverse + * + * \brief Expression of the inverse of another expression + * + * \tparam XprType the type of the expression we are taking the inverse + * + * This class represents an abstract expression of A.inverse() + * and most of the time this is the only way it is used. + * + */ +template +class Inverse : public InverseImpl::StorageKind> +{ +public: + typedef typename XprType::StorageIndex StorageIndex; + typedef typename XprType::PlainObject PlainObject; + typedef typename internal::ref_selector::type XprTypeNested; + typedef typename internal::remove_all::type XprTypeNestedCleaned; + typedef typename internal::ref_selector::type Nested; + typedef typename internal::remove_all::type NestedExpression; + + explicit Inverse(const XprType &xpr) + : m_xpr(xpr) + {} + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + + EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; } + +protected: + XprTypeNested m_xpr; +}; + +// Generic API dispatcher +template +class InverseImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; + typedef typename XprType::Scalar Scalar; +private: + + Scalar coeff(Index row, Index col) const; + Scalar coeff(Index i) const; +}; + +namespace internal { + +/** \internal + * \brief Default evaluator for Inverse expression. + * + * This default evaluator for Inverse expression simply evaluate the inverse into a temporary + * by a call to internal::call_assignment_no_alias. 
+ * Therefore, inverse implementers only have to specialize Assignment, ...> for + * there own nested expression. + * + * \sa class Inverse + */ +template +struct unary_evaluator > + : public evaluator::PlainObject> +{ + typedef Inverse InverseType; + typedef typename InverseType::PlainObject PlainObject; + typedef evaluator Base; + + enum { Flags = Base::Flags | EvalBeforeNestingBit }; + + unary_evaluator(const InverseType& inv_xpr) + : m_result(inv_xpr.rows(), inv_xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + internal::call_assignment_no_alias(m_result, inv_xpr); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_INVERSE_H diff --git a/nuparu/include/Eigen/src/Core/Map.h b/nuparu/include/Eigen/src/Core/Map.h index f804c89d..3a8375da 100644 --- a/nuparu/include/Eigen/src/Core/Map.h +++ b/nuparu/include/Eigen/src/Core/Map.h @@ -19,7 +19,7 @@ namespace Eigen { * \brief A matrix or vector expression mapping an existing array of data. * * \tparam PlainObjectType the equivalent matrix type of the mapped data - * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned. + * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. * The default is \c #Unaligned. * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout * of an ordinary, contiguous array. This can be overridden by specifying strides. @@ -70,8 +70,6 @@ struct traits > : public traits { typedef traits TraitsBase; - typedef typename PlainObjectType::Index Index; - typedef typename PlainObjectType::Scalar Scalar; enum { InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 ? int(PlainObjectType::InnerStrideAtCompileTime) @@ -79,22 +77,9 @@ struct traits > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 ? 
int(PlainObjectType::OuterStrideAtCompileTime) : int(StrideType::OuterStrideAtCompileTime), - HasNoInnerStride = InnerStrideAtCompileTime == 1, - HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, - HasNoStride = HasNoInnerStride && HasNoOuterStride, - IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), - IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, - KeepsPacketAccess = bool(HasNoInnerStride) - && ( bool(IsDynamicSize) - || HasNoOuterStride - || ( OuterStrideAtCompileTime!=Dynamic - && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ), + Alignment = int(MapOptions)&int(AlignedMask), Flags0 = TraitsBase::Flags & (~NestByRefBit), - Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), - Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) - ? int(Flags1) : int(Flags1 & ~LinearAccessBit), - Flags3 = is_lvalue::value ? int(Flags2) : (int(Flags2) & ~LvalueBit), - Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit) + Flags = is_lvalue::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) }; private: enum { Options }; // Expressions don't have Options @@ -110,19 +95,17 @@ template class Ma EIGEN_DENSE_PUBLIC_INTERFACE(Map) typedef typename Base::PointerType PointerType; -#if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API - typedef const Scalar* PointerArgType; - inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast(ptr); } -#else typedef PointerType PointerArgType; + EIGEN_DEVICE_FUNC inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; } -#endif + EIGEN_DEVICE_FUNC inline Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() @@ -134,10 +117,11 @@ template class Ma /** Constructor in the fixed-size case. 
* * \param dataPtr pointer to the array to map - * \param a_stride optional Stride object, passing the strides. + * \param stride optional Stride object, passing the strides. */ - inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType()) - : Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride) + EIGEN_DEVICE_FUNC + explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr)), m_stride(stride) { PlainObjectType::Base::_check_template_params(); } @@ -145,11 +129,12 @@ template class Ma /** Constructor in the dynamic-size vector case. * * \param dataPtr pointer to the array to map - * \param a_size the size of the vector expression - * \param a_stride optional Stride object, passing the strides. + * \param size the size of the vector expression + * \param stride optional Stride object, passing the strides. */ - inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType()) - : Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride) + EIGEN_DEVICE_FUNC + inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride) { PlainObjectType::Base::_check_template_params(); } @@ -157,12 +142,13 @@ template class Ma /** Constructor in the dynamic-size matrix case. * * \param dataPtr pointer to the array to map - * \param nbRows the number of rows of the matrix expression - * \param nbCols the number of columns of the matrix expression - * \param a_stride optional Stride object, passing the strides. + * \param rows the number of rows of the matrix expression + * \param cols the number of columns of the matrix expression + * \param stride optional Stride object, passing the strides. 
*/ - inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType()) - : Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride) + EIGEN_DEVICE_FUNC + inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride) { PlainObjectType::Base::_check_template_params(); } @@ -173,19 +159,6 @@ template class Ma StrideType m_stride; }; -template -inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> - ::Array(const Scalar *data) -{ - this->_set_noalias(Eigen::Map(data)); -} - -template -inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> - ::Matrix(const Scalar *data) -{ - this->_set_noalias(Eigen::Map(data)); -} } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/MapBase.h b/nuparu/include/Eigen/src/Core/MapBase.h index 6876de58..75a80daa 100644 --- a/nuparu/include/Eigen/src/Core/MapBase.h +++ b/nuparu/include/Eigen/src/Core/MapBase.h @@ -12,7 +12,7 @@ #define EIGEN_MAPBASE_H #define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \ - EIGEN_STATIC_ASSERT((int(internal::traits::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ + EIGEN_STATIC_ASSERT((int(internal::evaluator::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT) namespace Eigen { @@ -37,7 +37,6 @@ template class MapBase }; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; @@ -76,8 +75,8 @@ template class MapBase typedef typename Base::CoeffReturnType CoeffReturnType; - inline Index rows() const { return m_rows.value(); } - inline Index cols() const { return m_cols.value(); } + 
EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); } /** Returns a pointer to the first coefficient of the matrix or vector. * @@ -85,24 +84,28 @@ template class MapBase * * \sa innerStride(), outerStride() */ - inline const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index rowId, Index colId) const { return m_data[colId * colStride() + rowId * rowStride()]; } + EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index index) const { EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) return m_data[index * innerStride()]; } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return this->m_data[colId * colStride() + rowId * rowStride()]; } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) @@ -123,12 +126,14 @@ template class MapBase return internal::ploadt(m_data + index * innerStride()); } - inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) + EIGEN_DEVICE_FUNC + explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) checkSanity(); } + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : m_data(dataPtr), m_rows(RowsAtCompileTime == Dynamic ? 
vecSize : Index(RowsAtCompileTime)), @@ -140,24 +145,28 @@ template class MapBase checkSanity(); } - inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) - : m_data(dataPtr), m_rows(nbRows), m_cols(nbCols) + EIGEN_DEVICE_FUNC + inline MapBase(PointerType dataPtr, Index rows, Index cols) + : m_data(dataPtr), m_rows(rows), m_cols(cols) { eigen_assert( (dataPtr == 0) - || ( nbRows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows) - && nbCols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols))); + || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) + && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); checkSanity(); } + #ifdef EIGEN_MAPBASE_PLUGIN + #include EIGEN_MAPBASE_PLUGIN + #endif + protected: + EIGEN_DEVICE_FUNC void checkSanity() const { - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits::Flags&PacketAccessBit, - internal::inner_stride_at_compile_time::ret==1), - PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); - eigen_assert(EIGEN_IMPLIES(internal::traits::Flags&AlignedBit, (size_t(m_data) % 16) == 0) - && "data is not aligned"); +#if EIGEN_MAX_ALIGN_BYTES>0 + eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits::Alignment)) == 0) && "data is not aligned"); +#endif } PointerType m_data; @@ -168,13 +177,14 @@ template class MapBase template class MapBase : public MapBase { + typedef MapBase ReadOnlyMapBase; public: typedef MapBase Base; typedef typename Base::Scalar Scalar; typedef typename Base::PacketScalar PacketScalar; - typedef typename Base::Index Index; + typedef typename Base::StorageIndex StorageIndex; typedef typename Base::PointerType PointerType; using Base::derived; @@ -195,14 +205,18 @@ template class MapBase const Scalar >::type ScalarWithConstIfNotLvalue; + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return this->m_data; } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return 
this->m_data; } // no const-cast here so non-const-correct code will give a compile error + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) { return this->m_data[col * colStride() + row * rowStride()]; } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index index) { EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) @@ -224,19 +238,24 @@ template class MapBase (this->m_data + index * innerStride(), val); } - explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} - inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} - inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {} + EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {} + EIGEN_DEVICE_FUNC Derived& operator=(const MapBase& other) { - Base::Base::operator=(other); + ReadOnlyMapBase::Base::operator=(other); return derived(); } - using Base::Base::operator=; + // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base, + // see bugs 821 and 920. 
+ using ReadOnlyMapBase::Base::operator=; }; +#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS + } // end namespace Eigen #endif // EIGEN_MAPBASE_H diff --git a/nuparu/include/Eigen/src/Core/MathFunctions.h b/nuparu/include/Eigen/src/Core/MathFunctions.h index 2bfc5ebd..48cf565f 100644 --- a/nuparu/include/Eigen/src/Core/MathFunctions.h +++ b/nuparu/include/Eigen/src/Core/MathFunctions.h @@ -10,8 +10,20 @@ #ifndef EIGEN_MATHFUNCTIONS_H #define EIGEN_MATHFUNCTIONS_H +// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html +#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406 + namespace Eigen { +// On WINCE, std::abs is defined for int only, so let's defined our own overloads: +// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too. +#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500 +long abs(long x) { return (labs(x)); } +double abs(double x) { return (fabs(x)); } +float abs(float x) { return (fabsf(x)); } +long double abs(long double x) { return (fabsl(x)); } +#endif + namespace internal { /** \internal \struct global_math_functions_filtering_base @@ -62,6 +74,7 @@ template::IsComplex> struct real_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x; @@ -72,6 +85,7 @@ template struct real_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { using std::real; @@ -87,7 +101,6 @@ struct real_retval typedef typename NumTraits::Real type; }; - /**************************************************************************** * Implementation of imag * ****************************************************************************/ @@ -96,6 +109,7 @@ template::IsComplex> struct imag_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar&) 
{ return RealScalar(0); @@ -106,6 +120,7 @@ template struct imag_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { using std::imag; @@ -129,10 +144,12 @@ template struct real_ref_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) { return reinterpret_cast(&x)[0]; } + EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) { return reinterpret_cast(&x)[0]; @@ -153,10 +170,12 @@ template struct imag_ref_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) { return reinterpret_cast(&x)[1]; } + EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) { return reinterpret_cast(&x)[1]; @@ -166,10 +185,12 @@ struct imag_ref_default_impl template struct imag_ref_default_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(Scalar&) { return Scalar(0); } + EIGEN_DEVICE_FUNC static inline const Scalar run(const Scalar&) { return Scalar(0); @@ -192,6 +213,7 @@ struct imag_ref_retval template::IsComplex> struct conj_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { return x; @@ -201,6 +223,7 @@ struct conj_impl template struct conj_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { using std::conj; @@ -218,25 +241,39 @@ struct conj_retval * Implementation of abs2 * ****************************************************************************/ -template -struct abs2_impl +template +struct abs2_impl_default { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x*x; } }; -template -struct abs2_impl > +template +struct abs2_impl_default // IsComplex { - static inline RealScalar run(const std::complex& x) + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) { return real(x)*real(x) + 
imag(x)*imag(x); } }; +template +struct abs2_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return abs2_impl_default::IsComplex>::run(x); + } +}; + template struct abs2_retval { @@ -251,9 +288,10 @@ template struct norm1_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { - using std::abs; + EIGEN_USING_STD_MATH(abs); return abs(real(x)) + abs(imag(x)); } }; @@ -261,9 +299,10 @@ struct norm1_default_impl template struct norm1_default_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { - using std::abs; + EIGEN_USING_STD_MATH(abs); return abs(x); } }; @@ -287,16 +326,22 @@ struct hypot_impl typedef typename NumTraits::Real RealScalar; static inline RealScalar run(const Scalar& x, const Scalar& y) { - using std::max; - using std::min; - using std::abs; - using std::sqrt; + EIGEN_USING_STD_MATH(abs); + EIGEN_USING_STD_MATH(sqrt); RealScalar _x = abs(x); RealScalar _y = abs(y); - RealScalar p = (max)(_x, _y); - if(p==RealScalar(0)) return 0; - RealScalar q = (min)(_x, _y); - RealScalar qp = q/p; + Scalar p, qp; + if(_x>_y) + { + p = _x; + qp = _y / p; + } + else + { + p = _y; + qp = _x / p; + } + if(p==RealScalar(0)) return RealScalar(0); return p * sqrt(RealScalar(1) + qp*qp); } }; @@ -314,6 +359,7 @@ struct hypot_retval template struct cast_impl { + EIGEN_DEVICE_FUNC static inline NewType run(const OldType& x) { return static_cast(x); @@ -323,48 +369,121 @@ struct cast_impl // here, for once, we're plainly returning NewType: we don't want cast to do weird things. 
template +EIGEN_DEVICE_FUNC inline NewType cast(const OldType& x) { return cast_impl::run(x); } /**************************************************************************** -* Implementation of atanh2 * +* Implementation of round * ****************************************************************************/ -template -struct atanh2_default_impl -{ - typedef Scalar retval; - typedef typename NumTraits::Real RealScalar; - static inline Scalar run(const Scalar& x, const Scalar& y) +#if EIGEN_HAS_CXX11_MATH + template + struct round_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + using std::round; + return round(x); + } + }; +#else + template + struct round_impl { - using std::abs; - using std::log; - using std::sqrt; - Scalar z = x / y; - if (y == Scalar(0) || abs(z) > sqrt(NumTraits::epsilon())) - return RealScalar(0.5) * log((y + x) / (y - x)); - else - return z + z*z*z / RealScalar(3); - } + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + EIGEN_USING_STD_MATH(floor); + EIGEN_USING_STD_MATH(ceil); + return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5)); + } + }; +#endif + +template +struct round_retval +{ + typedef Scalar type; }; +/**************************************************************************** +* Implementation of arg * +****************************************************************************/ + +#if EIGEN_HAS_CXX11_MATH + template + struct arg_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(arg); + return arg(x); + } + }; +#else + template::IsComplex> + struct arg_default_impl + { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return (x < Scalar(0)) ? 
Scalar(EIGEN_PI) : Scalar(0); } + }; + + template + struct arg_default_impl + { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(arg); + return arg(x); + } + }; + + template struct arg_impl : arg_default_impl {}; +#endif + template -struct atanh2_default_impl +struct arg_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of log1p * +****************************************************************************/ +template::IsComplex > +struct log1p_impl { - static inline Scalar run(const Scalar&, const Scalar&) + static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - return Scalar(0); + typedef typename NumTraits::Real RealScalar; + EIGEN_USING_STD_MATH(log); + Scalar x1p = RealScalar(1) + x; + return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); } }; +#if EIGEN_HAS_CXX11_MATH template -struct atanh2_impl : atanh2_default_impl::IsInteger> {}; +struct log1p_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + using std::log1p; + return log1p(x); + } +}; +#endif template -struct atanh2_retval +struct log1p_retval { typedef Scalar type; }; @@ -379,7 +498,7 @@ struct pow_default_impl typedef Scalar retval; static inline Scalar run(const Scalar& x, const Scalar& y) { - using std::pow; + EIGEN_USING_STD_MATH(pow); return pow(x, y); } }; @@ -447,48 +566,48 @@ struct random_default_impl }; enum { - floor_log2_terminate, - floor_log2_move_up, - floor_log2_move_down, - floor_log2_bogus + meta_floor_log2_terminate, + meta_floor_log2_move_up, + meta_floor_log2_move_down, + meta_floor_log2_bogus }; -template struct floor_log2_selector +template struct meta_floor_log2_selector { enum { middle = (lower + upper) / 2, - value = (upper <= lower + 1) ? 
int(floor_log2_terminate) - : (n < (1 << middle)) ? int(floor_log2_move_down) - : (n==0) ? int(floor_log2_bogus) - : int(floor_log2_move_up) + value = (upper <= lower + 1) ? int(meta_floor_log2_terminate) + : (n < (1 << middle)) ? int(meta_floor_log2_move_down) + : (n==0) ? int(meta_floor_log2_bogus) + : int(meta_floor_log2_move_up) }; }; template::value> -struct floor_log2 {}; + int selector = meta_floor_log2_selector::value> +struct meta_floor_log2 {}; template -struct floor_log2 +struct meta_floor_log2 { - enum { value = floor_log2::middle>::value }; + enum { value = meta_floor_log2::middle>::value }; }; template -struct floor_log2 +struct meta_floor_log2 { - enum { value = floor_log2::middle, upper>::value }; + enum { value = meta_floor_log2::middle, upper>::value }; }; template -struct floor_log2 +struct meta_floor_log2 { enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower }; }; template -struct floor_log2 +struct meta_floor_log2 { // no value, error at compile time }; @@ -496,11 +615,22 @@ struct floor_log2 template struct random_default_impl { - typedef typename NumTraits::NonInteger NonInteger; - static inline Scalar run(const Scalar& x, const Scalar& y) - { - return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1))); + { + typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; + if(y range); + + return Scalar(ScalarX(x) + offset); } static inline Scalar run() @@ -508,7 +638,7 @@ struct random_default_impl #ifdef EIGEN_MAKING_DOCS return run(Scalar(NumTraits::IsSigned ? -10 : 0), Scalar(10)); #else - enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value, + enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value, scalar_bits = sizeof(Scalar) * CHAR_BIT, shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)), offset = NumTraits::IsSigned ? 
(1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0 @@ -545,97 +675,322 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); } +// Implementatin of is* functions + +// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. +#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) +#define EIGEN_USE_STD_FPCLASSIFY 1 +#else +#define EIGEN_USE_STD_FPCLASSIFY 0 +#endif + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isnan_impl(const T&) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isinf_impl(const T&) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isfinite_impl(const T&) { return true; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isfinite_impl(const T& x) +{ + #if EIGEN_USE_STD_FPCLASSIFY + using std::isfinite; + return isfinite EIGEN_NOT_A_MACRO (x); + #else + return x::highest() && x>NumTraits::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isinf_impl(const T& x) +{ + #if EIGEN_USE_STD_FPCLASSIFY + using std::isinf; + return isinf EIGEN_NOT_A_MACRO (x); + #else + return x>NumTraits::highest() || x::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isnan_impl(const T& x) +{ + #if EIGEN_USE_STD_FPCLASSIFY + using std::isnan; + return isnan EIGEN_NOT_A_MACRO (x); + #else + return x != x; + #endif +} + +#if (!EIGEN_USE_STD_FPCLASSIFY) + +#if EIGEN_COMP_MSVC + +template EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) +{ + return _fpclass(x)==_FPCLASS_NINF || 
_fpclass(x)==_FPCLASS_PINF; +} + +//MSVC defines a _isnan builtin function, but for double only +EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); } + +EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); } + +#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) + +#if EIGEN_GNUC_AT_LEAST(5,0) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only"))) +#else + // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol), + // while the second prevent too aggressive optimizations in fast-math mode: + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only"))) +#endif + +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); } + +#undef EIGEN_TMP_NOOPT_ATTRIB + +#endif + +#endif + +// The following overload are defined at the end of this file +template bool isfinite_impl(const std::complex& x); +template bool isnan_impl(const 
std::complex& x); +template bool isinf_impl(const std::complex& x); + } // end namespace internal /**************************************************************************** -* Generic math function * +* Generic math functions * ****************************************************************************/ namespace numext { +#ifndef __CUDA_ARCH__ +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(min); + return min EIGEN_NOT_A_MACRO (x,y); +} + +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(max); + return max EIGEN_NOT_A_MACRO (x,y); +} +#else +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) +{ + return y < x ? y : x; +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) +{ + return fmin(x, y); +} +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) +{ + return x < y ? 
y : x; +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) +{ + return fmax(x, y); +} +#endif + + template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x) { return internal::real_ref_impl::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) { return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x) { return internal::imag_ref_impl::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x) { return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y) { return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y); } template -inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y) 
+EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y); + return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) { return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); } -// std::isfinite is non standard, so let's define our own version, -// even though it is not very efficient. -template bool (isfinite)(const T& x) +template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } +template EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } +template EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +T (floor)(const T& x) +{ + EIGEN_USING_STD_MATH(floor); + return floor(x); +} + +template +EIGEN_DEVICE_FUNC +T (ceil)(const T& x) { - return x::highest() && x>NumTraits::lowest(); + EIGEN_USING_STD_MATH(ceil); + return ceil(x); +} + +// Log base 2 for 32 bits positive integers. +// Conveniently returns 0 for x==0. 
+inline int log2(int x) +{ + eigen_assert(x>=0); + unsigned int v(x); + static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return table[(v * 0x07C4ACDDU) >> 27]; } } // end namespace numext namespace internal { +template +bool isfinite_impl(const std::complex& x) +{ + return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); +} + +template +bool isnan_impl(const std::complex& x) +{ + return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); +} + +template +bool isinf_impl(const std::complex& x) +{ + return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); +} + /**************************************************************************** * Implementation of fuzzy comparisons * ****************************************************************************/ @@ -649,18 +1004,19 @@ template struct scalar_fuzzy_default_impl { typedef typename NumTraits::Real RealScalar; - template + template EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) { - using std::abs; + EIGEN_USING_STD_MATH(abs); return abs(x) <= abs(y) * prec; } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { - using std::min; - using std::abs; - return abs(x - y) <= (min)(abs(x), abs(y)) * prec; + EIGEN_USING_STD_MATH(abs); + return abs(x - y) <= numext::mini(abs(x), abs(y)) * prec; } + EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) { return x <= y || isApprox(x, y, prec); @@ -671,15 +1027,17 @@ template struct scalar_fuzzy_default_impl { typedef typename NumTraits::Real RealScalar; - template + template EIGEN_DEVICE_FUNC static inline bool 
isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&) { return x == Scalar(0); } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&) { return x == y; } + EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&) { return x <= y; @@ -697,29 +1055,28 @@ struct scalar_fuzzy_default_impl } static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { - using std::min; - return numext::abs2(x - y) <= (min)(numext::abs2(x), numext::abs2(y)) * prec * prec; + return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec; } }; template struct scalar_fuzzy_impl : scalar_fuzzy_default_impl::IsComplex, NumTraits::IsInteger> {}; -template +template EIGEN_DEVICE_FUNC inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, typename NumTraits::Real precision = NumTraits::dummy_precision()) { return scalar_fuzzy_impl::template isMuchSmallerThan(x, y, precision); } -template +template EIGEN_DEVICE_FUNC inline bool isApprox(const Scalar& x, const Scalar& y, typename NumTraits::Real precision = NumTraits::dummy_precision()) { return scalar_fuzzy_impl::isApprox(x, y, precision); } -template +template EIGEN_DEVICE_FUNC inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, typename NumTraits::Real precision = NumTraits::dummy_precision()) { @@ -742,17 +1099,19 @@ template<> struct scalar_fuzzy_impl { typedef bool RealScalar; - template + template EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) { return !x; } + EIGEN_DEVICE_FUNC static inline bool isApprox(bool x, bool y, bool) { return x == y; } + EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&) { return (!x) || y; diff --git a/nuparu/include/Eigen/src/Core/Matrix.h b/nuparu/include/Eigen/src/Core/Matrix.h index 0ba5d90c..ce1b70d2 100644 --- 
a/nuparu/include/Eigen/src/Core/Matrix.h +++ b/nuparu/include/Eigen/src/Core/Matrix.h @@ -24,13 +24,13 @@ namespace Eigen { * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note"). * * The first three template parameters are required: - * \tparam _Scalar \anchor matrix_tparam_scalar Numeric type, e.g. float, double, int or std::complex. - * User defined sclar types are supported as well (see \ref user_defined_scalars "here"). + * \tparam _Scalar Numeric type, e.g. float, double, int or std::complex. + * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). * \tparam _Rows Number of rows, or \b Dynamic * \tparam _Cols Number of columns, or \b Dynamic * * The remaining template parameters are optional -- in most cases you don't have to worry about them. - * \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either + * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either * \b #AutoAlign or \b #DontAlign. * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required * for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size. @@ -97,6 +97,40 @@ namespace Eigen { * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic. * * + * ABI and storage layout + * + * The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3. + * + * + * + * + * + * + *
Matrix typeEquivalent C structure
\code Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index rows, cols; + * }; + * \endcode
\code + * Matrix + * Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index size; + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[Rows*Cols]; // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0 + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[MaxRows*MaxCols]; // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0 + * Eigen::Index rows, cols; + * }; + * \endcode
+ * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two + * smaller to EIGEN_MAX_STATIC_ALIGN_BYTES. + * * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, * \ref TopicStorageOrders */ @@ -105,9 +139,23 @@ namespace internal { template struct traits > { +private: + enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret }; + typedef typename find_best_packet<_Scalar,size>::type PacketScalar; + enum { + row_major_bit = _Options&RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic, + max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols, + default_alignment = compute_default_alignment<_Scalar,max_size>::value, + actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, + required_alignment = unpacket_traits::alignment, + packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0 + }; + +public: typedef _Scalar Scalar; typedef Dense StorageKind; - typedef DenseIndex Index; + typedef Eigen::Index StorageIndex; typedef MatrixXpr XprKind; enum { RowsAtCompileTime = _Rows, @@ -115,10 +163,13 @@ struct traits > MaxRowsAtCompileTime = _MaxRows, MaxColsAtCompileTime = _MaxCols, Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, - CoeffReadCost = NumTraits::ReadCost, Options = _Options, InnerStrideAtCompileTime = 1, - OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime + OuterStrideAtCompileTime = (Options&RowMajor) ? 
ColsAtCompileTime : RowsAtCompileTime, + + // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase + EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit, + Alignment = actual_alignment }; }; } @@ -151,6 +202,7 @@ class Matrix * * \callgraph */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other) { return Base::_set(other); @@ -167,7 +219,8 @@ class Matrix * remain row-vectors and vectors remain vectors. */ template - EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase& other) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase& other) { return Base::_set(other); } @@ -179,12 +232,14 @@ class Matrix * \copydetails DenseBase::operator=(const EigenBase &other) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase &other) { return Base::operator=(other); } template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue& func) { return Base::operator=(func); @@ -200,6 +255,7 @@ class Matrix * * \sa resize(Index,Index) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix() : Base() { Base::_check_template_params(); @@ -207,45 +263,87 @@ class Matrix } // FIXME is it still needed - Matrix(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC + explicit Matrix(internal::constructor_without_unaligned_array_assert) : Base(internal::constructor_without_unaligned_array_assert()) { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } - /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors - * - * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, - * it is redundant to pass the dimension here, so it makes more sense to use the default - * constructor Matrix() instead. - */ - EIGEN_STRONG_INLINE explicit Matrix(Index dim) - : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 
1 : dim) +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + Matrix(Matrix&& other) + : Base(std::move(other)) { Base::_check_template_params(); - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix) - eigen_assert(dim >= 0); - eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim); - EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) + Base::_set_noalias(other); } + EIGEN_DEVICE_FUNC + Matrix& operator=(Matrix&& other) + { + other.swap(*this); + return *this; + } +#endif #ifndef EIGEN_PARSED_BY_DOXYGEN + + // This constructor is for both 1x1 matrices and dynamic vectors + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Matrix(const T& x) + { + Base::_check_template_params(); + Base::template _init1(x); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) { Base::_check_template_params(); Base::template _init2(x, y); } #else + /** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC + explicit Matrix(const Scalar *data); + + /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * This is useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance, + * calling Matrix(1) will call the initialization constructor: Matrix(const Scalar&). + * For fixed-size \c 1x1 matrices it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). 
+ */ + EIGEN_STRONG_INLINE explicit Matrix(Index dim); + /** \brief Constructs an initialized 1x1 matrix with the given coefficient */ + Matrix(const Scalar& x); /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns. * * This is useful for dynamic-size matrices. For fixed-size matrices, * it is redundant to pass these parameters, so one should use the default constructor - * Matrix() instead. */ + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance, + * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y). + * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). + */ + EIGEN_DEVICE_FUNC Matrix(Index rows, Index cols); + /** \brief Constructs an initialized 2D vector with given coefficients */ Matrix(const Scalar& x, const Scalar& y); #endif /** \brief Constructs an initialized 3D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z) { Base::_check_template_params(); @@ -255,6 +353,7 @@ class Matrix m_storage.data()[2] = z; } /** \brief Constructs an initialized 4D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w) { Base::_check_template_params(); @@ -265,76 +364,33 @@ class Matrix m_storage.data()[3] = w; } - explicit Matrix(const Scalar *data); - /** \brief Constructor copying the value of the expression \a other */ - template - EIGEN_STRONG_INLINE Matrix(const MatrixBase& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - // This test resides here, to bring the error messages closer to the user. 
Normally, these checks - // are performed deeply within the library, thus causing long and scary error traces. - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - Base::_check_template_params(); - Base::_set_noalias(other); - } /** \brief Copy constructor */ - EIGEN_STRONG_INLINE Matrix(const Matrix& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } - /** \brief Copy constructor with in-place evaluation */ - template - EIGEN_STRONG_INLINE Matrix(const ReturnByValue& other) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - other.evalTo(*this); - } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other) + { } /** \brief Copy constructor for generic expressions. * \sa MatrixBase::operator=(const EigenBase&) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const EigenBase &other) - : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - // FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to - // go for pure _set() implementations, right? - *this = other; - } - - /** \internal - * \brief Override MatrixBase::swap() since for dynamic-sized matrices - * of same type it is enough to swap the data pointers. 
- */ - template - void swap(MatrixBase const & other) - { this->_swap(other.derived()); } + : Base(other.derived()) + { } - inline Index innerStride() const { return 1; } - inline Index outerStride() const { return this->innerSize(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } /////////// Geometry module /////////// template + EIGEN_DEVICE_FUNC explicit Matrix(const RotationBase& r); template + EIGEN_DEVICE_FUNC Matrix& operator=(const RotationBase& r); - #ifdef EIGEN2_SUPPORT - template - explicit Matrix(const eigen2_RotationBase& r); - template - Matrix& operator=(const eigen2_RotationBase& r); - #endif - // allow to extend Matrix outside Eigen #ifdef EIGEN_MATRIX_PLUGIN #include EIGEN_MATRIX_PLUGIN diff --git a/nuparu/include/Eigen/src/Core/MatrixBase.h b/nuparu/include/Eigen/src/Core/MatrixBase.h index 9193b6ab..9d612c85 100644 --- a/nuparu/include/Eigen/src/Core/MatrixBase.h +++ b/nuparu/include/Eigen/src/Core/MatrixBase.h @@ -52,7 +52,7 @@ template class MatrixBase #ifndef EIGEN_PARSED_BY_DOXYGEN typedef MatrixBase StorageBaseType; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; @@ -66,8 +66,7 @@ template class MatrixBase using Base::MaxSizeAtCompileTime; using Base::IsVectorAtCompileTime; using Base::Flags; - using Base::CoeffReadCost; - + using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -81,6 +80,8 @@ template class MatrixBase using Base::operator-=; using Base::operator*=; using Base::operator/=; + using Base::operator*; + using Base::operator/; typedef typename Base::CoeffReturnType CoeffReturnType; typedef typename Base::ConstTransposeReturnType 
ConstTransposeReturnType; @@ -98,25 +99,14 @@ template class MatrixBase /** \returns the size of the main diagonal, which is min(rows(),cols()). * \sa rows(), cols(), SizeAtCompileTime. */ + EIGEN_DEVICE_FUNC inline Index diagonalSize() const { return (std::min)(rows(),cols()); } - /** \brief The plain matrix type corresponding to this expression. - * - * This is not necessarily exactly the return type of eval(). In the case of plain matrices, - * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed - * that the return type of eval() is either PlainObject or const PlainObject&. - */ - typedef Matrix::Scalar, - internal::traits::RowsAtCompileTime, - internal::traits::ColsAtCompileTime, - AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), - internal::traits::MaxRowsAtCompileTime, - internal::traits::MaxColsAtCompileTime - > PlainObject; + typedef typename Base::PlainObject PlainObject; #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; /** \internal the return type of MatrixBase::adjoint() */ typedef typename internal::conditional::IsComplex, CwiseUnaryOp, ConstTransposeReturnType>, @@ -125,7 +115,7 @@ template class MatrixBase /** \internal Return type of eigenvalues() */ typedef Matrix, internal::traits::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType; /** \internal the return type of identity */ - typedef CwiseNullaryOp,Derived> IdentityReturnType; + typedef CwiseNullaryOp,PlainObject> IdentityReturnType; /** \internal the return type of unit vectors */ typedef Block, SquareMatrixType>, internal::traits::RowsAtCompileTime, @@ -145,39 +135,48 @@ template class MatrixBase /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ + 
EIGEN_DEVICE_FUNC Derived& operator=(const MatrixBase& other); // We cannot inherit here via Base::operator= since it is causing // trouble with MSVC. template + EIGEN_DEVICE_FUNC Derived& operator=(const DenseBase& other); template + EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase& other); template + EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue& other); -#ifndef EIGEN_PARSED_BY_DOXYGEN - template - Derived& lazyAssign(const ProductBase& other); - - template - Derived& lazyAssign(const MatrixPowerProduct& other); -#endif // not EIGEN_PARSED_BY_DOXYGEN - template + EIGEN_DEVICE_FUNC Derived& operator+=(const MatrixBase& other); template + EIGEN_DEVICE_FUNC Derived& operator-=(const MatrixBase& other); +#ifdef __CUDACC__ template - const typename ProductReturnType::Type + EIGEN_DEVICE_FUNC + const Product + operator*(const MatrixBase &other) const + { return this->lazyProduct(other); } +#else + + template + const Product operator*(const MatrixBase &other) const; +#endif + template - const typename LazyProductReturnType::Type + EIGEN_DEVICE_FUNC + const Product lazyProduct(const MatrixBase &other) const; template @@ -190,88 +189,91 @@ template class MatrixBase void applyOnTheRight(const EigenBase& other); template - const DiagonalProduct + EIGEN_DEVICE_FUNC + const Product operator*(const DiagonalBase &diagonal) const; template + EIGEN_DEVICE_FUNC typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType dot(const MatrixBase& other) const; - #ifdef EIGEN2_SUPPORT - template - Scalar eigen2_dot(const MatrixBase& other) const; - #endif - - RealScalar squaredNorm() const; - RealScalar norm() const; + EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; + EIGEN_DEVICE_FUNC RealScalar norm() const; RealScalar stableNorm() const; RealScalar blueNorm() const; RealScalar hypotNorm() const; - const PlainObject normalized() const; - void normalize(); + EIGEN_DEVICE_FUNC const PlainObject normalized() const; + EIGEN_DEVICE_FUNC 
void normalize(); - const AdjointReturnType adjoint() const; - void adjointInPlace(); + EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const; + EIGEN_DEVICE_FUNC void adjointInPlace(); typedef Diagonal DiagonalReturnType; + EIGEN_DEVICE_FUNC DiagonalReturnType diagonal(); - typedef typename internal::add_const >::type ConstDiagonalReturnType; + + typedef typename internal::add_const >::type ConstDiagonalReturnType; + EIGEN_DEVICE_FUNC ConstDiagonalReturnType diagonal() const; template struct DiagonalIndexReturnType { typedef Diagonal Type; }; template struct ConstDiagonalIndexReturnType { typedef const Diagonal Type; }; - template typename DiagonalIndexReturnType::Type diagonal(); - template typename ConstDiagonalIndexReturnType::Type diagonal() const; - - // Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations. - // On the other hand they confuse MSVC8... - #if (defined _MSC_VER) && (_MSC_VER >= 1500) // 2008 or later - typename MatrixBase::template DiagonalIndexReturnType::Type diagonal(Index index); - typename MatrixBase::template ConstDiagonalIndexReturnType::Type diagonal(Index index) const; - #else - typename DiagonalIndexReturnType::Type diagonal(Index index); - typename ConstDiagonalIndexReturnType::Type diagonal(Index index) const; - #endif - - #ifdef EIGEN2_SUPPORT - template typename internal::eigen2_part_return_type::type part(); - template const typename internal::eigen2_part_return_type::type part() const; + template + EIGEN_DEVICE_FUNC + typename DiagonalIndexReturnType::Type diagonal(); + + template + EIGEN_DEVICE_FUNC + typename ConstDiagonalIndexReturnType::Type diagonal() const; - // huuuge hack. make Eigen2's matrix.part() work in eigen3. Problem: Diagonal is now a class template instead - // of an integer constant. Solution: overload the part() method template wrt template parameters list. 
- template class U> - const DiagonalWrapper part() const - { return diagonal().asDiagonal(); } - #endif // EIGEN2_SUPPORT + typedef Diagonal DiagonalDynamicIndexReturnType; + typedef typename internal::add_const >::type ConstDiagonalDynamicIndexReturnType; + + EIGEN_DEVICE_FUNC + DiagonalDynamicIndexReturnType diagonal(Index index); + EIGEN_DEVICE_FUNC + ConstDiagonalDynamicIndexReturnType diagonal(Index index) const; template struct TriangularViewReturnType { typedef TriangularView Type; }; template struct ConstTriangularViewReturnType { typedef const TriangularView Type; }; - template typename TriangularViewReturnType::Type triangularView(); - template typename ConstTriangularViewReturnType::Type triangularView() const; + template + EIGEN_DEVICE_FUNC + typename TriangularViewReturnType::Type triangularView(); + template + EIGEN_DEVICE_FUNC + typename ConstTriangularViewReturnType::Type triangularView() const; template struct SelfAdjointViewReturnType { typedef SelfAdjointView Type; }; template struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView Type; }; - template typename SelfAdjointViewReturnType::Type selfadjointView(); - template typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; + template + EIGEN_DEVICE_FUNC + typename SelfAdjointViewReturnType::Type selfadjointView(); + template + EIGEN_DEVICE_FUNC + typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; const SparseView sparseView(const Scalar& m_reference = Scalar(0), const typename NumTraits::Real& m_epsilon = NumTraits::dummy_precision()) const; - static const IdentityReturnType Identity(); - static const IdentityReturnType Identity(Index rows, Index cols); - static const BasisReturnType Unit(Index size, Index i); - static const BasisReturnType Unit(Index i); - static const BasisReturnType UnitX(); - static const BasisReturnType UnitY(); - static const BasisReturnType UnitZ(); - static const BasisReturnType UnitW(); - + EIGEN_DEVICE_FUNC static 
const IdentityReturnType Identity(); + EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitX(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitY(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitW(); + + EIGEN_DEVICE_FUNC const DiagonalWrapper asDiagonal() const; const PermutationWrapper asPermutation() const; + EIGEN_DEVICE_FUNC Derived& setIdentity(); + EIGEN_DEVICE_FUNC Derived& setIdentity(Index rows, Index cols); bool isIdentity(const RealScalar& prec = NumTraits::dummy_precision()) const; @@ -303,59 +305,49 @@ template class MatrixBase NoAlias noalias(); - inline const ForceAlignedAccess forceAlignedAccess() const; - inline ForceAlignedAccess forceAlignedAccess(); - template inline typename internal::add_const_on_value_type,Derived&>::type>::type forceAlignedAccessIf() const; - template inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); + // TODO forceAlignedAccess is temporarily disabled + // Need to find a nicer workaround. 
+ inline const Derived& forceAlignedAccess() const { return derived(); } + inline Derived& forceAlignedAccess() { return derived(); } + template inline const Derived& forceAlignedAccessIf() const { return derived(); } + template inline Derived& forceAlignedAccessIf() { return derived(); } - Scalar trace() const; + EIGEN_DEVICE_FUNC Scalar trace() const; -/////////// Array module /////////// + template EIGEN_DEVICE_FUNC RealScalar lpNorm() const; - template RealScalar lpNorm() const; - - MatrixBase& matrix() { return *this; } - const MatrixBase& matrix() const { return *this; } + EIGEN_DEVICE_FUNC MatrixBase& matrix() { return *this; } + EIGEN_DEVICE_FUNC const MatrixBase& matrix() const { return *this; } /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix * \sa ArrayBase::matrix() */ - ArrayWrapper array() { return derived(); } - const ArrayWrapper array() const { return derived(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper array() { return ArrayWrapper(derived()); } + /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix + * \sa ArrayBase::matrix() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper array() const { return ArrayWrapper(derived()); } /////////// LU module /////////// - const FullPivLU fullPivLu() const; - const PartialPivLU partialPivLu() const; + EIGEN_DEVICE_FUNC + inline const FullPivLU fullPivLu() const; + EIGEN_DEVICE_FUNC + inline const PartialPivLU partialPivLu() const; - #if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS - const LU lu() const; - #endif + EIGEN_DEVICE_FUNC + inline const PartialPivLU lu() const; - #ifdef EIGEN2_SUPPORT - const LU eigen2_lu() const; - #endif - - #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS - const PartialPivLU lu() const; - #endif + EIGEN_DEVICE_FUNC + inline const Inverse inverse() const; - #ifdef EIGEN2_SUPPORT template - void computeInverse(MatrixBase *result) const { - *result = this->inverse(); - } - #endif 
- - const internal::inverse_impl inverse() const; - template - void computeInverseAndDetWithCheck( + inline void computeInverseAndDetWithCheck( ResultType& inverse, typename ResultType::Scalar& determinant, bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() ) const; template - void computeInverseWithCheck( + inline void computeInverseWithCheck( ResultType& inverse, bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() @@ -364,29 +356,24 @@ template class MatrixBase /////////// Cholesky module /////////// - const LLT llt() const; - const LDLT ldlt() const; + inline const LLT llt() const; + inline const LDLT ldlt() const; /////////// QR module /////////// - const HouseholderQR householderQr() const; - const ColPivHouseholderQR colPivHouseholderQr() const; - const FullPivHouseholderQR fullPivHouseholderQr() const; - - #ifdef EIGEN2_SUPPORT - const QR qr() const; - #endif + inline const HouseholderQR householderQr() const; + inline const ColPivHouseholderQR colPivHouseholderQr() const; + inline const FullPivHouseholderQR fullPivHouseholderQr() const; - EigenvaluesReturnType eigenvalues() const; - RealScalar operatorNorm() const; +/////////// Eigenvalues module /////////// -/////////// SVD module /////////// + inline EigenvaluesReturnType eigenvalues() const; + inline RealScalar operatorNorm() const; - JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; +/////////// SVD module /////////// - #ifdef EIGEN2_SUPPORT - SVD svd() const; - #endif + inline JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; + inline BDCSVD bdcSvd(unsigned int computationOptions = 0) const; /////////// Geometry module /////////// @@ -398,20 +385,25 @@ template class MatrixBase }; #endif // EIGEN_PARSED_BY_DOXYGEN template - typename cross_product_return_type::type + EIGEN_DEVICE_FUNC + inline typename cross_product_return_type::type cross(const MatrixBase& other) const; + template - PlainObject 
cross3(const MatrixBase& other) const; - PlainObject unitOrthogonal(void) const; - Matrix eulerAngles(Index a0, Index a1, Index a2) const; + EIGEN_DEVICE_FUNC + inline PlainObject cross3(const MatrixBase& other) const; + + EIGEN_DEVICE_FUNC + inline PlainObject unitOrthogonal(void) const; - #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS - ScalarMultipleReturnType operator*(const UniformScaling& s) const; + inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; + + inline ScalarMultipleReturnType operator*(const UniformScaling& s) const; // put this as separate enum value to work around possible GCC 4.3 bug (?) - enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal }; + enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) + : ColsAtCompileTime==1 ? Vertical : Horizontal }; typedef Homogeneous HomogeneousReturnType; - HomogeneousReturnType homogeneous() const; - #endif + inline HomogeneousReturnType homogeneous() const; enum { SizeMinusOne = SizeAtCompileTime==Dynamic ? 
Dynamic : SizeAtCompileTime-1 @@ -422,7 +414,7 @@ template class MatrixBase typedef CwiseUnaryOp::Scalar>, const ConstStartMinusOne > HNormalizedReturnType; - const HNormalizedReturnType hnormalized() const; + inline const HNormalizedReturnType hnormalized() const; ////////// Householder module /////////// @@ -446,6 +438,15 @@ template class MatrixBase template void applyOnTheRight(Index p, Index q, const JacobiRotation& j); +///////// SparseCore module ///////// + + template + EIGEN_STRONG_INLINE const typename SparseMatrixBase::template CwiseProductDenseReturnType::Type + cwiseProduct(const SparseMatrixBase &other) const + { + return other.cwiseProduct(derived()); + } + ///////// MatrixFunctions module ///////// typedef typename internal::stem_function::type StemFunction; @@ -458,49 +459,15 @@ template class MatrixBase const MatrixSquareRootReturnValue sqrt() const; const MatrixLogarithmReturnValue log() const; const MatrixPowerReturnValue pow(const RealScalar& p) const; - -#ifdef EIGEN2_SUPPORT - template - Derived& operator+=(const Flagged, 0, - EvalBeforeAssigningBit>& other); - - template - Derived& operator-=(const Flagged, 0, - EvalBeforeAssigningBit>& other); - - /** \deprecated because .lazy() is deprecated - * Overloaded for cache friendly product evaluation */ - template - Derived& lazyAssign(const Flagged& other) - { return lazyAssign(other._expression()); } - - template - const Flagged marked() const; - const Flagged lazy() const; - - inline const Cwise cwise() const; - inline Cwise cwise(); - - VectorBlock start(Index size); - const VectorBlock start(Index size) const; - VectorBlock end(Index size); - const VectorBlock end(Index size) const; - template VectorBlock start(); - template const VectorBlock start() const; - template VectorBlock end(); - template const VectorBlock end() const; - - Minor minor(Index row, Index col); - const Minor minor(Index row, Index col) const; -#endif + const MatrixComplexPowerReturnValue pow(const std::complex& p) 
const; protected: - MatrixBase() : Base() {} + EIGEN_DEVICE_FUNC MatrixBase() : Base() {} private: - explicit MatrixBase(int); - MatrixBase(int,int); - template explicit MatrixBase(const MatrixBase&); + EIGEN_DEVICE_FUNC explicit MatrixBase(int); + EIGEN_DEVICE_FUNC MatrixBase(int,int); + template EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase&); protected: // mixing arrays and matrices is not legal template Derived& operator+=(const ArrayBase& ) @@ -510,6 +477,51 @@ template class MatrixBase {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} }; + +/*************************************************************************** +* Implementation of matrix base methods +***************************************************************************/ + +/** replaces \c *this by \c *this * \a other. + * + * \returns a reference to \c *this + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline Derived& +MatrixBase::operator*=(const EigenBase &other) +{ + other.derived().applyThisOnTheRight(derived()); + return derived(); +} + +/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=(). + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline void MatrixBase::applyOnTheRight(const EigenBase &other) +{ + other.derived().applyThisOnTheRight(derived()); +} + +/** replaces \c *this by \a other * \c *this. 
+ * + * Example: \include MatrixBase_applyOnTheLeft.cpp + * Output: \verbinclude MatrixBase_applyOnTheLeft.out + */ +template +template +inline void MatrixBase::applyOnTheLeft(const EigenBase &other) +{ + other.derived().applyThisOnTheLeft(derived()); +} + } // end namespace Eigen #endif // EIGEN_MATRIXBASE_H diff --git a/nuparu/include/Eigen/src/Core/NestByValue.h b/nuparu/include/Eigen/src/Core/NestByValue.h index a893b176..9aeaf8d1 100644 --- a/nuparu/include/Eigen/src/Core/NestByValue.h +++ b/nuparu/include/Eigen/src/Core/NestByValue.h @@ -40,29 +40,29 @@ template class NestByValue typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) - inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} - inline Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return 
m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -91,7 +91,7 @@ template class NestByValue m_expression.const_cast_derived().template writePacket(index, x); } - operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType m_expression; diff --git a/nuparu/include/Eigen/src/Core/NoAlias.h b/nuparu/include/Eigen/src/Core/NoAlias.h index 768bfb18..0ade7525 100644 --- a/nuparu/include/Eigen/src/Core/NoAlias.h +++ b/nuparu/include/Eigen/src/Core/NoAlias.h @@ -30,62 +30,36 @@ namespace Eigen { template class StorageBase> class NoAlias { - typedef typename ExpressionType::Scalar Scalar; public: - NoAlias(ExpressionType& expression) : m_expression(expression) {} - - /** Behaves like MatrixBase::lazyAssign(other) - * \sa MatrixBase::lazyAssign() */ + typedef typename ExpressionType::Scalar Scalar; + + explicit NoAlias(ExpressionType& expression) : m_expression(expression) {} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) - { return internal::assign_selector::run(m_expression,other.derived()); } - - /** \sa MatrixBase::operator+= */ + { + call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); + return m_expression; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) { - typedef SelfCwiseBinaryOp, ExpressionType, OtherDerived> SelfAdder; - SelfAdder tmp(m_expression); - typedef typename internal::nested::type OtherDerivedNested; - typedef typename internal::remove_all::type _OtherDerivedNested; - internal::assign_selector::run(tmp,OtherDerivedNested(other.derived())); + call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); return m_expression; } - - /** \sa 
MatrixBase::operator-= */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) { - typedef SelfCwiseBinaryOp, ExpressionType, OtherDerived> SelfAdder; - SelfAdder tmp(m_expression); - typedef typename internal::nested::type OtherDerivedNested; - typedef typename internal::remove_all::type _OtherDerivedNested; - internal::assign_selector::run(tmp,OtherDerivedNested(other.derived())); + call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); return m_expression; } -#ifndef EIGEN_PARSED_BY_DOXYGEN - template - EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase& other) - { other.derived().addTo(m_expression); return m_expression; } - - template - EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase& other) - { other.derived().subTo(m_expression); return m_expression; } - - template - EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct& other) - { return m_expression.derived() += CoeffBasedProduct(other.lhs(), other.rhs()); } - - template - EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct& other) - { return m_expression.derived() -= CoeffBasedProduct(other.lhs(), other.rhs()); } - - template - ExpressionType& operator=(const ReturnByValue& func) - { return m_expression = func; } -#endif - + EIGEN_DEVICE_FUNC ExpressionType& expression() const { return m_expression; @@ -126,7 +100,7 @@ class NoAlias template NoAlias MatrixBase::noalias() { - return derived(); + return NoAlias(derived()); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/NumTraits.h b/nuparu/include/Eigen/src/Core/NumTraits.h index bac9e50b..1d85dec7 100644 --- a/nuparu/include/Eigen/src/Core/NumTraits.h +++ b/nuparu/include/Eigen/src/Core/NumTraits.h @@ -68,21 +68,40 @@ template struct GenericNumTraits >::type NonInteger; typedef T Nested; - static inline Real epsilon() { return std::numeric_limits::epsilon(); } + EIGEN_DEVICE_FUNC + static 
inline Real epsilon() + { + #if defined(__CUDA_ARCH__) + return internal::device::numeric_limits::epsilon(); + #else + return std::numeric_limits::epsilon(); + #endif + } + EIGEN_DEVICE_FUNC static inline Real dummy_precision() { // make sure to override this for floating-point types return Real(0); } - static inline T highest() { return (std::numeric_limits::max)(); } - static inline T lowest() { return IsInteger ? (std::numeric_limits::min)() : (-(std::numeric_limits::max)()); } - -#ifdef EIGEN2_SUPPORT - enum { - HasFloatingPoint = !IsInteger - }; - typedef NonInteger FloatingPoint; + + + EIGEN_DEVICE_FUNC + static inline T highest() { +#if defined(__CUDA_ARCH__) + return (internal::device::numeric_limits::max)(); +#else + return (std::numeric_limits::max)(); #endif + } + + EIGEN_DEVICE_FUNC + static inline T lowest() { +#if defined(__CUDA_ARCH__) + return IsInteger ? (internal::device::numeric_limits::min)() : (-(internal::device::numeric_limits::max)()); +#else + return IsInteger ? (std::numeric_limits::min)() : (-(std::numeric_limits::max)()); +#endif + } }; template struct NumTraits : GenericNumTraits @@ -91,11 +110,13 @@ template struct NumTraits : GenericNumTraits template<> struct NumTraits : GenericNumTraits { + EIGEN_DEVICE_FUNC static inline float dummy_precision() { return 1e-5f; } }; template<> struct NumTraits : GenericNumTraits { + EIGEN_DEVICE_FUNC static inline double dummy_precision() { return 1e-12; } }; @@ -136,9 +157,9 @@ struct NumTraits > IsInteger = NumTraits::IsInteger, IsSigned = NumTraits::IsSigned, RequireInitialization = 1, - ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, - AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::AddCost, - MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::MulCost + ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? 
HugeCost : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, + AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::AddCost, + MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::MulCost }; static inline RealScalar epsilon() { return NumTraits::epsilon(); } diff --git a/nuparu/include/Eigen/src/Core/PermutationMatrix.h b/nuparu/include/Eigen/src/Core/PermutationMatrix.h index 4fc5dd31..90e1df23 100644 --- a/nuparu/include/Eigen/src/Core/PermutationMatrix.h +++ b/nuparu/include/Eigen/src/Core/PermutationMatrix.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2009 Benoit Jacob -// Copyright (C) 2009-2011 Gael Guennebaud +// Copyright (C) 2009-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -13,8 +13,6 @@ namespace Eigen { -template class PermutedImpl; - /** \class PermutationBase * \ingroup Core_Module * @@ -41,10 +39,6 @@ template -struct permut_matrix_product_retval; -template -struct permut_sparsematrix_product_retval; enum PermPermProduct_t {PermPermProduct}; } // end namespace internal @@ -60,19 +54,20 @@ class PermutationBase : public EigenBase typedef typename Traits::IndicesType IndicesType; enum { Flags = Traits::Flags, - CoeffReadCost = Traits::CoeffReadCost, RowsAtCompileTime = Traits::RowsAtCompileTime, ColsAtCompileTime = Traits::ColsAtCompileTime, MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, MaxColsAtCompileTime = Traits::MaxColsAtCompileTime }; - typedef typename Traits::Scalar Scalar; - typedef typename Traits::Index Index; - typedef Matrix + typedef typename Traits::StorageIndex StorageIndex; + typedef Matrix DenseMatrixType; - typedef PermutationMatrix + typedef PermutationMatrix PlainPermutationType; + typedef PlainPermutationType PlainObject; using Base::derived; + typedef Inverse InverseReturnType; + typedef void 
Scalar; #endif /** Copies the other permutation into *this */ @@ -118,7 +113,7 @@ class PermutationBase : public EigenBase void evalTo(MatrixBase& other) const { other.setZero(); - for (int i=0; i /** Sets *this to be the identity permutation matrix */ void setIdentity() { - for(Index i = 0; i < size(); ++i) + StorageIndex n = StorageIndex(size()); + for(StorageIndex i = 0; i < n; ++i) indices().coeffRef(i) = i; } @@ -163,18 +159,18 @@ class PermutationBase : public EigenBase * * \returns a reference to *this. * - * \warning This is much slower than applyTranspositionOnTheRight(int,int): + * \warning This is much slower than applyTranspositionOnTheRight(Index,Index): * this has linear complexity and requires a lot of branching. * - * \sa applyTranspositionOnTheRight(int,int) + * \sa applyTranspositionOnTheRight(Index,Index) */ Derived& applyTranspositionOnTheLeft(Index i, Index j) { eigen_assert(i>=0 && j>=0 && i * * This is a fast operation, it only consists in swapping two indices. * - * \sa applyTranspositionOnTheLeft(int,int) + * \sa applyTranspositionOnTheLeft(Index,Index) */ Derived& applyTranspositionOnTheRight(Index i, Index j) { @@ -198,14 +194,14 @@ class PermutationBase : public EigenBase * * \note \note_try_to_help_rvo */ - inline Transpose inverse() const - { return derived(); } + inline InverseReturnType inverse() const + { return InverseReturnType(derived()); } /** \returns the tranpose permutation matrix. 
* * \note \note_try_to_help_rvo */ - inline Transpose transpose() const - { return derived(); } + inline InverseReturnType transpose() const + { return InverseReturnType(derived()); } /**** multiplication helpers to hopefully get RVO ****/ @@ -215,13 +211,13 @@ class PermutationBase : public EigenBase template void assignTranspose(const PermutationBase& other) { - for (int i=0; i void assignProduct(const Lhs& lhs, const Rhs& rhs) { eigen_assert(lhs.cols() == rhs.rows()); - for (int i=0; i * \note \note_try_to_help_rvo */ template - inline PlainPermutationType operator*(const Transpose >& other) const + inline PlainPermutationType operator*(const InverseImpl& other) const { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); } /** \returns the product of an inverse permutation with another permutation. @@ -248,8 +244,37 @@ class PermutationBase : public EigenBase * \note \note_try_to_help_rvo */ template friend - inline PlainPermutationType operator*(const Transpose >& other, const PermutationBase& perm) + inline PlainPermutationType operator*(const InverseImpl& other, const PermutationBase& perm) { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); } + + /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation. + * + * This function is O(\c n) procedure allocating a buffer of \c n booleans. 
+ */ + Index determinant() const + { + Index res = 1; + Index n = size(); + Matrix mask(n); + mask.fill(false); + Index r = 0; + while(r < n) + { + // search for the next seed + while(r=n) + break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + mask.coeffRef(k0) = true; + for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k)) + { + mask.coeffRef(k) = true; + res = -res; + } + } + return res; + } protected: @@ -262,7 +287,7 @@ class PermutationBase : public EigenBase * * \param SizeAtCompileTime the number of rows/cols, or Dynamic * \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. - * \param IndexType the interger type of the indices + * \param StorageIndex the integer type of the indices * * This class represents a permutation matrix, internally stored as a vector of integers. * @@ -270,24 +295,29 @@ class PermutationBase : public EigenBase */ namespace internal { -template -struct traits > - : traits > +template +struct traits > + : traits > { - typedef IndexType Index; - typedef Matrix IndicesType; + typedef PermutationStorage StorageKind; + typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; + typedef _StorageIndex StorageIndex; + typedef void Scalar; }; } -template -class PermutationMatrix : public PermutationBase > +template +class PermutationMatrix : public PermutationBase > { typedef PermutationBase Base; typedef internal::traits Traits; public: + typedef const PermutationMatrix& Nested; + #ifndef EIGEN_PARSED_BY_DOXYGEN typedef typename Traits::IndicesType IndicesType; + typedef typename Traits::StorageIndex StorageIndex; #endif inline PermutationMatrix() @@ -295,8 +325,10 @@ class PermutationMatrix : public PermutationBase::highest()); + } /** Copy constructor. 
*/ template @@ -317,7 +349,7 @@ class PermutationMatrix : public PermutationBase - explicit inline PermutationMatrix(const MatrixBase& a_indices) : m_indices(a_indices) + explicit inline PermutationMatrix(const MatrixBase& indices) : m_indices(indices) {} /** Convert the Transpositions \a tr to a permutation matrix */ @@ -364,10 +396,13 @@ class PermutationMatrix : public PermutationBase - PermutationMatrix(const Transpose >& other) - : m_indices(other.nestedPermutation().size()) + PermutationMatrix(const InverseImpl& other) + : m_indices(other.derived().nestedExpression().size()) { - for (int i=0; i::highest()); + StorageIndex end = StorageIndex(m_indices.size()); + for (StorageIndex i=0; i PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs) @@ -384,18 +419,20 @@ class PermutationMatrix : public PermutationBase -struct traits,_PacketAccess> > - : traits > +template +struct traits,_PacketAccess> > + : traits > { - typedef IndexType Index; - typedef Map, _PacketAccess> IndicesType; + typedef PermutationStorage StorageKind; + typedef Map, _PacketAccess> IndicesType; + typedef _StorageIndex StorageIndex; + typedef void Scalar; }; } -template -class Map,_PacketAccess> - : public PermutationBase,_PacketAccess> > +template +class Map,_PacketAccess> + : public PermutationBase,_PacketAccess> > { typedef PermutationBase Base; typedef internal::traits Traits; @@ -403,14 +440,14 @@ class Map, #ifndef EIGEN_PARSED_BY_DOXYGEN typedef typename Traits::IndicesType IndicesType; - typedef typename IndicesType::Scalar Index; + typedef typename IndicesType::Scalar StorageIndex; #endif - inline Map(const Index* indicesPtr) + inline Map(const StorageIndex* indicesPtr) : m_indices(indicesPtr) {} - inline Map(const Index* indicesPtr, Index size) + inline Map(const StorageIndex* indicesPtr, Index size) : m_indices(indicesPtr,size) {} @@ -457,24 +494,21 @@ class Map, * \sa class PermutationBase, class PermutationMatrix */ -struct PermutationStorage {}; - template 
class TranspositionsWrapper; namespace internal { template struct traits > { typedef PermutationStorage StorageKind; - typedef typename _IndicesType::Scalar Scalar; - typedef typename _IndicesType::Scalar Index; + typedef void Scalar; + typedef typename _IndicesType::Scalar StorageIndex; typedef _IndicesType IndicesType; enum { RowsAtCompileTime = _IndicesType::SizeAtCompileTime, ColsAtCompileTime = _IndicesType::SizeAtCompileTime, - MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime, - Flags = 0, - CoeffReadCost = _IndicesType::CoeffReadCost + MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + Flags = 0 }; }; } @@ -490,8 +524,8 @@ class PermutationWrapper : public PermutationBase -inline const internal::permut_matrix_product_retval -operator*(const MatrixBase& matrix, - const PermutationBase &permutation) +template +EIGEN_DEVICE_FUNC +const Product +operator*(const MatrixBase &matrix, + const PermutationBase& permutation) { - return internal::permut_matrix_product_retval - - (permutation.derived(), matrix.derived()); + return Product + (matrix.derived(), permutation.derived()); } /** \returns the matrix with the permutation applied to the rows. 
*/ -template -inline const internal::permut_matrix_product_retval - +template +EIGEN_DEVICE_FUNC +const Product operator*(const PermutationBase &permutation, - const MatrixBase& matrix) + const MatrixBase& matrix) { - return internal::permut_matrix_product_retval - - (permutation.derived(), matrix.derived()); + return Product + (permutation.derived(), matrix.derived()); } -namespace internal { - -template -struct traits > -{ - typedef typename MatrixType::PlainObject ReturnType; -}; -template -struct permut_matrix_product_retval - : public ReturnByValue > +template +class InverseImpl + : public EigenBase > { - typedef typename remove_all::type MatrixTypeNestedCleaned; - typedef typename MatrixType::Index Index; - - permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix) - : m_permutation(perm), m_matrix(matrix) - {} - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - - template inline void evalTo(Dest& dst) const - { - const Index n = Side==OnTheLeft ? rows() : cols(); - - if(is_same::value && extract_data(dst) == extract_data(m_matrix)) - { - // apply the permutation inplace - Matrix mask(m_permutation.size()); - mask.fill(false); - Index r = 0; - while(r < m_permutation.size()) - { - // search for the next seed - while(r=m_permutation.size()) - break; - // we got one, let's follow it until we are back to the seed - Index k0 = r++; - Index kPrev = k0; - mask.coeffRef(k0) = true; - for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k)) - { - Block(dst, k) - .swap(Block - (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); - - mask.coeffRef(k) = true; - kPrev = k; - } - } - } - else - { - for(int i = 0; i < n; ++i) - { - Block - (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i) - - = - - Block - (m_matrix, ((Side==OnTheRight) ^ Transposed) ? 
m_permutation.indices().coeff(i) : i); - } - } - } - - protected: - const PermutationType& m_permutation; - typename MatrixType::Nested m_matrix; -}; - -/* Template partial specialization for transposed/inverse permutations */ - -template -struct traits > > - : traits -{}; - -} // end namespace internal - -template -class Transpose > - : public EigenBase > > -{ - typedef Derived PermutationType; - typedef typename PermutationType::IndicesType IndicesType; typedef typename PermutationType::PlainPermutationType PlainPermutationType; + typedef internal::traits PermTraits; + protected: + InverseImpl() {} public: + typedef Inverse InverseType; + using EigenBase >::derived; #ifndef EIGEN_PARSED_BY_DOXYGEN - typedef internal::traits Traits; - typedef typename Derived::DenseMatrixType DenseMatrixType; + typedef typename PermutationType::DenseMatrixType DenseMatrixType; enum { - Flags = Traits::Flags, - CoeffReadCost = Traits::CoeffReadCost, - RowsAtCompileTime = Traits::RowsAtCompileTime, - ColsAtCompileTime = Traits::ColsAtCompileTime, - MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, - MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + RowsAtCompileTime = PermTraits::RowsAtCompileTime, + ColsAtCompileTime = PermTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime }; - typedef typename Traits::Scalar Scalar; #endif - Transpose(const PermutationType& p) : m_permutation(p) {} - - inline int rows() const { return m_permutation.rows(); } - inline int cols() const { return m_permutation.cols(); } - #ifndef EIGEN_PARSED_BY_DOXYGEN template void evalTo(MatrixBase& other) const { other.setZero(); - for (int i=0; i friend - inline const internal::permut_matrix_product_retval - operator*(const MatrixBase& matrix, const Transpose& trPerm) + const Product + operator*(const MatrixBase& matrix, const InverseType& trPerm) { - return 
internal::permut_matrix_product_retval(trPerm.m_permutation, matrix.derived()); + return Product(matrix.derived(), trPerm.derived()); } /** \returns the matrix with the inverse permutation applied to the rows. */ template - inline const internal::permut_matrix_product_retval + const Product operator*(const MatrixBase& matrix) const { - return internal::permut_matrix_product_retval(m_permutation, matrix.derived()); + return Product(derived(), matrix.derived()); } - - const PermutationType& nestedPermutation() const { return m_permutation; } - - protected: - const PermutationType& m_permutation; }; template @@ -683,6 +625,12 @@ const PermutationWrapper MatrixBase::asPermutation() con return derived(); } +namespace internal { + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_PERMUTATIONMATRIX_H diff --git a/nuparu/include/Eigen/src/Core/PlainObjectBase.h b/nuparu/include/Eigen/src/Core/PlainObjectBase.h index af0a479c..1225e85b 100644 --- a/nuparu/include/Eigen/src/Core/PlainObjectBase.h +++ b/nuparu/include/Eigen/src/Core/PlainObjectBase.h @@ -28,6 +28,7 @@ namespace internal { template struct check_rows_cols_for_overflow { template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE void run(Index, Index) { } @@ -35,6 +36,7 @@ template struct check_rows_cols_for_overflow { template<> struct check_rows_cols_for_overflow { template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols) { // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242 @@ -47,7 +49,10 @@ template<> struct check_rows_cols_for_overflow { } }; -template struct conservative_resize_like_impl; +template +struct conservative_resize_like_impl; template struct matrix_swap_impl; @@ -64,8 +69,9 @@ template struct m #ifdef EIGEN_PARSED_BY_DOXYGEN namespace internal { -// this is a warkaround to doxygen not being able to understand the inheritence logic +// 
this is a workaround to doxygen not being able to understand the inheritance logic // when it is hidden by the dense_xpr_base helper struct. +/** This class is just a workaround for Doxygen and it does not not actually exist. */ template struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase {}; /** This class is just a workaround for Doxygen and it does not not actually exist. */ template @@ -90,8 +96,8 @@ class PlainObjectBase : public internal::dense_xpr_base::type typedef typename internal::dense_xpr_base::type Base; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; typedef Derived DenseType; @@ -110,28 +116,36 @@ class PlainObjectBase : public internal::dense_xpr_base::type typedef Eigen::Map MapType; friend class Eigen::Map; typedef const Eigen::Map ConstMapType; - friend class Eigen::Map; - typedef Eigen::Map AlignedMapType; - friend class Eigen::Map; - typedef const Eigen::Map ConstAlignedMapType; +#if EIGEN_MAX_ALIGN_BYTES>0 + // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice. 
+ friend class Eigen::Map; + friend class Eigen::Map; +#endif + typedef Eigen::Map AlignedMapType; + typedef const Eigen::Map ConstAlignedMapType; template struct StridedMapType { typedef Eigen::Map type; }; template struct StridedConstMapType { typedef Eigen::Map type; }; - template struct StridedAlignedMapType { typedef Eigen::Map type; }; - template struct StridedConstAlignedMapType { typedef Eigen::Map type; }; + template struct StridedAlignedMapType { typedef Eigen::Map type; }; + template struct StridedConstAlignedMapType { typedef Eigen::Map type; }; protected: DenseStorage m_storage; public: - enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits::Flags & AlignedBit) != 0 }; + enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits::Alignment>0) }; EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) + EIGEN_DEVICE_FUNC Base& base() { return *static_cast(this); } + EIGEN_DEVICE_FUNC const Base& base() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const { if(Flags & RowMajorBit) @@ -140,11 +154,13 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const { return m_storage.data()[index]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId) { if(Flags & RowMajorBit) @@ -153,11 +169,13 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_storage.data()[index]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) 
const { if(Flags & RowMajorBit) @@ -166,6 +184,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const { return m_storage.data()[index]; @@ -206,11 +225,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type } /** \returns a const pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE const Scalar *data() const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } /** \returns a pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE Scalar *data() + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } /** Resizes \c *this to a \a rows x \a cols matrix. @@ -229,22 +248,22 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t) */ - EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols) - { - eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime) - && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,nbCols==ColsAtCompileTime) - && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,nbRows<=MaxRowsAtCompileTime) - && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime) - && nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array."); - internal::check_rows_cols_for_overflow::run(nbRows, nbCols); + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void resize(Index rows, Index cols) + { + eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime) + && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && 
MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime) + && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array."); + internal::check_rows_cols_for_overflow::run(rows, cols); #ifdef EIGEN_INITIALIZE_COEFFS - Index size = nbRows*nbCols; + Index size = rows*cols; bool size_changed = size != this->size(); - m_storage.resize(size, nbRows, nbCols); + m_storage.resize(size, rows, cols); if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED #else - internal::check_rows_cols_for_overflow::run(nbRows, nbCols); - m_storage.resize(nbRows*nbCols, nbRows, nbCols); + m_storage.resize(rows*cols, rows, cols); #endif } @@ -259,6 +278,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t) */ + EIGEN_DEVICE_FUNC inline void resize(Index size) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase) @@ -283,9 +303,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index) */ - inline void resize(NoChange_t, Index nbCols) + EIGEN_DEVICE_FUNC + inline void resize(NoChange_t, Index cols) { - resize(rows(), nbCols); + resize(rows(), cols); } /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange @@ -296,9 +317,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index) */ - inline void resize(Index nbRows, NoChange_t) + EIGEN_DEVICE_FUNC + inline void resize(Index rows, NoChange_t) { - resize(nbRows, cols()); + resize(rows, cols()); } /** Resizes \c *this to have the same dimensions as \a other. @@ -309,6 +331,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * remain row-vectors and vectors remain vectors. 
*/ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resizeLike(const EigenBase& _other) { const OtherDerived& other = _other.derived(); @@ -336,9 +359,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * Matrices are resized relative to the top-left element. In case values need to be * appended to the matrix they will be uninitialized. */ - EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols) { - internal::conservative_resize_like_impl::run(*this, nbRows, nbCols); + internal::conservative_resize_like_impl::run(*this, rows, cols); } /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. @@ -348,10 +372,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * In case the matrix is growing, new rows will be uninitialized. */ - EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t) { // Note: see the comment in conservativeResize(Index,Index) - conservativeResize(nbRows, cols()); + conservativeResize(rows, cols()); } /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. @@ -361,10 +386,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * In case the matrix is growing, new columns will be uninitialized. */ - EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols) { // Note: see the comment in conservativeResize(Index,Index) - conservativeResize(rows(), nbCols); + conservativeResize(rows(), cols); } /** Resizes the vector to \a size while retaining old values. @@ -375,6 +401,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * When values are appended, they will be uninitialized. 
*/ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResize(Index size) { internal::conservative_resize_like_impl::run(*this, size); @@ -390,6 +417,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * appended to the matrix they will copied from \c other. */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase& other) { internal::conservative_resize_like_impl::run(*this, other); @@ -398,6 +426,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other) { return _set(other); @@ -405,6 +434,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type /** \sa MatrixBase::lazyAssign() */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase& other) { _resize_to_match(other); @@ -412,12 +442,18 @@ class PlainObjectBase : public internal::dense_xpr_base::type } template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue& func) { resize(func.rows(), func.cols()); return Base::operator=(func); } + // Prevent user from trying to instantiate PlainObjectBase objects + // by making all its constructor protected. See bug 1074. + protected: + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase() : m_storage() { // _check_template_params(); @@ -427,38 +463,85 @@ class PlainObjectBase : public internal::dense_xpr_base::type #ifndef EIGEN_PARSED_BY_DOXYGEN // FIXME is it still needed ? 
/** \internal */ - PlainObjectBase(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC + explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) : m_storage(internal::constructor_without_unaligned_array_assert()) { // _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } #endif - EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols) - : m_storage(a_size, nbRows, nbCols) +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + PlainObjectBase(PlainObjectBase&& other) + : m_storage( std::move(other.m_storage) ) + { + } + + EIGEN_DEVICE_FUNC + PlainObjectBase& operator=(PlainObjectBase&& other) + { + using std::swap; + swap(m_storage, other.m_storage); + return *this; + } +#endif + + /** Copy constructor */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other) + : Base(), m_storage(other.m_storage) { } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols) + : m_storage(size, rows, cols) { // _check_template_params(); // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } - /** \copydoc MatrixBase::operator=(const EigenBase&) - */ + /** \sa PlainObjectBase::operator=(const EigenBase&) */ template - EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase &other) + : m_storage() { - _resize_to_match(other); - Base::operator=(other.derived()); - return this->derived(); + _check_template_params(); + resizeLike(other); + _set_noalias(other); } - /** \sa MatrixBase::operator=(const EigenBase&) */ + /** \sa PlainObjectBase::operator=(const EigenBase&) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase &other) - : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) + : m_storage() { _check_template_params(); - 
internal::check_rows_cols_for_overflow::run(other.derived().rows(), other.derived().cols()); + resizeLike(other); + *this = other.derived(); + } + /** \brief Copy constructor with in-place evaluation */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue& other) + { + _check_template_params(); + // FIXME this does not automatically transpose vectors if necessary + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); + } + + public: + + /** \copydoc MatrixBase::operator=(const EigenBase&) + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other) + { + _resize_to_match(other); Base::operator=(other.derived()); + return this->derived(); } /** \name Map @@ -535,16 +618,16 @@ class PlainObjectBase : public internal::dense_xpr_base::type //@} using Base::setConstant; - Derived& setConstant(Index size, const Scalar& value); - Derived& setConstant(Index rows, Index cols, const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value); using Base::setZero; - Derived& setZero(Index size); - Derived& setZero(Index rows, Index cols); + EIGEN_DEVICE_FUNC Derived& setZero(Index size); + EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols); using Base::setOnes; - Derived& setOnes(Index size); - Derived& setOnes(Index rows, Index cols); + EIGEN_DEVICE_FUNC Derived& setOnes(Index size); + EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols); using Base::setRandom; Derived& setRandom(Index size); @@ -563,6 +646,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * remain row-vectors and vectors remain vectors. 
*/ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase& other) { #ifdef EIGEN_NO_AUTOMATIC_RESIZING @@ -589,25 +673,23 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \internal */ + // aliasing is dealt once in internall::call_assignment + // so at this stage we have to assume aliasing... and resising has to be done later. template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set(const DenseBase& other) { - _set_selector(other.derived(), typename internal::conditional(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type()); + internal::call_assignment(this->derived(), other.derived()); return this->derived(); } - template - EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); } - - template - EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); } - /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which * is the case when creating a new matrix) so one can enforce lazy evaluation. * * \sa operator=(const MatrixBase&), _set() */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase& other) { // I don't think we need this resize call since the lazyAssign will anyways resize @@ -615,40 +697,166 @@ class PlainObjectBase : public internal::dense_xpr_base::type //_resize_to_match(other); // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // it wouldn't allow to copy a row-vector into a column-vector. 
- return internal::assign_selector::run(this->derived(), other.derived()); + internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); + return this->derived(); } template - EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if::type* = 0) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if::type* = 0) { EIGEN_STATIC_ASSERT(bool(NumTraits::IsInteger) && bool(NumTraits::IsInteger), FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) - resize(nbRows,nbCols); + resize(rows,cols); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if::type* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) m_storage.data()[0] = val0; m_storage.data()[1] = val1; } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==2,T1>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); + } + + // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array, + // then the argument is meant to be the size of the object. + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible::value) + && ((!internal::is_same::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0) + { + // NOTE MSVC 2008 complains if we directly put bool(NumTraits::IsInteger) as the EIGEN_STATIC_ASSERT argument. 
+ const bool is_integer = NumTraits::IsInteger; + EIGEN_STATIC_ASSERT(is_integer, + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) + resize(size); + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitely converted) + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if::value,T>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = val0; + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type match the index type) + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==1 + && internal::is_convertible::value,T*>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = Scalar(val0); + } + + // Initialize a fixed size matrix from a pointer to raw data + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar* data){ + this->_set_noalias(ConstMapType(data)); + } + + // Initialize an arbitrary matrix from a dense expression + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const DenseBase& other){ + this->_set_noalias(other); + } + + // Initialize an arbitrary matrix from a generic Eigen expression + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const EigenBase& other){ + this->derived() = other; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const ReturnByValue& other) + { + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const RotationBase& r) + { + this->derived() = r; + } + + // For fixed -size arrays: + template + 
EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, + typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T>::type* = 0) + { + Base::setConstant(val0); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T*>::type* = 0) + { + Base::setConstant(val0); + } + template friend struct internal::matrix_swap_impl; - /** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the - * data pointers. + public: + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal + * \brief Override DenseBase::swap() since for dynamic-sized matrices + * of same type it is enough to swap the data pointers. 
*/ template - void _swap(DenseBase const & other) + EIGEN_DEVICE_FUNC + void swap(DenseBase & other) { enum { SwapPointers = internal::is_same::value && Base::SizeAtCompileTime==Dynamic }; - internal::matrix_swap_impl::run(this->derived(), other.const_cast_derived()); + internal::matrix_swap_impl::run(this->derived(), other.derived()); } - - public: -#ifndef EIGEN_PARSED_BY_DOXYGEN + + /** \internal + * \brief const version forwarded to DenseBase::swap + */ + template + EIGEN_DEVICE_FUNC + void swap(DenseBase const & other) + { Base::swap(other.derived()); } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void _check_template_params() { EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor) @@ -662,16 +870,16 @@ class PlainObjectBase : public internal::dense_xpr_base::type && (Options & (DontAlign|RowMajor)) == Options), INVALID_MATRIX_TEMPLATE_PARAMETERS) } -#endif -private: - enum { ThisConstantIsPrivateInPlainObjectBase }; + enum { IsPlainObjectBase = 1 }; +#endif }; +namespace internal { + template -struct internal::conservative_resize_like_impl +struct conservative_resize_like_impl { - typedef typename Derived::Index Index; static void run(DenseBase& _this, Index rows, Index cols) { if (_this.rows() == rows && _this.cols() == cols) return; @@ -729,12 +937,14 @@ struct internal::conservative_resize_like_impl } }; -namespace internal { - +// Here, the specialization for vectors inherits from the general matrix case +// to allow calling .conservativeResize(rows,cols) on vectors. template struct conservative_resize_like_impl + : conservative_resize_like_impl { - typedef typename Derived::Index Index; + using conservative_resize_like_impl::run; + static void run(DenseBase& _this, Index size) { const Index new_rows = Derived::RowsAtCompileTime==1 ? 
1 : size; @@ -760,6 +970,7 @@ struct conservative_resize_like_impl template struct matrix_swap_impl { + EIGEN_DEVICE_FUNC static inline void run(MatrixTypeA& a, MatrixTypeB& b) { a.base().swap(b); @@ -769,6 +980,7 @@ struct matrix_swap_impl template struct matrix_swap_impl { + EIGEN_DEVICE_FUNC static inline void run(MatrixTypeA& a, MatrixTypeB& b) { static_cast(a).m_storage.swap(static_cast(b).m_storage); diff --git a/nuparu/include/Eigen/src/Core/Product.h b/nuparu/include/Eigen/src/Core/Product.h new file mode 100644 index 00000000..fdd2fed3 --- /dev/null +++ b/nuparu/include/Eigen/src/Core/Product.h @@ -0,0 +1,222 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PRODUCT_H +#define EIGEN_PRODUCT_H + +namespace Eigen { + +template class ProductImpl; + +/** \class Product + * \ingroup Core_Module + * + * \brief Expression of the product of two arbitrary matrices or vectors + * + * \param Lhs the type of the left-hand side expression + * \param Rhs the type of the right-hand side expression + * + * This class represents an expression of the product of two arbitrary matrices. + * + * The other template parameters are: + * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct + * + */ + + +namespace internal { + +// Determine the scalar of Product. This is normally the same as Lhs::Scalar times +// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor. 
+template::Shape, + typename RhsShape = typename evaluator_traits::Shape > +struct product_result_scalar +{ + typedef typename scalar_product_traits::ReturnType Scalar; +}; + +template +struct product_result_scalar +{ + typedef typename Rhs::Scalar Scalar; +}; + +template + struct product_result_scalar +{ + typedef typename Lhs::Scalar Scalar; +}; + +template +struct product_result_scalar +{ + typedef typename Rhs::Scalar Scalar; +}; + +template + struct product_result_scalar +{ + typedef typename Lhs::Scalar Scalar; +}; + +template +struct traits > +{ + typedef typename remove_all::type LhsCleaned; + typedef typename remove_all::type RhsCleaned; + typedef traits LhsTraits; + typedef traits RhsTraits; + + typedef MatrixXpr XprKind; + + typedef typename product_result_scalar::Scalar Scalar; + typedef typename product_promote_storage_type::ret>::ret StorageKind; + typedef typename promote_index_type::type StorageIndex; + + enum { + RowsAtCompileTime = LhsTraits::RowsAtCompileTime, + ColsAtCompileTime = RhsTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime, + + // FIXME: only needed by GeneralMatrixMatrixTriangular + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime), + + // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator. + Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ( ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit)) + || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? 
RowMajorBit + : NoPreferredStorageOrderBit + }; +}; + +} // end namespace internal + + +template +class Product : public ProductImpl<_Lhs,_Rhs,Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits<_Rhs>::StorageKind, + internal::product_type<_Lhs,_Rhs>::ret>::ret> +{ + public: + + typedef _Lhs Lhs; + typedef _Rhs Rhs; + + typedef typename ProductImpl< + Lhs, Rhs, Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + internal::product_type::ret>::ret>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Product) + + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + + EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) + { + eigen_assert(lhs.cols() == rhs.rows() + && "invalid matrix product" + && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); + } + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } + + EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } + + protected: + + LhsNested m_lhs; + RhsNested m_rhs; +}; + +namespace internal { + +template::ret> +class dense_product_base + : public internal::dense_xpr_base >::type +{}; + +/** Convertion to scalar for inner-products */ +template +class dense_product_base + : public internal::dense_xpr_base >::type +{ + typedef Product ProductXpr; + typedef typename internal::dense_xpr_base::type Base; +public: + using Base::derived; + typedef typename Base::Scalar Scalar; + + operator const Scalar() const + { + return internal::evaluator(derived()).coeff(0,0); + } +}; + +} // namespace 
internal + +// Generic API dispatcher +template +class ProductImpl : public internal::generic_xpr_base, MatrixXpr, StorageKind>::type +{ + public: + typedef typename internal::generic_xpr_base, MatrixXpr, StorageKind>::type Base; +}; + +template +class ProductImpl + : public internal::dense_product_base +{ + typedef Product Derived; + + public: + + typedef typename internal::dense_product_base Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + protected: + enum { + IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && + (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic), + EnableCoeff = IsOneByOne || Option==LazyProduct + }; + + public: + + EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return internal::evaluator(derived()).coeff(row,col); + } + + EIGEN_DEVICE_FUNC Scalar coeff(Index i) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return internal::evaluator(derived()).coeff(i); + } + + +}; + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_H diff --git a/nuparu/include/Eigen/src/Core/ProductBase.h b/nuparu/include/Eigen/src/Core/ProductBase.h deleted file mode 100644 index a494b5f8..00000000 --- a/nuparu/include/Eigen/src/Core/ProductBase.h +++ /dev/null @@ -1,278 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_PRODUCTBASE_H -#define EIGEN_PRODUCTBASE_H - -namespace Eigen { - -/** \class ProductBase - * \ingroup Core_Module - * - */ - -namespace internal { -template -struct traits > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all<_Lhs>::type Lhs; - typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits::ReturnType Scalar; - typedef typename promote_storage_type::StorageKind, - typename traits::StorageKind>::ret StorageKind; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; - enum { - RowsAtCompileTime = traits::RowsAtCompileTime, - ColsAtCompileTime = traits::ColsAtCompileTime, - MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, - MaxColsAtCompileTime = traits::MaxColsAtCompileTime, - Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0) - | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit, - // Note that EvalBeforeNestingBit and NestByRefBit - // are not used in practice because nested is overloaded for products - CoeffReadCost = 0 // FIXME why is it needed ? 
- }; -}; -} - -#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \ - typedef ProductBase Base; \ - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ - typedef typename Base::LhsNested LhsNested; \ - typedef typename Base::_LhsNested _LhsNested; \ - typedef typename Base::LhsBlasTraits LhsBlasTraits; \ - typedef typename Base::ActualLhsType ActualLhsType; \ - typedef typename Base::_ActualLhsType _ActualLhsType; \ - typedef typename Base::RhsNested RhsNested; \ - typedef typename Base::_RhsNested _RhsNested; \ - typedef typename Base::RhsBlasTraits RhsBlasTraits; \ - typedef typename Base::ActualRhsType ActualRhsType; \ - typedef typename Base::_ActualRhsType _ActualRhsType; \ - using Base::m_lhs; \ - using Base::m_rhs; - -template -class ProductBase : public MatrixBase -{ - public: - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase) - - typedef typename Lhs::Nested LhsNested; - typedef typename internal::remove_all::type _LhsNested; - typedef internal::blas_traits<_LhsNested> LhsBlasTraits; - typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; - typedef typename internal::remove_all::type _ActualLhsType; - typedef typename internal::traits::Scalar LhsScalar; - - typedef typename Rhs::Nested RhsNested; - typedef typename internal::remove_all::type _RhsNested; - typedef internal::blas_traits<_RhsNested> RhsBlasTraits; - typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef typename internal::remove_all::type _ActualRhsType; - typedef typename internal::traits::Scalar RhsScalar; - - // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once - typedef CoeffBasedProduct FullyLazyCoeffBaseProductType; - - public: - - typedef typename Base::PlainObject PlainObject; - - ProductBase(const Lhs& a_lhs, const Rhs& a_rhs) - : m_lhs(a_lhs), m_rhs(a_rhs) - { - eigen_assert(a_lhs.cols() == a_rhs.rows() - && "invalid matrix product" - && "if you wanted a coeff-wise or a dot product 
use the respective explicit functions"); - } - - inline Index rows() const { return m_lhs.rows(); } - inline Index cols() const { return m_rhs.cols(); } - - template - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); } - - template - inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); } - - template - inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); } - - template - inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); } - - const _LhsNested& lhs() const { return m_lhs; } - const _RhsNested& rhs() const { return m_rhs; } - - // Implicit conversion to the nested type (trigger the evaluation of the product) - operator const PlainObject& () const - { - m_result.resize(m_lhs.rows(), m_rhs.cols()); - derived().evalTo(m_result); - return m_result; - } - - const Diagonal diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - template - const Diagonal diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - const Diagonal diagonal(Index index) const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); } - - // restrict coeff accessors to 1x1 expressions. 
No need to care about mutators here since this isnt a Lvalue expression - typename Base::CoeffReturnType coeff(Index row, Index col) const - { -#ifdef EIGEN2_SUPPORT - return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum(); -#else - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix result = *this; - return result.coeff(row,col); -#endif - } - - typename Base::CoeffReturnType coeff(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix result = *this; - return result.coeff(i); - } - - const Scalar& coeffRef(Index row, Index col) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(row,col); - } - - const Scalar& coeffRef(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(i); - } - - protected: - - LhsNested m_lhs; - RhsNested m_rhs; - - mutable PlainObject m_result; -}; - -// here we need to overload the nested rule for products -// such that the nested type is a const reference to a plain matrix -namespace internal { -template -struct nested, N, PlainObject> -{ - typedef PlainObject const& type; -}; -} - -template -class ScaledProduct; - -// Note that these two operator* functions are not defined as member -// functions of ProductBase, because, otherwise we would have to -// define all overloads defined in MatrixBase. Furthermore, Using -// "using Base::operator*" would not work with MSVC. 
-// -// Also note that here we accept any compatible scalar types -template -const ScaledProduct -operator*(const ProductBase& prod, const typename Derived::Scalar& x) -{ return ScaledProduct(prod.derived(), x); } - -template -typename internal::enable_if::value, - const ScaledProduct >::type -operator*(const ProductBase& prod, const typename Derived::RealScalar& x) -{ return ScaledProduct(prod.derived(), x); } - - -template -const ScaledProduct -operator*(const typename Derived::Scalar& x,const ProductBase& prod) -{ return ScaledProduct(prod.derived(), x); } - -template -typename internal::enable_if::value, - const ScaledProduct >::type -operator*(const typename Derived::RealScalar& x,const ProductBase& prod) -{ return ScaledProduct(prod.derived(), x); } - -namespace internal { -template -struct traits > - : traits, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> > -{ - typedef typename traits::StorageKind StorageKind; -}; -} - -template -class ScaledProduct - : public ProductBase, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> -{ - public: - typedef ProductBase, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> Base; - typedef typename Base::Scalar Scalar; - typedef typename Base::PlainObject PlainObject; -// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct) - - ScaledProduct(const NestedProduct& prod, const Scalar& x) - : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {} - - template - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); } - - template - inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); } - - template - inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); } - - template - inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); } - - const Scalar& alpha() const { return m_alpha; } - - protected: - const NestedProduct& m_prod; - 
Scalar m_alpha; -}; - -/** \internal - * Overloaded to perform an efficient C = (A*B).lazy() */ -template -template -Derived& MatrixBase::lazyAssign(const ProductBase& other) -{ - other.derived().evalTo(derived()); - return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_PRODUCTBASE_H diff --git a/nuparu/include/Eigen/src/Core/ProductEvaluators.h b/nuparu/include/Eigen/src/Core/ProductEvaluators.h new file mode 100644 index 00000000..794038a2 --- /dev/null +++ b/nuparu/include/Eigen/src/Core/ProductEvaluators.h @@ -0,0 +1,1061 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2011 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_PRODUCTEVALUATORS_H +#define EIGEN_PRODUCTEVALUATORS_H + +namespace Eigen { + +namespace internal { + +/** \internal + * Evaluator of a product expression. + * Since products require special treatments to handle all possible cases, + * we simply deffer the evaluation logic to a product_evaluator class + * which offers more partial specialization possibilities. 
+ * + * \sa class product_evaluator + */ +template +struct evaluator > + : public product_evaluator > +{ + typedef Product XprType; + typedef product_evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +// Catch scalar * ( A * B ) and transform it to (A*scalar) * B +// TODO we should apply that rule only if that's really helpful +template +struct evaluator_traits, const Product > > + : evaluator_traits_base, const Product > > +{ + enum { AssumeAliasing = 1 }; +}; +template +struct evaluator, const Product > > + : public evaluator,const Lhs>, Rhs, DefaultProduct> > +{ + typedef CwiseUnaryOp, const Product > XprType; + typedef evaluator,const Lhs>, Rhs, DefaultProduct> > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs()) + {} +}; + + +template +struct evaluator, DiagIndex> > + : public evaluator, DiagIndex> > +{ + typedef Diagonal, DiagIndex> XprType; + typedef evaluator, DiagIndex> > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(Diagonal, DiagIndex>( + Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), + xpr.index() )) + {} +}; + + +// Helper class to perform a matrix product with the destination at hand. +// Depending on the sizes of the factors, there are different evaluation strategies +// as controlled by internal::product_type. 
+template< typename Lhs, typename Rhs, + typename LhsShape = typename evaluator_traits::Shape, + typename RhsShape = typename evaluator_traits::Shape, + int ProductType = internal::product_type::value> +struct generic_product_impl; + +template +struct evaluator_traits > + : evaluator_traits_base > +{ + enum { AssumeAliasing = 1 }; +}; + +template +struct evaluator_traits > + : evaluator_traits_base > +{ + enum { AssumeAliasing = 0 }; +}; + +// This is the default evaluator implementation for products: +// It creates a temporary and call generic_product_impl +template +struct product_evaluator, ProductTag, LhsShape, RhsShape> + : public evaluator::PlainObject> +{ + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + +// FIXME shall we handle nested_eval here?, +// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.) 
+// typedef typename internal::nested_eval::type LhsNested; +// typedef typename internal::nested_eval::type RhsNested; +// typedef typename internal::remove_all::type LhsNestedCleaned; +// typedef typename internal::remove_all::type RhsNestedCleaned; +// +// const LhsNested lhs(xpr.lhs()); +// const RhsNested rhs(xpr.rhs()); +// +// generic_product_impl::evalTo(m_result, lhs, rhs); + + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + +protected: + PlainObject m_result; +}; + +// Dense = Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + // FIXME shall we handle nested_eval here? + generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense += Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::add_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + { + // FIXME shall we handle nested_eval here? + generic_product_impl::addTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense -= Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::sub_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + { + // FIXME shall we handle nested_eval here? 
+ generic_product_impl::subTo(dst, src.lhs(), src.rhs()); + } +}; + + +// Dense ?= scalar * Product +// TODO we should apply that rule if that's really helpful +// for instance, this is not good for inner products +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis> +struct Assignment, + const Product >, AssignFunc, Dense2Dense, Scalar> +{ + typedef CwiseUnaryOp, + const Product > SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) + { + call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func); + } +}; + +//---------------------------------------- +// Catch "Dense ?= xpr + Product<>" expression to save one temporary +// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct + +template +struct assignment_from_xpr_plus_product +{ + typedef CwiseBinaryOp, const OtherXpr, const ProductType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const Func1& func) + { + call_assignment_no_alias(dst, src.lhs(), func); + call_assignment_no_alias(dst, src.rhs(), Func2()); + } +}; + +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, Scalar, internal::assign_op, internal::add_assign_op > +{}; +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::add_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, Scalar, internal::add_assign_op, internal::add_assign_op > +{}; +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::sub_assign_op, Dense2Dense> + : 
assignment_from_xpr_plus_product, Scalar, internal::sub_assign_op, internal::sub_assign_op > +{}; +//---------------------------------------- + +template +struct generic_product_impl +{ + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } +}; + + +/*********************************************************************** +* Implementation of outer dense * dense vector product +***********************************************************************/ + +// Column major result +template +EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) +{ + evaluator rhsEval(rhs); + typename nested_eval::type actual_lhs(lhs); + // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored + // FIXME not very good if rhs is real and lhs complex while alpha is real too + const Index cols = dst.cols(); + for (Index j=0; j +EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) +{ + evaluator lhsEval(lhs); + typename nested_eval::type actual_rhs(rhs); + // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored + // FIXME not very good if lhs is real and rhs complex while alpha is real too + const Index rows = dst.rows(); + for (Index i=0; i +struct generic_product_impl +{ + template struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; + typedef typename Product::Scalar Scalar; + + // TODO 
it would be nice to be able to exploit our *_assign_op functors for that purpose + struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; + struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; + struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; + struct adds { + Scalar m_scale; + explicit adds(const Scalar& s) : m_scale(s) {} + template void operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() += m_scale * src; + } + }; + + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); + } + + template + static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); + } + + template + static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); + } + +}; + + +// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo +template +struct generic_product_impl_base +{ + typedef typename Product::Scalar Scalar; + + template + static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } + + template + static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } + + template + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } + + template + static void scaleAndAddTo(Dst& dst, const Lhs& lhs, 
const Rhs& rhs, const Scalar& alpha) + { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } + +}; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; + typedef typename internal::conditional::type MatrixType; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::gemv_dense_selector::HasUsableDirectAccess) + >::run(lhs, rhs, dst, alpha); + } +}; + +template +struct generic_product_impl +{ + typedef typename Product::Scalar Scalar; + + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // Same as: dst.noalias() = lhs.lazyProduct(rhs); + // but easier on the compiler side + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst.noalias() += lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); + } + + template + static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst.noalias() -= lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); + } + +// template +// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) +// { dst.noalias() += alpha * lhs.lazyProduct(rhs); } +}; + +// This specialization enforces the use of a coefficient-based evaluation strategy +template +struct generic_product_impl + : generic_product_impl {}; + +// Case 2: Evaluate coeff by coeff +// +// This is mostly taken from CoeffBasedProduct.h +// The main difference is that we add an extra argument to the etor_product_*_impl::run() function +// for the inner dimension of the product, because evaluator object do not know their size. 
+ +template +struct etor_product_coeff_impl; + +template +struct etor_product_packet_impl; + +template +struct product_evaluator, ProductTag, DenseShape, DenseShape> + : evaluator_base > +{ + typedef Product XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : m_lhs(xpr.lhs()), + m_rhs(xpr.rhs()), + m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! + m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, + // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. + m_innerDim(xpr.lhs().cols()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::AddCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + // Everything below here is taken from CoeffBasedProduct.h + + typedef typename internal::nested_eval::type LhsNested; + typedef typename internal::nested_eval::type RhsNested; + + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + + typedef evaluator LhsEtorType; + typedef evaluator RhsEtorType; + + enum { + RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), + MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, + + PacketSize = packet_traits::size, + + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, + RhsCoeffReadCost = RhsEtorType::CoeffReadCost, + CoeffReadCost 
= InnerSize==0 ? NumTraits::ReadCost + : InnerSize == Dynamic ? HugeCost + : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits::AddCost, + + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + + LhsFlags = LhsEtorType::Flags, + RhsFlags = RhsEtorType::Flags, + + LhsAlignment = LhsEtorType::Alignment, + RhsAlignment = RhsEtorType::Alignment, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + SameType = is_same::value, + + CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) + && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ), + + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) + && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ), + + EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : (RhsRowMajor && !CanVectorizeLhs), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) + | (EvalToRowMajor ? RowMajorBit : 0) + // TODO enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) + | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), + + LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), + RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), + + Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) + : CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) + : 0, + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. 
If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + CanVectorizeInner = SameType + && LhsRowMajor + && (!RhsRowMajor) + && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (InnerSize % packet_traits::size == 0) + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const + { + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + * TODO: this seems possible when the result is a vector + */ + EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const + { + const Index row = RowsAtCompileTime == 1 ? 0 : index; + const Index col = RowsAtCompileTime == 1 ? index : 0; + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); + } + + template + const PacketType packet(Index row, Index col) const + { + PacketType res; + typedef etor_product_packet_impl PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + template + const PacketType packet(Index index) const + { + const Index row = RowsAtCompileTime == 1 ? 0 : index; + const Index col = RowsAtCompileTime == 1 ? 
index : 0; + return packet(row,col); + } + +protected: + const LhsNested m_lhs; + const RhsNested m_rhs; + + LhsEtorType m_lhsImpl; + RhsEtorType m_rhsImpl; + + // TODO: Get rid of m_innerDim if known at compile time + Index m_innerDim; +}; + +template +struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape> + : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape> +{ + typedef Product XprType; + typedef Product BaseProduct; + typedef product_evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(BaseProduct(xpr.lhs(),xpr.rhs())) + {} +}; + +/**************************************** +*** Coeff based product, Packet path *** +****************************************/ + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(pset1(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet(UnrollingIndex-1, col), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(lhs.template packet(row, UnrollingIndex-1), pset1(rhs.coeff(UnrollingIndex-1, col)), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(lhs.template packet(row, 0), 
pset1(rhs.coeff(0, col))); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(0); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(0); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + res = pset1(0); + for(Index i = 0; i < innerDim; ++i) + res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + res = pset1(0); + for(Index i = 0; i < innerDim; ++i) + res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); + } +}; + + +/*************************************************************************** +* Triangular products +***************************************************************************/ +template +struct triangular_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl + ::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + 
+/*************************************************************************** +* SelfAdjoint products +***************************************************************************/ +template +struct selfadjoint_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + +/*************************************************************************** +* Diagonal products +***************************************************************************/ + +template +struct diagonal_product_evaluator_base + : evaluator_base +{ + typedef typename scalar_product_traits::ReturnType Scalar; +public: + enum { + CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, + + MatrixFlags = evaluator::Flags, + DiagFlags = evaluator::Flags, + _StorageOrder = MatrixFlags & RowMajorBit ? 
RowMajor : ColMajor, + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), + _SameTypes = is_same::value, + // FIXME currently we need same types, but in the future the next rule should be the one + //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), + _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), + Alignment = evaluator::Alignment + }; + + diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) + : m_diagImpl(diag), m_matImpl(mat) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const + { + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + } + +protected: + template + EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const + { + return internal::pmul(m_matImpl.template packet(row, col), + internal::pset1(m_diagImpl.coeff(id))); + } + + template + EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const + { + enum { + InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, + DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! 
+ }; + return internal::pmul(m_matImpl.template packet(row, col), + m_diagImpl.template packet(id)); + } + + evaluator m_diagImpl; + evaluator m_matImpl; +}; + +// diagonal * dense +template +struct product_evaluator, ProductTag, DiagonalShape, DenseShape> + : diagonal_product_evaluator_base, OnTheLeft> +{ + typedef diagonal_product_evaluator_base, OnTheLeft> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { + StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor + }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.rhs(), xpr.lhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const + { + // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. + // See also similar calls below. + return this->template packet_impl(row,col, row, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +// dense * diagonal +template +struct product_evaluator, ProductTag, DenseShape, DiagonalShape> + : diagonal_product_evaluator_base, OnTheRight> +{ + typedef diagonal_product_evaluator_base, OnTheRight> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? 
RowMajor : ColMajor }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.lhs(), xpr.rhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const + { + return this->template packet_impl(row,col, col, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +/*************************************************************************** +* Products with permutation matrices +***************************************************************************/ + +/** \internal + * \class permutation_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. + * This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h + */ +template +struct permutation_matrix_product; + +template +struct permutation_matrix_product +{ + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + template + static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) + { + MatrixType mat(xpr); + const Index n = Side==OnTheLeft ? mat.rows() : mat.cols(); + // FIXME we need an is_same for expression that is not sensitive to constness. For instance + // is_same_xpr, Block >::value should be true. 
+ //if(is_same::value && extract_data(dst) == extract_data(mat)) + if(is_same_dense(dst, mat)) + { + // apply the permutation inplace + Matrix mask(perm.size()); + mask.fill(false); + Index r = 0; + while(r < perm.size()) + { + // search for the next seed + while(r=perm.size()) + break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + Index kPrev = k0; + mask.coeffRef(k0) = true; + for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k)) + { + Block(dst, k) + .swap(Block + (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); + + mask.coeffRef(k) = true; + kPrev = k; + } + } + } + else + { + for(Index i = 0; i < n; ++i) + { + Block + (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i) + + = + + Block + (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i); + } + } + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, rhs, lhs); + } +}; + +template +struct generic_product_impl, Rhs, PermutationShape, MatrixShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Inverse& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, PermutationShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Inverse& rhs) + { + permutation_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + + +/*************************************************************************** +* Products with transpositions matrices +***************************************************************************/ + +// FIXME could we unify Transpositions and Permutation into a 
single "shape"?? + +/** \internal + * \class transposition_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. + */ +template +struct transposition_matrix_product +{ + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + template + static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr) + { + MatrixType mat(xpr); + typedef typename TranspositionType::StorageIndex StorageIndex; + const Index size = tr.size(); + StorageIndex j = 0; + + if(!(is_same::value && extract_data(dst) == extract_data(mat))) + dst = mat; + + for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, rhs, lhs); + } +}; + + +template +struct generic_product_impl, Rhs, TranspositionsShape, MatrixShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, TranspositionsShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) + { + transposition_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_EVALUATORS_H diff --git a/nuparu/include/Eigen/src/Core/Random.h b/nuparu/include/Eigen/src/Core/Random.h index 480fea40..02038e9e 100644 --- a/nuparu/include/Eigen/src/Core/Random.h +++ b/nuparu/include/Eigen/src/Core/Random.h @@ -28,12 +28,18 @@ struct functor_traits > /** \returns a random matrix expression * + * Numbers 
are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this MatrixBase type. * + * \not_reentrant + * * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used * instead. + * * * Example: \include MatrixBase_random_int_int.cpp * Output: \verbinclude MatrixBase_random_int_int.out @@ -41,22 +47,28 @@ struct functor_traits > * This expression has the "evaluate before nesting" flag so that it will be evaluated into * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. + * + * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators. * - * \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random() + * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random() */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random(Index rows, Index cols) { return NullaryExpr(rows, cols, internal::scalar_random_op()); } /** \returns a random vector expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. * * The parameter \a size is the size of the returned vector. * Must be compatible with this MatrixBase type. * * \only_for_vectors + * \not_reentrant * * This variant is meant to be used for dynamic-size vector types. 
For fixed-size types, * it is redundant to pass \a size as argument, so Random() should be used @@ -69,10 +81,10 @@ DenseBase::Random(Index rows, Index cols) * a temporary vector whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. * - * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random() + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random() */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random(Index size) { return NullaryExpr(size, internal::scalar_random_op()); @@ -80,6 +92,9 @@ DenseBase::Random(Index size) /** \returns a fixed-size random matrix or vector expression * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you * need to use the variants taking size arguments. * @@ -89,11 +104,13 @@ DenseBase::Random(Index size) * This expression has the "evaluate before nesting" flag so that it will be evaluated into * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. + * + * \not_reentrant * - * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index) + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index) */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random() { return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op()); @@ -101,6 +118,11 @@ DenseBase::Random() /** Sets all coefficients in this expression to random values. 
* + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * * Example: \include MatrixBase_setRandom.cpp * Output: \verbinclude MatrixBase_setRandom.out * @@ -114,12 +136,16 @@ inline Derived& DenseBase::setRandom() /** Resizes to the given \a newSize, and sets all coefficients in this expression to random values. * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * * \only_for_vectors + * \not_reentrant * * Example: \include Matrix_setRandom_int.cpp * Output: \verbinclude Matrix_setRandom_int.out * - * \sa MatrixBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, MatrixBase::Random() + * \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random() */ template EIGEN_STRONG_INLINE Derived& @@ -131,19 +157,24 @@ PlainObjectBase::setRandom(Index newSize) /** Resizes to the given size, and sets all coefficients in this expression to random values. * - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. 
+ * + * \not_reentrant + * + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setRandom_int_int.cpp * Output: \verbinclude Matrix_setRandom_int_int.out * - * \sa MatrixBase::setRandom(), setRandom(Index), class CwiseNullaryOp, MatrixBase::Random() + * \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random() */ template EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setRandom(Index nbRows, Index nbCols) +PlainObjectBase::setRandom(Index rows, Index cols) { - resize(nbRows, nbCols); + resize(rows, cols); return setRandom(); } diff --git a/nuparu/include/Eigen/src/Core/Redux.h b/nuparu/include/Eigen/src/Core/Redux.h index 50548fa9..d170cae2 100644 --- a/nuparu/include/Eigen/src/Core/Redux.h +++ b/nuparu/include/Eigen/src/Core/Redux.h @@ -50,21 +50,34 @@ struct redux_traits public: enum { - Cost = ( Derived::SizeAtCompileTime == Dynamic - || Derived::CoeffReadCost == Dynamic - || (Derived::SizeAtCompileTime!=1 && functor_traits::Cost == Dynamic) - ) ? Dynamic - : Derived::SizeAtCompileTime * Derived::CoeffReadCost - + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, + Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost + : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) }; public: enum { - Unrolling = Cost != Dynamic && Cost <= UnrollingLimit - ? CompleteUnrolling - : NoUnrolling + Unrolling = Cost <= UnrollingLimit ? 
CompleteUnrolling : NoUnrolling }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + EIGEN_DEBUG_VAR(Derived::Flags) + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + EIGEN_DEBUG_VAR(Traversal) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(Unrolling) + std::cerr << std::endl; + } +#endif }; /*************************************************************************** @@ -82,6 +95,7 @@ struct redux_novec_unroller typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { return func(redux_novec_unroller::run(mat,func), @@ -99,6 +113,7 @@ struct redux_novec_unroller typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&) { return mat.coeffByOuterInner(outer, inner); @@ -112,6 +127,7 @@ template struct redux_novec_unroller { typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } }; @@ -143,7 +159,7 @@ struct redux_vec_unroller index = Start * packet_traits::size, outer = index / int(Derived::InnerSizeAtCompileTime), inner = index % int(Derived::InnerSizeAtCompileTime), - alignment = (Derived::Flags & AlignedBit) ? 
Aligned : Unaligned + alignment = Derived::Alignment }; typedef typename Derived::Scalar Scalar; @@ -151,7 +167,7 @@ struct redux_vec_unroller static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&) { - return mat.template packetByOuterInner(outer, inner); + return mat.template packetByOuterInner(outer, inner); } }; @@ -169,8 +185,8 @@ template struct redux_impl { typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; - static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); Scalar res; @@ -194,18 +210,18 @@ struct redux_impl { typedef typename Derived::Scalar Scalar; typedef typename packet_traits::type PacketScalar; - typedef typename Derived::Index Index; - static Scalar run(const Derived& mat, const Func& func) + static Scalar run(const Derived &mat, const Func& func) { const Index size = mat.size(); - eigen_assert(size && "you are using an empty matrix"); + const Index packetSize = packet_traits::size; - const Index alignedStart = internal::first_aligned(mat); + const int packetAlignment = unpacket_traits::alignment; enum { - alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit) - ? Aligned : Unaligned + alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits::AlignedOnScalar)) ? 
int(packetAlignment) : int(Unaligned), + alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment) }; + const Index alignedStart = internal::first_default_aligned(mat.nestedExpression()); const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); const Index alignedEnd2 = alignedStart + alignedSize2; @@ -213,19 +229,19 @@ struct redux_impl Scalar res; if(alignedSize) { - PacketScalar packet_res0 = mat.template packet(alignedStart); + PacketScalar packet_res0 = mat.template packet(alignedStart); if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop { - PacketScalar packet_res1 = mat.template packet(alignedStart+packetSize); + PacketScalar packet_res1 = mat.template packet(alignedStart+packetSize); for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize) { - packet_res0 = func.packetOp(packet_res0, mat.template packet(index)); - packet_res1 = func.packetOp(packet_res1, mat.template packet(index+packetSize)); + packet_res0 = func.packetOp(packet_res0, mat.template packet(index)); + packet_res1 = func.packetOp(packet_res1, mat.template packet(index+packetSize)); } packet_res0 = func.packetOp(packet_res0,packet_res1); if(alignedEnd>alignedEnd2) - packet_res0 = func.packetOp(packet_res0, mat.template packet(alignedEnd2)); + packet_res0 = func.packetOp(packet_res0, mat.template packet(alignedEnd2)); } res = func.predux(packet_res0); @@ -247,14 +263,14 @@ struct redux_impl } }; -template -struct redux_impl +// NOTE: for SliceVectorizedTraversal we simply bypass unrolling +template +struct redux_impl { typedef typename Derived::Scalar Scalar; - typedef typename packet_traits::type PacketScalar; - typedef typename Derived::Index Index; + typedef typename packet_traits::type PacketType; - static Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const 
Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); const Index innerSize = mat.innerSize(); @@ -266,10 +282,10 @@ struct redux_impl Scalar res; if(packetedInnerSize) { - PacketScalar packet_res = mat.template packet(0,0); + PacketType packet_res = mat.template packet(0,0); for(Index j=0; j(j,i)); + packet_res = func.packetOp(packet_res, mat.template packetByOuterInner(j,i)); res = func.predux(packet_res); for(Index j=0; j Size = Derived::SizeAtCompileTime, VectorizedSize = (Size / PacketSize) * PacketSize }; - static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); - Scalar res = func.predux(redux_vec_unroller::run(mat,func)); - if (VectorizedSize != Size) - res = func(res,redux_novec_unroller::run(mat,func)); - return res; + if (VectorizedSize > 0) { + Scalar res = func.predux(redux_vec_unroller::run(mat,func)); + if (VectorizedSize != Size) + res = func(res,redux_novec_unroller::run(mat,func)); + return res; + } + else { + return redux_novec_unroller::run(mat,func); + } } }; +// evaluator adaptor +template +class redux_evaluator +{ +public: + typedef _XprType XprType; + EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + enum { + MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, + // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator + Flags = evaluator::Flags & ~DirectAccessBit, + IsRowMajor = XprType::IsRowMajor, + 
SizeAtCompileTime = XprType::SizeAtCompileTime, + InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime, + CoeffReadCost = evaluator::CoeffReadCost, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index row, Index col) const + { return m_evaluator.coeff(row, col); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index index) const + { return m_evaluator.coeff(index); } + + template + PacketReturnType packet(Index row, Index col) const + { return m_evaluator.template packet(row, col); } + + template + PacketReturnType packet(Index index) const + { return m_evaluator.template packet(index); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeffByOuterInner(Index outer, Index inner) const + { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + + template + PacketReturnType packetByOuterInner(Index outer, Index inner) const + { return m_evaluator.template packet(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + + const XprType & nestedExpression() const { return m_xpr; } + +protected: + internal::evaluator m_evaluator; + const XprType &m_xpr; +}; + } // end namespace internal /*************************************************************************** @@ -316,18 +399,21 @@ struct redux_impl /** \returns the result of a full redux operation on the whole matrix or vector using \a func * * The template parameter \a BinaryOp is the type of the functor \a func which must be - * an associative operator. Both current STL and TR1 functor styles are handled. + * an associative operator. Both current C++98 and C++11 functor styles are handled. 
* * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() */ template template -EIGEN_STRONG_INLINE typename internal::result_of::Scalar)>::type +typename internal::traits::Scalar DenseBase::redux(const Func& func) const { - typedef typename internal::remove_all::type ThisNested; - return internal::redux_impl - ::run(derived(), func); + eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); + + typedef typename internal::redux_evaluator ThisEvaluator; + ThisEvaluator thisEval(derived()); + + return internal::redux_impl::run(thisEval, func); } /** \returns the minimum of all coefficients of \c *this. @@ -337,7 +423,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::minCoeff() const { - return this->redux(Eigen::internal::scalar_min_op()); + return derived().redux(Eigen::internal::scalar_min_op()); } /** \returns the maximum of all coefficients of \c *this. @@ -347,7 +433,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::maxCoeff() const { - return this->redux(Eigen::internal::scalar_max_op()); + return derived().redux(Eigen::internal::scalar_max_op()); } /** \returns the sum of all coefficients of *this @@ -360,7 +446,7 @@ DenseBase::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(0); - return this->redux(Eigen::internal::scalar_sum_op()); + return derived().redux(Eigen::internal::scalar_sum_op()); } /** \returns the mean of all coefficients of *this @@ -371,7 +457,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::mean() const { - return Scalar(this->redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); + return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); } /** \returns the product of all coefficients of *this @@ -387,7 +473,7 @@ DenseBase::prod() const { if(SizeAtCompileTime==0 || 
(SizeAtCompileTime==Dynamic && size()==0)) return Scalar(1); - return this->redux(Eigen::internal::scalar_product_op()); + return derived().redux(Eigen::internal::scalar_product_op()); } /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. diff --git a/nuparu/include/Eigen/src/Core/Ref.h b/nuparu/include/Eigen/src/Core/Ref.h index aba795bd..61de5ed1 100644 --- a/nuparu/include/Eigen/src/Core/Ref.h +++ b/nuparu/include/Eigen/src/Core/Ref.h @@ -12,24 +12,20 @@ namespace Eigen { -template class RefBase; -template,OuterStride<> >::type > class Ref; - /** \class Ref * \ingroup Core_Module * - * \brief A matrix or vector expression mapping an existing expressions + * \brief A matrix or vector expression mapping an existing expression * * \tparam PlainObjectType the equivalent matrix type of the mapped data - * \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned. + * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. * The default is \c #Unaligned. * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), - * but accept a variable outer stride (leading dimension). + * but accepts a variable outer stride (leading dimension). * This can be overridden by specifying strides. * The type passed here must be a specialization of the Stride template, see examples below. * - * This class permits to write non template functions taking Eigen's object as parameters while limiting the number of copies. + * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies. 
* A Ref<> object can represent either a const expression or a l-value: * \code * // in-out argument: @@ -39,10 +35,10 @@ template& x); * \endcode * - * In the in-out case, the input argument must satisfies the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. + * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. * By default, a Ref can reference any dense vector expression of float having a contiguous memory layout. - * Likewise, a Ref can reference any column major dense matrix expression of float whose column's elements are contiguously stored with - * the possibility to have a constant space inbetween each column, i.e.: the inner stride mmust be equal to 1, but the outer-stride (or leading dimension), + * Likewise, a Ref can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with + * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension) * can be greater than the number of rows. * * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function. @@ -52,21 +48,22 @@ template > x); * foo3(A.row()); // OK * \endcode - * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involved more - * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overloads internally calling a + * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more + * expensive address computations even if the input is contiguously stored in memory. 
To overcome this issue, one might propose to overload internally calling a * template function, e.g.: * \code * // in the .h: @@ -94,24 +91,27 @@ struct traits > typedef _PlainObjectType PlainObjectType; typedef _StrideType StrideType; enum { - Options = _Options + Options = _Options, + Flags = traits >::Flags | NestByRefBit, + Alignment = traits >::Alignment }; template struct match { enum { HasDirectAccess = internal::has_direct_access::ret, - StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)), + StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)), InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic) || int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime) || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1), OuterStrideMatch = Derived::IsVectorAtCompileTime || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), - AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits::Flags&AlignedBit)==AlignedBit), - MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch + AlignmentMatch = (int(traits::Alignment)==int(Unaligned)) || (int(evaluator::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment + ScalarTypeMatch = internal::is_same::value, + MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch }; typedef typename internal::conditional::type type; }; - + }; template @@ -130,12 +130,12 @@ template class RefBase typedef MapBase Base; 
EIGEN_DENSE_PUBLIC_INTERFACE(RefBase) - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() : IsVectorAtCompileTime ? this->size() @@ -143,7 +143,7 @@ template class RefBase : this->rows(); } - RefBase() + EIGEN_DEVICE_FUNC RefBase() : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime), // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values: m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime, @@ -157,7 +157,7 @@ template class RefBase typedef Stride StrideBase; template - void construct(Expression& expr) + EIGEN_DEVICE_FUNC void construct(Expression& expr) { if(PlainObjectType::RowsAtCompileTime==1) { @@ -171,8 +171,12 @@ template class RefBase } else ::new (static_cast(this)) Base(expr.data(), expr.rows(), expr.cols()); - ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(), - StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride()); + + if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit))) + ::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1); + else + ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(), + StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride()); } StrideBase m_stride; @@ -182,7 +186,11 @@ template class RefBase template class Ref : public RefBase > { + private: typedef internal::traits Traits; + template + EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase& expr, + typename 
internal::enable_if::MatchAtCompileTime),Derived>::type* = 0); public: typedef RefBase Base; @@ -191,20 +199,23 @@ template class Ref #ifndef EIGEN_PARSED_BY_DOXYGEN template - inline Ref(PlainObjectBase& expr, - typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) { - Base::construct(expr); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + Base::construct(expr.derived()); } template - inline Ref(const DenseBase& expr, - typename internal::enable_if::value&&bool(Traits::template match::MatchAtCompileTime)),Derived>::type* = 0, - int = Derived::ThisConstantIsPrivateInPlainObjectBase) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) #else template inline Ref(DenseBase& expr) #endif { + EIGEN_STATIC_ASSERT(bool(internal::is_lvalue::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); Base::construct(expr.const_cast_derived()); } @@ -223,7 +234,8 @@ template class Ref< EIGEN_DENSE_PUBLIC_INTERFACE(Ref) template - inline Ref(const DenseBase& expr) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::ScalarTypeMatch),Derived>::type* = 0) { // std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," << match_helper::InnerStrideMatch << "\n"; // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; @@ -231,18 +243,27 @@ template class Ref< construct(expr.derived(), typename Traits::template match::type()); } + EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : 
Base(other) { + // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy + } + + template + EIGEN_DEVICE_FUNC inline Ref(const RefBase& other) { + construct(other.derived(), typename Traits::template match::type()); + } + protected: template - void construct(const Expression& expr,internal::true_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type) { Base::construct(expr); } template - void construct(const Expression& expr, internal::false_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { - m_object.lazyAssign(expr); + internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); Base::construct(m_object); } diff --git a/nuparu/include/Eigen/src/Core/Replicate.h b/nuparu/include/Eigen/src/Core/Replicate.h index dde86a83..bec59831 100644 --- a/nuparu/include/Eigen/src/Core/Replicate.h +++ b/nuparu/include/Eigen/src/Core/Replicate.h @@ -35,10 +35,7 @@ struct traits > typedef typename MatrixType::Scalar Scalar; typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; - enum { - Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor - }; - typedef typename nested::type MatrixTypeNested; + typedef typename ref_selector::type MatrixTypeNested; typedef typename remove_reference::type _MatrixTypeNested; enum { RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic @@ -53,8 +50,9 @@ struct traits > IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1 : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0 : (MatrixType::Flags & RowMajorBit) ? 1 : 0, - Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0), - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + + // FIXME enable DirectAccess with negative strides? + Flags = IsRowMajor ? 
RowMajorBit : 0 }; }; } @@ -68,10 +66,12 @@ template class Replicate typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Replicate) + typedef typename internal::remove_all::type NestedExpression; template - inline explicit Replicate(const OriginalMatrixType& a_matrix) - : m_matrix(a_matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) + EIGEN_DEVICE_FUNC + inline explicit Replicate(const OriginalMatrixType& matrix) + : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) { EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) @@ -79,41 +79,20 @@ template class Replicate } template - inline Replicate(const OriginalMatrixType& a_matrix, Index rowFactor, Index colFactor) - : m_matrix(a_matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) + EIGEN_DEVICE_FUNC + inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor) + : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) { EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); } - inline Scalar coeff(Index rowId, Index colId) const - { - // try to avoid using modulo; this is a pure optimization strategy - const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 - : RowFactor==1 ? rowId - : rowId%m_matrix.rows(); - const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 - : ColFactor==1 ? colId - : colId%m_matrix.cols(); - - return m_matrix.coeff(actual_row, actual_col); - } - template - inline PacketScalar packet(Index rowId, Index colId) const - { - const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 - : RowFactor==1 ? 
rowId - : rowId%m_matrix.rows(); - const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 - : ColFactor==1 ? colId - : colId%m_matrix.cols(); - - return m_matrix.template packet(actual_row, actual_col); - } - + EIGEN_DEVICE_FUNC const _MatrixTypeNested& nestedExpression() const { return m_matrix; @@ -135,27 +114,12 @@ template class Replicate */ template template -inline const Replicate +const Replicate DenseBase::replicate() const { return Replicate(derived()); } -/** - * \return an expression of the replication of \c *this - * - * Example: \include MatrixBase_replicate_int_int.cpp - * Output: \verbinclude MatrixBase_replicate_int_int.out - * - * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate - */ -template -inline const Replicate -DenseBase::replicate(Index rowFactor,Index colFactor) const -{ - return Replicate(derived(),rowFactor,colFactor); -} - /** * \return an expression of the replication of each column (or row) of \c *this * diff --git a/nuparu/include/Eigen/src/Core/ReturnByValue.h b/nuparu/include/Eigen/src/Core/ReturnByValue.h index d66c24ba..7feb6e01 100644 --- a/nuparu/include/Eigen/src/Core/ReturnByValue.h +++ b/nuparu/include/Eigen/src/Core/ReturnByValue.h @@ -38,9 +38,10 @@ struct traits > * So internal::nested always gives the plain return matrix type. * * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ?? 
+ * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators */ template -struct nested, n, PlainObject> +struct nested_eval, n, PlainObject> { typedef typename traits::ReturnType type; }; @@ -48,7 +49,7 @@ struct nested, n, PlainObject> } // end namespace internal template class ReturnByValue - : internal::no_assignment_operator, public internal::dense_xpr_base< ReturnByValue >::type + : public internal::dense_xpr_base< ReturnByValue >::type, internal::no_assignment_operator { public: typedef typename internal::traits::ReturnType ReturnType; @@ -57,10 +58,11 @@ template class ReturnByValue EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue) template + EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const { static_cast(this)->evalTo(dst); } - inline Index rows() const { return static_cast(this)->rows(); } - inline Index cols() const { return static_cast(this)->cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast(this)->rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast(this)->cols(); } #ifndef EIGEN_PARSED_BY_DOXYGEN #define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT @@ -72,6 +74,7 @@ template class ReturnByValue const Unusable& coeff(Index,Index) const { return *reinterpret_cast(this); } Unusable& coeffRef(Index) { return *reinterpret_cast(this); } Unusable& coeffRef(Index,Index) { return *reinterpret_cast(this); } +#undef Unusable #endif }; @@ -83,6 +86,33 @@ Derived& DenseBase::operator=(const ReturnByValue& other) return derived(); } +namespace internal { + +// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that +// when a ReturnByValue expression is assigned, the evaluator is not constructed. 
+// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world + +template +struct evaluator > + : public evaluator::ReturnType> +{ + typedef ReturnByValue XprType; + typedef typename internal::traits::ReturnType PlainObject; + typedef evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + xpr.evalTo(m_result); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_RETURNBYVALUE_H diff --git a/nuparu/include/Eigen/src/Core/Reverse.h b/nuparu/include/Eigen/src/Core/Reverse.h index e30ae3d2..d7c380c7 100644 --- a/nuparu/include/Eigen/src/Core/Reverse.h +++ b/nuparu/include/Eigen/src/Core/Reverse.h @@ -37,32 +37,25 @@ struct traits > typedef typename MatrixType::Scalar Scalar; typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; - typedef typename nested::type MatrixTypeNested; + typedef typename ref_selector::type MatrixTypeNested; typedef typename remove_reference::type _MatrixTypeNested; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - - // let's enable LinearAccess only with vectorization because of the product overhead - LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) ) - ? 
LinearAccessBit : 0, - - Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess), - - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit) }; }; -template struct reverse_packet_cond +template struct reverse_packet_cond { - static inline PacketScalar run(const PacketScalar& x) { return preverse(x); } + static inline PacketType run(const PacketType& x) { return preverse(x); } }; -template struct reverse_packet_cond +template struct reverse_packet_cond { - static inline PacketScalar run(const PacketScalar& x) { return x; } + static inline PacketType run(const PacketType& x) { return x; } }; } // end namespace internal @@ -74,12 +67,9 @@ template class Reverse typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) + typedef typename internal::remove_all::type NestedExpression; using Base::IsRowMajor; - // next line is necessary because otherwise const version of operator() - // is hidden by non-const version defined in this file - using Base::operator(); - protected: enum { PacketSize = internal::packet_traits::size, @@ -95,82 +85,19 @@ template class Reverse typedef internal::reverse_packet_cond reverse_packet; public: - inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } + EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return -m_matrix.innerStride(); } - inline Scalar& operator()(Index row, Index col) - { - eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); - return 
coeffRef(row, col); - } - - inline Scalar& coeffRef(Index row, Index col) - { - return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row, - ReverseCol ? m_matrix.cols() - col - 1 : col); - } - - inline CoeffReturnType coeff(Index row, Index col) const - { - return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row, - ReverseCol ? m_matrix.cols() - col - 1 : col); - } - - inline CoeffReturnType coeff(Index index) const - { - return m_matrix.coeff(m_matrix.size() - index - 1); - } - - inline Scalar& coeffRef(Index index) - { - return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1); - } - - inline Scalar& operator()(Index index) - { - eigen_assert(index >= 0 && index < m_matrix.size()); - return coeffRef(index); - } - - template - inline const PacketScalar packet(Index row, Index col) const - { - return reverse_packet::run(m_matrix.template packet( - ReverseRow ? m_matrix.rows() - row - OffsetRow : row, - ReverseCol ? m_matrix.cols() - col - OffsetCol : col)); - } - - template - inline void writePacket(Index row, Index col, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket( - ReverseRow ? m_matrix.rows() - row - OffsetRow : row, - ReverseCol ? 
m_matrix.cols() - col - OffsetCol : col, - reverse_packet::run(x)); - } - - template - inline const PacketScalar packet(Index index) const - { - return internal::preverse(m_matrix.template packet( m_matrix.size() - index - PacketSize )); - } - - template - inline void writePacket(Index index, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket(m_matrix.size() - index - PacketSize, internal::preverse(x)); - } - - const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC const typename internal::remove_all::type& nestedExpression() const { return m_matrix; @@ -190,33 +117,93 @@ template inline typename DenseBase::ReverseReturnType DenseBase::reverse() { - return derived(); + return ReverseReturnType(derived()); } -/** This is the const version of reverse(). */ -template -inline const typename DenseBase::ConstReverseReturnType -DenseBase::reverse() const -{ - return derived(); -} + +//reverse const overload moved DenseBase.h due to a CUDA compiler bug /** This is the "in place" version of reverse: it reverses \c *this. * * In most cases it is probably better to simply use the reversed expression * of a matrix. However, when reversing the matrix data itself is really needed, * then this "in-place" version is probably the right choice because it provides - * the following additional features: + * the following additional benefits: * - less error prone: doing the same operation with .reverse() requires special care: * \code m = m.reverse().eval(); \endcode - * - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap) + * - this API enables reverse operations without the need for a temporary * - it allows future optimizations (cache friendliness, etc.) 
* - * \sa reverse() */ + * \sa VectorwiseOp::reverseInPlace(), reverse() */ template inline void DenseBase::reverseInPlace() { - derived() = derived().reverse().eval(); + if(cols()>rows()) + { + Index half = cols()/2; + leftCols(half).swap(rightCols(half).reverse()); + if((cols()%2)==1) + { + Index half2 = rows()/2; + col(half).head(half2).swap(col(half).tail(half2).reverse()); + } + } + else + { + Index half = rows()/2; + topRows(half).swap(bottomRows(half).reverse()); + if((rows()%2)==1) + { + Index half2 = cols()/2; + row(half).head(half2).swap(row(half).tail(half2).reverse()); + } + } +} + +namespace internal { + +template +struct vectorwise_reverse_inplace_impl; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.rows()/2; + xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse()); + } +}; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.cols()/2; + xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse()); + } +}; + +} // end namespace internal + +/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this. + * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. 
However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional benefits: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API enables reverse operations without the need for a temporary + * + * \sa DenseBase::reverseInPlace(), reverse() */ +template +void VectorwiseOp::reverseInPlace() +{ + internal::vectorwise_reverse_inplace_impl::run(_expression().const_cast_derived()); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/Select.h b/nuparu/include/Eigen/src/Core/Select.h index 87993bbb..79eec1b5 100644 --- a/nuparu/include/Eigen/src/Core/Select.h +++ b/nuparu/include/Eigen/src/Core/Select.h @@ -43,23 +43,21 @@ struct traits > ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, - Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits, - CoeffReadCost = traits::type>::CoeffReadCost - + EIGEN_SIZE_MAX(traits::type>::CoeffReadCost, - traits::type>::CoeffReadCost) + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit }; }; } template -class Select : internal::no_assignment_operator, - public internal::dense_xpr_base< Select >::type +class Select : public internal::dense_xpr_base< Select >::type, + internal::no_assignment_operator { public: typedef typename internal::dense_xpr_base