diff --git a/CMakeLists.txt b/CMakeLists.txt old mode 100644 new mode 100755 index bed4e57a..92c450c7 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,7 +64,7 @@ endif() # set compiler flags for c++11 if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O3 -m64 -msse2 -w") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O0 -m64 -msse2 -w") elseif(WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") endif() @@ -86,6 +86,8 @@ set(CORE_SRC src/camera/camera.cpp src/viewer/viewer.cpp src/fluidSolver/fluidSolver.cpp + src/fluidSolver/flipsolver.cpp + src/fluidSolver/grid.cpp src/scene/scene.cpp src/geom/geom.cpp ) diff --git a/CMakeLists.txt.user.18 b/CMakeLists.txt.user.18 new file mode 100644 index 00000000..bee742c0 --- /dev/null +++ b/CMakeLists.txt.user.18 @@ -0,0 +1,190 @@ + + + + + + EnvironmentId + {d00c15fe-d5af-4660-9625-747312d9a523} + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + 80 + true + true + 1 + true + false + 0 + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + ProjectExplorer.Project.Target.0 + + Clang 5.4 + Clang 5.4 + {66a94819-28af-44ca-8dd8-ad399b4ff8dd} + 0 + 0 + 0 + + false + /Users/charleswang/Documents/School/Spring16/CIS563/FluidSolver/CIS563-FluidSolver-master-build + + + + + false + + true + Make + + CMakeProjectManager.MakeStep + + 1 + Build + + ProjectExplorer.BuildSteps.Build + + + + clean + + true + + true + Make + + CMakeProjectManager.MakeStep + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + all + + CMakeProjectManager.CMakeBuildConfiguration + + 1 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + + + false + false + false 
+ false + true + 0.01 + 10 + true + 1 + 25 + + 1 + true + false + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + Thanda + + false + + 2 + + Thanda + + CMakeProjectManager.CMakeRunConfiguration.Thanda + 3768 + false + true + false + false + true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.FileVersion + 18 + + + Version + 18 + + diff --git a/CMakeLists.txt.user.3.3-pre1 b/CMakeLists.txt.user.3.3-pre1 new file mode 100644 index 00000000..cd30e0c3 --- /dev/null +++ b/CMakeLists.txt.user.3.3-pre1 @@ -0,0 +1,188 @@ + + + + + + EnvironmentId + {d00c15fe-d5af-4660-9625-747312d9a523} + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + 80 + true + true + 1 + true + false + 0 + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + ProjectExplorer.Project.Target.0 + + Clang 5.4 + Clang 5.4 + {66a94819-28af-44ca-8dd8-ad399b4ff8dd} + 0 + 0 + 0 + + false + /Users/charleswang/Documents/School/Spring16/CIS563/FluidSolver/CIS563-FluidSolver-master-build + + + + + false + true + Make + + CMakeProjectManager.MakeStep + + 1 + Build + + ProjectExplorer.BuildSteps.Build + + + + clean + + true + true + Make + + CMakeProjectManager.MakeStep + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + all + + CMakeProjectManager.CMakeBuildConfiguration + + 1 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + + + false + false + false + false + true + 0.01 + 10 + true + 1 + 25 + + 1 + true + false + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + Thanda + + true + + 2 + + Thanda + + CMakeProjectManager.CMakeRunConfiguration.Thanda + 3768 + false + true + false + false + 
true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.FileVersion + 16 + + + Version + 16 + + diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 54283261..b04ce9a7 --- a/README.md +++ b/README.md @@ -13,4 +13,63 @@ Fluid Solver Submission guidelines: - Submit on canvas with a direct link to your pull request on GitHub -And you're done! \ No newline at end of file +And you're done! + + +README: + +SPRINT 3: +EVERYTHING IS THERE BUT DOESN'T REALLY WORK?? + +SPRINT 2: +Running the program will prompt you to enter the json to be used in this scene (file directory is relative to the executable). +If you enter an invalid json, it will prompt you to enter the file again. +ALL REQUIRED FEATURES HAVE BEEN IMPLEMENTED + +notes: +Time step: In the viewer class, the time step of the simulation can be adjusted by changing the value of float time_step on line 259 +Gravity: At each update, all values of the V grid have been reset to a small negative value (-2.0f). +Update Particle Velocity: For this sprint I implemented forward Euler, rather than RK2. + +adjusted from last sprint: +store_particle_velocity_to_grid: + - the velocity of each grid is scaled by the number of particles that influence it + +still probably somewhat broken: + - interpolation of velocity needs to be written more cleanly + +SPRINT 1: + +Running the program will prompt you to enter the json to be used in this scene (file directory is relative to the executable). +If you enter an invalid json, it will prompt you to enter the file again. +All functionality was implemented. + +Running the program as is will generate a default MACGrid of dimensions 5x5x5. +The tests for store_particle_velocity_to_grid and interpolate_velocity are in the scene.cpp file. +The print statements for the tests are commented out. 
+ +store_particle_velocity_to_grid: + - the stored velocity of each particle is weighted by v/dist where + - v is the velocity in the respective axis + - dist is the distance between the particle and the center of the face of the respective axis + +interpolate_velocity: + - given a position, this function will return a vec3 of the interpolated velocity of the faces nearest to that position. + +FROM VIEWER PROJECT: + +Citation: +base code for VBO/VAO/EBO indexing: +http://www.learnopengl.com/#!Getting-started/Hello-Triangle + - Helped me understand how to properly set up a gl context +this helped with understanding linking shaders with new attributes (color): +http://learnopengl.com/#!Getting-started/Shaders + - Helped me understand binding buffers in a particular order + +Running the program will prompt you to enter the json to be used in this scene (file directory is relative to the executable). +If you enter an invalid json, it will prompt you to enter the file again. +All functionality was implemented. + +Interfacing: +Arrow keys will rotate you around the origin +Maya/Houdini mouse controls are also implemented (hold space + mouse drags) \ No newline at end of file diff --git a/TextureFragmentShader.fragmentshader b/TextureFragmentShader.fragmentshader new file mode 100755 index 00000000..fb4e79de --- /dev/null +++ b/TextureFragmentShader.fragmentshader @@ -0,0 +1,16 @@ +#version 330 core + +// Interpolated values from the vertex shaders +in vec2 UV; + +// Output data +out vec3 color; + +// Values that stay constant for the whole mesh. 
+uniform sampler2D myTextureSampler; + +void main(){ + + // Output color = color of the texture at the specified UV + color = texture( myTextureSampler, UV ).rgb; +} \ No newline at end of file diff --git a/TransformVertexShader.vertexshader b/TransformVertexShader.vertexshader new file mode 100755 index 00000000..fe1dea85 --- /dev/null +++ b/TransformVertexShader.vertexshader @@ -0,0 +1,21 @@ +#version 330 core + +// Input vertex data, different for all executions of this shader. +layout(location = 0) in vec3 vertexPosition_modelspace; +layout(location = 1) in vec2 vertexUV; + +// Output data ; will be interpolated for each fragment. +out vec2 UV; + +// Values that stay constant for the whole mesh. +uniform mat4 MVP; + +void main(){ + + // Output position of the vertex, in clip space : MVP * position + gl_Position = MVP * vec4(vertexPosition_modelspace,1); + + // UV of the vertex. No special space for this one. + UV = vertexUV; +} + diff --git a/nuparu/.gitignore b/nuparu/.gitignore old mode 100644 new mode 100755 diff --git a/nuparu/README.md b/nuparu/README.md old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/openexr/exrenvmap b/nuparu/bin/osx/openexr/exrenvmap old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/openexr/exrheader b/nuparu/bin/osx/openexr/exrheader old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/openexr/exrmakepreview b/nuparu/bin/osx/openexr/exrmakepreview old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/openexr/exrmaketiled b/nuparu/bin/osx/openexr/exrmaketiled old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/openexr/exrmultipart b/nuparu/bin/osx/openexr/exrmultipart old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/openexr/exrmultiview b/nuparu/bin/osx/openexr/exrmultiview old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/openexr/exrstdattr b/nuparu/bin/osx/openexr/exrstdattr old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/openvdb/vdb_print 
b/nuparu/bin/osx/openvdb/vdb_print old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/openvdb/vdb_render b/nuparu/bin/osx/openvdb/vdb_render old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/partio/makecircle b/nuparu/bin/osx/partio/makecircle old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/partio/makeline b/nuparu/bin/osx/partio/makeline old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/partio/partattr b/nuparu/bin/osx/partio/partattr old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/partio/partconv b/nuparu/bin/osx/partio/partconv old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/partio/partinfo b/nuparu/bin/osx/partio/partinfo old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/partio/partview b/nuparu/bin/osx/partio/partview old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/partio/test b/nuparu/bin/osx/partio/test old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/partio/testcache b/nuparu/bin/osx/partio/testcache old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/partio/testiterator b/nuparu/bin/osx/partio/testiterator old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/partio/testkdtree b/nuparu/bin/osx/partio/testkdtree old mode 100644 new mode 100755 diff --git a/nuparu/bin/osx/partio/teststr b/nuparu/bin/osx/partio/teststr old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/openexr/eLut.exe b/nuparu/bin/win/openexr/eLut.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/openexr/exrenvmap.exe b/nuparu/bin/win/openexr/exrenvmap.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/openexr/exrheader.exe b/nuparu/bin/win/openexr/exrheader.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/openexr/exrmakepreview.exe b/nuparu/bin/win/openexr/exrmakepreview.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/openexr/exrmaketiled.exe b/nuparu/bin/win/openexr/exrmaketiled.exe old mode 100644 new mode 100755 
diff --git a/nuparu/bin/win/openexr/exrmultipart.exe b/nuparu/bin/win/openexr/exrmultipart.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/openexr/exrmultiview.exe b/nuparu/bin/win/openexr/exrmultiview.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/openexr/exrstdattr.exe b/nuparu/bin/win/openexr/exrstdattr.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/openexr/toFloat.exe b/nuparu/bin/win/openexr/toFloat.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/partio/makecircle.exe b/nuparu/bin/win/partio/makecircle.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/partio/makeline.exe b/nuparu/bin/win/partio/makeline.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/partio/partattr.exe b/nuparu/bin/win/partio/partattr.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/partio/partconv.exe b/nuparu/bin/win/partio/partconv.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/partio/partinfo.exe b/nuparu/bin/win/partio/partinfo.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/partio/test.exe b/nuparu/bin/win/partio/test.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/partio/testcache.exe b/nuparu/bin/win/partio/testcache.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/partio/testiterator.exe b/nuparu/bin/win/partio/testiterator.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/partio/testkdtree.exe b/nuparu/bin/win/partio/testkdtree.exe old mode 100644 new mode 100755 diff --git a/nuparu/bin/win/partio/teststr.exe b/nuparu/bin/win/partio/teststr.exe old mode 100644 new mode 100755 diff --git a/nuparu/include/Eigen/Array b/nuparu/include/Eigen/Array deleted file mode 100644 index 3d004fb6..00000000 --- a/nuparu/include/Eigen/Array +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef EIGEN_ARRAY_MODULE_H -#define EIGEN_ARRAY_MODULE_H - -// include Core first to handle Eigen2 support macros -#include "Core" - -#ifndef 
EIGEN2_SUPPORT - #error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core. -#endif - -#endif // EIGEN_ARRAY_MODULE_H diff --git a/nuparu/include/Eigen/COPYING.BSD b/nuparu/include/Eigen/COPYING.BSD deleted file mode 100644 index 11971ffe..00000000 --- a/nuparu/include/Eigen/COPYING.BSD +++ /dev/null @@ -1,26 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ \ No newline at end of file diff --git a/nuparu/include/Eigen/COPYING.GPL b/nuparu/include/Eigen/COPYING.GPL deleted file mode 100644 index 94a9ed02..00000000 --- a/nuparu/include/Eigen/COPYING.GPL +++ /dev/null @@ -1,674 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. 
You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. 
- - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. 
- - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. 
Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. 
- - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. 
This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<http://www.gnu.org/philosophy/why-not-lgpl.html>. 
diff --git a/nuparu/include/Eigen/COPYING.LGPL b/nuparu/include/Eigen/COPYING.LGPL deleted file mode 100644 index 4362b491..00000000 --- a/nuparu/include/Eigen/COPYING.LGPL +++ /dev/null @@ -1,502 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1, February 1999 - - Copyright (C) 1991, 1999 Free Software Foundation, Inc. - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -[This is the first released version of the Lesser GPL. It also counts - as the successor of the GNU Library Public License, version 2, hence - the version number 2.1.] - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -Licenses are intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. - - This license, the Lesser General Public License, applies to some -specially designated software packages--typically libraries--of the -Free Software Foundation and other authors who decide to use it. You -can use it too, but we suggest you first think carefully about whether -this license or the ordinary General Public License is the better -strategy to use in any particular case, based on the explanations below. - - When we speak of free software, we are referring to freedom of use, -not price. Our General Public Licenses are designed to make sure that -you have the freedom to distribute copies of free software (and charge -for this service if you wish); that you receive source code or can get -it if you want it; that you can change the software and use pieces of -it in new free programs; and that you are informed that you can do -these things. - - To protect your rights, we need to make restrictions that forbid -distributors to deny you these rights or to ask you to surrender these -rights. 
These restrictions translate to certain responsibilities for -you if you distribute copies of the library or if you modify it. - - For example, if you distribute copies of the library, whether gratis -or for a fee, you must give the recipients all the rights that we gave -you. You must make sure that they, too, receive or can get the source -code. If you link other code with the library, you must provide -complete object files to the recipients, so that they can relink them -with the library after making changes to the library and recompiling -it. And you must show them these terms so they know their rights. - - We protect your rights with a two-step method: (1) we copyright the -library, and (2) we offer you this license, which gives you legal -permission to copy, distribute and/or modify the library. - - To protect each distributor, we want to make it very clear that -there is no warranty for the free library. Also, if the library is -modified by someone else and passed on, the recipients should know -that what they have is not the original version, so that the original -author's reputation will not be affected by problems that might be -introduced by others. - - Finally, software patents pose a constant threat to the existence of -any free program. We wish to make sure that a company cannot -effectively restrict the users of a free program by obtaining a -restrictive license from a patent holder. Therefore, we insist that -any patent license obtained for a version of the library must be -consistent with the full freedom of use specified in this license. - - Most GNU software, including some libraries, is covered by the -ordinary GNU General Public License. This license, the GNU Lesser -General Public License, applies to certain designated libraries, and -is quite different from the ordinary General Public License. We use -this license for certain libraries in order to permit linking those -libraries into non-free programs. 
- - When a program is linked with a library, whether statically or using -a shared library, the combination of the two is legally speaking a -combined work, a derivative of the original library. The ordinary -General Public License therefore permits such linking only if the -entire combination fits its criteria of freedom. The Lesser General -Public License permits more lax criteria for linking other code with -the library. - - We call this license the "Lesser" General Public License because it -does Less to protect the user's freedom than the ordinary General -Public License. It also provides other free software developers Less -of an advantage over competing non-free programs. These disadvantages -are the reason we use the ordinary General Public License for many -libraries. However, the Lesser license provides advantages in certain -special circumstances. - - For example, on rare occasions, there may be a special need to -encourage the widest possible use of a certain library, so that it becomes -a de-facto standard. To achieve this, non-free programs must be -allowed to use the library. A more frequent case is that a free -library does the same job as widely used non-free libraries. In this -case, there is little to gain by limiting the free library to free -software only, so we use the Lesser General Public License. - - In other cases, permission to use a particular library in non-free -programs enables a greater number of people to use a large body of -free software. For example, permission to use the GNU C Library in -non-free programs enables many more people to use the whole GNU -operating system, as well as its variant, the GNU/Linux operating -system. - - Although the Lesser General Public License is Less protective of the -users' freedom, it does ensure that the user of a program that is -linked with the Library has the freedom and the wherewithal to run -that program using a modified version of the Library. 
- - The precise terms and conditions for copying, distribution and -modification follow. Pay close attention to the difference between a -"work based on the library" and a "work that uses the library". The -former contains code derived from the library, whereas the latter must -be combined with the library in order to run. - - GNU LESSER GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License Agreement applies to any software library or other -program which contains a notice placed by the copyright holder or -other authorized party saying it may be distributed under the terms of -this Lesser General Public License (also called "this License"). -Each licensee is addressed as "you". - - A "library" means a collection of software functions and/or data -prepared so as to be conveniently linked with application programs -(which use some of those functions and data) to form executables. - - The "Library", below, refers to any such software library or work -which has been distributed under these terms. A "work based on the -Library" means either the Library or any derivative work under -copyright law: that is to say, a work containing the Library or a -portion of it, either verbatim or with modifications and/or translated -straightforwardly into another language. (Hereinafter, translation is -included without limitation in the term "modification".) - - "Source code" for a work means the preferred form of the work for -making modifications to it. For a library, complete source code means -all the source code for all modules it contains, plus any associated -interface definition files, plus the scripts used to control compilation -and installation of the library. - - Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. 
The act of -running a program using the Library is not restricted, and output from -such a program is covered only if its contents constitute a work based -on the Library (independent of the use of the Library in a tool for -writing it). Whether that is true depends on what the Library does -and what the program that uses the Library does. - - 1. You may copy and distribute verbatim copies of the Library's -complete source code as you receive it, in any medium, provided that -you conspicuously and appropriately publish on each copy an -appropriate copyright notice and disclaimer of warranty; keep intact -all the notices that refer to this License and to the absence of any -warranty; and distribute a copy of this License along with the -Library. - - You may charge a fee for the physical act of transferring a copy, -and you may at your option offer warranty protection in exchange for a -fee. - - 2. You may modify your copy or copies of the Library or any portion -of it, thus forming a work based on the Library, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) The modified work must itself be a software library. - - b) You must cause the files modified to carry prominent notices - stating that you changed the files and the date of any change. - - c) You must cause the whole of the work to be licensed at no - charge to all third parties under the terms of this License. - - d) If a facility in the modified Library refers to a function or a - table of data to be supplied by an application program that uses - the facility, other than as an argument passed when the facility - is invoked, then you must make a good faith effort to ensure that, - in the event an application does not supply such function or - table, the facility still operates, and performs whatever part of - its purpose remains meaningful. 
- - (For example, a function in a library to compute square roots has - a purpose that is entirely well-defined independent of the - application. Therefore, Subsection 2d requires that any - application-supplied function or table used by this function must - be optional: if the application does not supply it, the square - root function must still compute square roots.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Library, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Library, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote -it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Library. - -In addition, mere aggregation of another work not based on the Library -with the Library (or with a work based on the Library) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may opt to apply the terms of the ordinary GNU General Public -License instead of this License to a given copy of the Library. To do -this, you must alter all the notices that refer to this License, so -that they refer to the ordinary GNU General Public License, version 2, -instead of to this License. (If a newer version than version 2 of the -ordinary GNU General Public License has appeared, then you can specify -that version instead if you wish.) Do not make any other change in -these notices. 
- - Once this change is made in a given copy, it is irreversible for -that copy, so the ordinary GNU General Public License applies to all -subsequent copies and derivative works made from that copy. - - This option is useful when you wish to copy part of the code of -the Library into a program that is not a library. - - 4. You may copy and distribute the Library (or a portion or -derivative of it, under Section 2) in object code or executable form -under the terms of Sections 1 and 2 above provided that you accompany -it with the complete corresponding machine-readable source code, which -must be distributed under the terms of Sections 1 and 2 above on a -medium customarily used for software interchange. - - If distribution of object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the -source code from the same place satisfies the requirement to -distribute the source code, even though third parties are not -compelled to copy the source along with the object code. - - 5. A program that contains no derivative of any portion of the -Library, but is designed to work with the Library by being compiled or -linked with it, is called a "work that uses the Library". Such a -work, in isolation, is not a derivative work of the Library, and -therefore falls outside the scope of this License. - - However, linking a "work that uses the Library" with the Library -creates an executable that is a derivative of the Library (because it -contains portions of the Library), rather than a "work that uses the -library". The executable is therefore covered by this License. -Section 6 states terms for distribution of such executables. - - When a "work that uses the Library" uses material from a header file -that is part of the Library, the object code for the work may be a -derivative work of the Library even though the source code is not. 
-Whether this is true is especially significant if the work can be -linked without the Library, or if the work is itself a library. The -threshold for this to be true is not precisely defined by law. - - If such an object file uses only numerical parameters, data -structure layouts and accessors, and small macros and small inline -functions (ten lines or less in length), then the use of the object -file is unrestricted, regardless of whether it is legally a derivative -work. (Executables containing this object code plus portions of the -Library will still fall under Section 6.) - - Otherwise, if the work is a derivative of the Library, you may -distribute the object code for the work under the terms of Section 6. -Any executables containing that work also fall under Section 6, -whether or not they are linked directly with the Library itself. - - 6. As an exception to the Sections above, you may also combine or -link a "work that uses the Library" with the Library to produce a -work containing portions of the Library, and distribute that work -under terms of your choice, provided that the terms permit -modification of the work for the customer's own use and reverse -engineering for debugging such modifications. - - You must give prominent notice with each copy of the work that the -Library is used in it and that the Library and its use are covered by -this License. You must supply a copy of this License. If the work -during execution displays copyright notices, you must include the -copyright notice for the Library among them, as well as a reference -directing the user to the copy of this License. 
Also, you must do one -of these things: - - a) Accompany the work with the complete corresponding - machine-readable source code for the Library including whatever - changes were used in the work (which must be distributed under - Sections 1 and 2 above); and, if the work is an executable linked - with the Library, with the complete machine-readable "work that - uses the Library", as object code and/or source code, so that the - user can modify the Library and then relink to produce a modified - executable containing the modified Library. (It is understood - that the user who changes the contents of definitions files in the - Library will not necessarily be able to recompile the application - to use the modified definitions.) - - b) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (1) uses at run time a - copy of the library already present on the user's computer system, - rather than copying library functions into the executable, and (2) - will operate properly with a modified version of the library, if - the user installs one, as long as the modified version is - interface-compatible with the version that the work was made with. - - c) Accompany the work with a written offer, valid for at - least three years, to give the same user the materials - specified in Subsection 6a, above, for a charge no more - than the cost of performing this distribution. - - d) If distribution of the work is made by offering access to copy - from a designated place, offer equivalent access to copy the above - specified materials from the same place. - - e) Verify that the user has already received a copy of these - materials or that you have already sent this user a copy. - - For an executable, the required form of the "work that uses the -Library" must include any data and utility programs needed for -reproducing the executable from it. 
However, as a special exception, -the materials to be distributed need not include anything that is -normally distributed (in either source or binary form) with the major -components (compiler, kernel, and so on) of the operating system on -which the executable runs, unless that component itself accompanies -the executable. - - It may happen that this requirement contradicts the license -restrictions of other proprietary libraries that do not normally -accompany the operating system. Such a contradiction means you cannot -use both them and the Library together in an executable that you -distribute. - - 7. You may place library facilities that are a work based on the -Library side-by-side in a single library together with other library -facilities not covered by this License, and distribute such a combined -library, provided that the separate distribution of the work based on -the Library and of the other library facilities is otherwise -permitted, and provided that you do these two things: - - a) Accompany the combined library with a copy of the same work - based on the Library, uncombined with any other library - facilities. This must be distributed under the terms of the - Sections above. - - b) Give prominent notice with the combined library of the fact - that part of it is a work based on the Library, and explaining - where to find the accompanying uncombined form of the same work. - - 8. You may not copy, modify, sublicense, link with, or distribute -the Library except as expressly provided under this License. Any -attempt otherwise to copy, modify, sublicense, link with, or -distribute the Library is void, and will automatically terminate your -rights under this License. However, parties who have received copies, -or rights, from you under this License will not have their licenses -terminated so long as such parties remain in full compliance. - - 9. You are not required to accept this License, since you have not -signed it. 
However, nothing else grants you permission to modify or -distribute the Library or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Library (or any work based on the -Library), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Library or works based on it. - - 10. Each time you redistribute the Library (or any work based on the -Library), the recipient automatically receives a license from the -original licensor to copy, distribute, link with or modify the Library -subject to these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties with -this License. - - 11. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Library at all. For example, if a patent -license would not permit royalty-free redistribution of the Library by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Library. - -If any portion of this section is held invalid or unenforceable under any -particular circumstance, the balance of the section is intended to apply, -and the section as a whole is intended to apply in other circumstances. 
- -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 12. If the distribution and/or use of the Library is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Library under this License may add -an explicit geographical distribution limitation excluding those countries, -so that distribution is permitted only in or among countries not thus -excluded. In such case, this License incorporates the limitation as if -written in the body of this License. - - 13. The Free Software Foundation may publish revised and/or new -versions of the Lesser General Public License from time to time. -Such new versions will be similar in spirit to the present version, -but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Library -specifies a version number of this License which applies to it and -"any later version", you have the option of following the terms and -conditions either of that version or of any later version published by -the Free Software Foundation. If the Library does not specify a -license version number, you may choose any version ever published by -the Free Software Foundation. - - 14. 
If you wish to incorporate parts of the Library into other free -programs whose distribution conditions are incompatible with these, -write to the author to ask for permission. For software which is -copyrighted by the Free Software Foundation, write to the Free -Software Foundation; we sometimes make exceptions for this. Our -decision will be guided by the two goals of preserving the free status -of all derivatives of our free software and of promoting the sharing -and reuse of software generally. - - NO WARRANTY - - 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO -WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY -KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE -LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME -THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN -WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY -AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU -FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR -CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE -LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A -FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF -SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. 
- - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Libraries - - If you develop a new library, and you want it to be of the greatest -possible use to the public, we recommend making it free software that -everyone can redistribute and change. You can do so by permitting -redistribution under these terms (or, alternatively, under the terms of the -ordinary General Public License). - - To apply these terms, attach the following notices to the library. It is -safest to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -Also add information on how to contact you by electronic and paper mail. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the library, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - library `Frob' (a library for tweaking knobs) written by James Random Hacker. - - , 1 April 1990 - Ty Coon, President of Vice - -That's all there is to it! 
diff --git a/nuparu/include/Eigen/COPYING.MINPACK b/nuparu/include/Eigen/COPYING.MINPACK deleted file mode 100644 index ae7984da..00000000 --- a/nuparu/include/Eigen/COPYING.MINPACK +++ /dev/null @@ -1,52 +0,0 @@ -Minpack Copyright Notice (1999) University of Chicago. All rights reserved - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the -following conditions are met: - -1. Redistributions of source code must retain the above -copyright notice, this list of conditions and the following -disclaimer. - -2. Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following -disclaimer in the documentation and/or other materials -provided with the distribution. - -3. The end-user documentation included with the -redistribution, if any, must include the following -acknowledgment: - - "This product includes software developed by the - University of Chicago, as Operator of Argonne National - Laboratory. - -Alternately, this acknowledgment may appear in the software -itself, if and wherever such third-party acknowledgments -normally appear. - -4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" -WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE -UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND -THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE -OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY -OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR -USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF -THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) -DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION -UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL -BE CORRECTED. - -5. LIMITATION OF LIABILITY. 
IN NO EVENT WILL THE COPYRIGHT -HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF -ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, -INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF -ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF -PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER -SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT -(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, -EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE -POSSIBILITY OF SUCH LOSS OR DAMAGES. - diff --git a/nuparu/include/Eigen/COPYING.MPL2 b/nuparu/include/Eigen/COPYING.MPL2 deleted file mode 100644 index 14e2f777..00000000 --- a/nuparu/include/Eigen/COPYING.MPL2 +++ /dev/null @@ -1,373 +0,0 @@ -Mozilla Public License Version 2.0 -================================== - -1. Definitions --------------- - -1.1. "Contributor" - means each individual or legal entity that creates, contributes to - the creation of, or owns Covered Software. - -1.2. "Contributor Version" - means the combination of the Contributions of others (if any) used - by a Contributor and that particular Contributor's Contribution. - -1.3. "Contribution" - means Covered Software of a particular Contributor. - -1.4. "Covered Software" - means Source Code Form to which the initial Contributor has attached - the notice in Exhibit A, the Executable Form of such Source Code - Form, and Modifications of such Source Code Form, in each case - including portions thereof. - -1.5. "Incompatible With Secondary Licenses" - means - - (a) that the initial Contributor has attached the notice described - in Exhibit B to the Covered Software; or - - (b) that the Covered Software was made available under the terms of - version 1.1 or earlier of the License, but not also under the - terms of a Secondary License. - -1.6. "Executable Form" - means any form of the work other than Source Code Form. - -1.7. 
"Larger Work" - means a work that combines Covered Software with other material, in - a separate file or files, that is not Covered Software. - -1.8. "License" - means this document. - -1.9. "Licensable" - means having the right to grant, to the maximum extent possible, - whether at the time of the initial grant or subsequently, any and - all of the rights conveyed by this License. - -1.10. "Modifications" - means any of the following: - - (a) any file in Source Code Form that results from an addition to, - deletion from, or modification of the contents of Covered - Software; or - - (b) any new file in Source Code Form that contains any Covered - Software. - -1.11. "Patent Claims" of a Contributor - means any patent claim(s), including without limitation, method, - process, and apparatus claims, in any patent Licensable by such - Contributor that would be infringed, but for the grant of the - License, by the making, using, selling, offering for sale, having - made, import, or transfer of either its Contributions or its - Contributor Version. - -1.12. "Secondary License" - means either the GNU General Public License, Version 2.0, the GNU - Lesser General Public License, Version 2.1, the GNU Affero General - Public License, Version 3.0, or any later versions of those - licenses. - -1.13. "Source Code Form" - means the form of the work preferred for making modifications. - -1.14. "You" (or "Your") - means an individual or a legal entity exercising rights under this - License. For legal entities, "You" includes any entity that - controls, is controlled by, or is under common control with You. For - purposes of this definition, "control" means (a) the power, direct - or indirect, to cause the direction or management of such entity, - whether by contract or otherwise, or (b) ownership of more than - fifty percent (50%) of the outstanding shares or beneficial - ownership of such entity. - -2. License Grants and Conditions --------------------------------- - -2.1. 
Grants - -Each Contributor hereby grants You a world-wide, royalty-free, -non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or - as part of a Larger Work; and - -(b) under Patent Claims of such Contributor to make, use, sell, offer - for sale, have made, import, and otherwise transfer either its - Contributions or its Contributor Version. - -2.2. Effective Date - -The licenses granted in Section 2.1 with respect to any Contribution -become effective for each Contribution on the date the Contributor first -distributes such Contribution. - -2.3. Limitations on Grant Scope - -The licenses granted in this Section 2 are the only rights granted under -this License. No additional rights or licenses will be implied from the -distribution or licensing of Covered Software under this License. -Notwithstanding Section 2.1(b) above, no patent license is granted by a -Contributor: - -(a) for any code that a Contributor has removed from Covered Software; - or - -(b) for infringements caused by: (i) Your and any other third party's - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or - -(c) under Patent Claims infringed by Covered Software in the absence of - its Contributions. - -This License does not grant any rights in the trademarks, service marks, -or logos of any Contributor (except as may be necessary to comply with -the notice requirements in Section 3.4). - -2.4. 
Subsequent Licenses - -No Contributor makes additional grants as a result of Your choice to -distribute the Covered Software under a subsequent version of this -License (see Section 10.2) or under the terms of a Secondary License (if -permitted under the terms of Section 3.3). - -2.5. Representation - -Each Contributor represents that the Contributor believes its -Contributions are its original creation(s) or it has sufficient rights -to grant the rights to its Contributions conveyed by this License. - -2.6. Fair Use - -This License is not intended to limit any rights You have under -applicable copyright doctrines of fair use, fair dealing, or other -equivalents. - -2.7. Conditions - -Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted -in Section 2.1. - -3. Responsibilities -------------------- - -3.1. Distribution of Source Form - -All distribution of Covered Software in Source Code Form, including any -Modifications that You create or to which You contribute, must be under -the terms of this License. You must inform recipients that the Source -Code Form of the Covered Software is governed by the terms of this -License, and how they can obtain a copy of this License. You may not -attempt to alter or restrict the recipients' rights in the Source Code -Form. - -3.2. 
Distribution of Executable Form - -If You distribute Covered Software in Executable Form then: - -(a) such Covered Software must also be made available in Source Code - Form, as described in Section 3.1, and You must inform recipients of - the Executable Form how they can obtain a copy of such Source Code - Form by reasonable means in a timely manner, at a charge no more - than the cost of distribution to the recipient; and - -(b) You may distribute such Executable Form under the terms of this - License, or sublicense it under different terms, provided that the - license for the Executable Form does not attempt to limit or alter - the recipients' rights in the Source Code Form under this License. - -3.3. Distribution of a Larger Work - -You may create and distribute a Larger Work under terms of Your choice, -provided that You also comply with the requirements of this License for -the Covered Software. If the Larger Work is a combination of Covered -Software with a work governed by one or more Secondary Licenses, and the -Covered Software is not Incompatible With Secondary Licenses, this -License permits You to additionally distribute such Covered Software -under the terms of such Secondary License(s), so that the recipient of -the Larger Work may, at their option, further distribute the Covered -Software under the terms of either this License or such Secondary -License(s). - -3.4. Notices - -You may not remove or alter the substance of any license notices -(including copyright notices, patent notices, disclaimers of warranty, -or limitations of liability) contained within the Source Code Form of -the Covered Software, except that You may alter any license notices to -the extent required to remedy known factual inaccuracies. - -3.5. Application of Additional Terms - -You may choose to offer, and to charge a fee for, warranty, support, -indemnity or liability obligations to one or more recipients of Covered -Software. 
However, You may do so only on Your own behalf, and not on -behalf of any Contributor. You must make it absolutely clear that any -such warranty, support, indemnity, or liability obligation is offered by -You alone, and You hereby agree to indemnify every Contributor for any -liability incurred by such Contributor as a result of warranty, support, -indemnity or liability terms You offer. You may include additional -disclaimers of warranty and limitations of liability specific to any -jurisdiction. - -4. Inability to Comply Due to Statute or Regulation ---------------------------------------------------- - -If it is impossible for You to comply with any of the terms of this -License with respect to some or all of the Covered Software due to -statute, judicial order, or regulation then You must: (a) comply with -the terms of this License to the maximum extent possible; and (b) -describe the limitations and the code they affect. Such description must -be placed in a text file included with all distributions of the Covered -Software under this License. Except to the extent prohibited by statute -or regulation, such description must be sufficiently detailed for a -recipient of ordinary skill to be able to understand it. - -5. Termination --------------- - -5.1. The rights granted under this License will terminate automatically -if You fail to comply with any of its terms. However, if You become -compliant, then the rights granted under this License from a particular -Contributor are reinstated (a) provisionally, unless and until such -Contributor explicitly and finally terminates Your grants, and (b) on an -ongoing basis, if such Contributor fails to notify You of the -non-compliance by some reasonable means prior to 60 days after You have -come back into compliance. 
Moreover, Your grants from a particular -Contributor are reinstated on an ongoing basis if such Contributor -notifies You of the non-compliance by some reasonable means, this is the -first time You have received notice of non-compliance with this License -from such Contributor, and You become compliant prior to 30 days after -Your receipt of the notice. - -5.2. If You initiate litigation against any entity by asserting a patent -infringement claim (excluding declaratory judgment actions, -counter-claims, and cross-claims) alleging that a Contributor Version -directly or indirectly infringes any patent, then the rights granted to -You by any and all Contributors for the Covered Software under Section -2.1 of this License shall terminate. - -5.3. In the event of termination under Sections 5.1 or 5.2 above, all -end user license agreements (excluding distributors and resellers) which -have been validly granted by You or Your distributors under this License -prior to termination shall survive termination. - -************************************************************************ -* * -* 6. Disclaimer of Warranty * -* ------------------------- * -* * -* Covered Software is provided under this License on an "as is" * -* basis, without warranty of any kind, either expressed, implied, or * -* statutory, including, without limitation, warranties that the * -* Covered Software is free of defects, merchantable, fit for a * -* particular purpose or non-infringing. The entire risk as to the * -* quality and performance of the Covered Software is with You. * -* Should any Covered Software prove defective in any respect, You * -* (not any Contributor) assume the cost of any necessary servicing, * -* repair, or correction. This disclaimer of warranty constitutes an * -* essential part of this License. No use of any Covered Software is * -* authorized under this License except under this disclaimer. 
* -* * -************************************************************************ - -************************************************************************ -* * -* 7. Limitation of Liability * -* -------------------------- * -* * -* Under no circumstances and under no legal theory, whether tort * -* (including negligence), contract, or otherwise, shall any * -* Contributor, or anyone who distributes Covered Software as * -* permitted above, be liable to You for any direct, indirect, * -* special, incidental, or consequential damages of any character * -* including, without limitation, damages for lost profits, loss of * -* goodwill, work stoppage, computer failure or malfunction, or any * -* and all other commercial damages or losses, even if such party * -* shall have been informed of the possibility of such damages. This * -* limitation of liability shall not apply to liability for death or * -* personal injury resulting from such party's negligence to the * -* extent applicable law prohibits such limitation. Some * -* jurisdictions do not allow the exclusion or limitation of * -* incidental or consequential damages, so this exclusion and * -* limitation may not apply to You. * -* * -************************************************************************ - -8. Litigation -------------- - -Any litigation relating to this License may be brought only in the -courts of a jurisdiction where the defendant maintains its principal -place of business and such litigation shall be governed by laws of that -jurisdiction, without reference to its conflict-of-law provisions. -Nothing in this Section shall prevent a party's ability to bring -cross-claims or counter-claims. - -9. Miscellaneous ----------------- - -This License represents the complete agreement concerning the subject -matter hereof. If any provision of this License is held to be -unenforceable, such provision shall be reformed only to the extent -necessary to make it enforceable. 
Any law or regulation which provides -that the language of a contract shall be construed against the drafter -shall not be used to construe this License against a Contributor. - -10. Versions of the License ---------------------------- - -10.1. New Versions - -Mozilla Foundation is the license steward. Except as provided in Section -10.3, no one other than the license steward has the right to modify or -publish new versions of this License. Each version will be given a -distinguishing version number. - -10.2. Effect of New Versions - -You may distribute the Covered Software under the terms of the version -of the License under which You originally received the Covered Software, -or under the terms of any subsequent version published by the license -steward. - -10.3. Modified Versions - -If you create software not governed by this License, and you want to -create a new license for such software, you may create and use a -modified version of this License if you rename the license and remove -any references to the name of the license steward (except to note that -such modified license differs from this License). - -10.4. Distributing Source Code Form that is Incompatible With Secondary -Licenses - -If You choose to distribute Source Code Form that is Incompatible With -Secondary Licenses under the terms of this version of the License, the -notice described in Exhibit B of this License must be attached. - -Exhibit A - Source Code Form License Notice -------------------------------------------- - - This Source Code Form is subject to the terms of the Mozilla Public - License, v. 2.0. If a copy of the MPL was not distributed with this - file, You can obtain one at http://mozilla.org/MPL/2.0/. - -If it is not possible or desirable to put the notice in a particular -file, then You may include the notice in a location (such as a LICENSE -file in a relevant directory) where a recipient would be likely to look -for such a notice. 
- -You may add additional accurate notices of copyright ownership. - -Exhibit B - "Incompatible With Secondary Licenses" Notice ---------------------------------------------------------- - - This Source Code Form is "Incompatible With Secondary Licenses", as - defined by the Mozilla Public License, v. 2.0. diff --git a/nuparu/include/Eigen/COPYING.README b/nuparu/include/Eigen/COPYING.README deleted file mode 100644 index de5b6321..00000000 --- a/nuparu/include/Eigen/COPYING.README +++ /dev/null @@ -1,18 +0,0 @@ -Eigen is primarily MPL2 licensed. See COPYING.MPL2 and these links: - http://www.mozilla.org/MPL/2.0/ - http://www.mozilla.org/MPL/2.0/FAQ.html - -Some files contain third-party code under BSD or LGPL licenses, whence the other -COPYING.* files here. - -All the LGPL code is either LGPL 2.1-only, or LGPL 2.1-or-later. -For this reason, the COPYING.LGPL file contains the LGPL 2.1 text. - -If you want to guarantee that the Eigen code that you are #including is licensed -under the MPL2 and possibly more permissive licenses (like BSD), #define this -preprocessor symbol: - EIGEN_MPL2_ONLY -For example, with most compilers, you could add this to your project CXXFLAGS: - -DEIGEN_MPL2_ONLY -This will cause a compilation error to be generated if you #include any code that is -LGPL licensed. diff --git a/nuparu/include/Eigen/Cholesky b/nuparu/include/Eigen/Cholesky index f727f5d8..705a04cc 100644 --- a/nuparu/include/Eigen/Cholesky +++ b/nuparu/include/Eigen/Cholesky @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_CHOLESKY_MODULE_H #define EIGEN_CHOLESKY_MODULE_H @@ -10,16 +17,17 @@ * * * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices. 
- * Those decompositions are accessible via the following MatrixBase methods: - * - MatrixBase::llt(), + * Those decompositions are also accessible via the following methods: + * - MatrixBase::llt() * - MatrixBase::ldlt() + * - SelfAdjointView::llt() + * - SelfAdjointView::ldlt() * * \code * #include * \endcode */ -#include "src/misc/Solve.h" #include "src/Cholesky/LLT.h" #include "src/Cholesky/LDLT.h" #ifdef EIGEN_USE_LAPACKE diff --git a/nuparu/include/Eigen/CholmodSupport b/nuparu/include/Eigen/CholmodSupport index 745b884e..83e2c1da 100644 --- a/nuparu/include/Eigen/CholmodSupport +++ b/nuparu/include/Eigen/CholmodSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_CHOLMODSUPPORT_MODULE_H #define EIGEN_CHOLMODSUPPORT_MODULE_H @@ -33,12 +40,8 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/CholmodSupport/CholmodSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_CHOLMODSUPPORT_MODULE_H diff --git a/nuparu/include/Eigen/Core b/nuparu/include/Eigen/Core index 9131cc3f..63602f4c 100644 --- a/nuparu/include/Eigen/Core +++ b/nuparu/include/Eigen/Core @@ -14,6 +14,48 @@ // first thing Eigen does: stop the compiler from committing suicide #include "src/Core/util/DisableStupidWarnings.h" +// Handle NVCC/CUDA +#ifdef __CUDACC__ + // Do not try asserts on CUDA! + #ifndef EIGEN_NO_DEBUG + #define EIGEN_NO_DEBUG + #endif + + #ifdef EIGEN_INTERNAL_DEBUGGING + #undef EIGEN_INTERNAL_DEBUGGING + #endif + + // Do not try to vectorize on CUDA! 
+ #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif + + #ifdef EIGEN_EXCEPTIONS + #undef EIGEN_EXCEPTIONS + #endif + + // All functions callable from CUDA code must be qualified with __device__ + #define EIGEN_DEVICE_FUNC __host__ __device__ + +#else + #define EIGEN_DEVICE_FUNC + +#endif + +#if defined(__CUDA_ARCH__) + #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC; +#else + #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; +#endif + +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) + #define EIGEN_EXCEPTIONS +#endif + +#ifdef EIGEN_EXCEPTIONS + #include +#endif + // then include this file where all our macros are defined. It's really important to do it first because // it's where we do all the alignment settings (platform detection and honoring the user's will if he // defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization. @@ -21,7 +63,7 @@ // Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3) // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. -#if defined(__MINGW32__) && EIGEN_GNUC_AT_LEAST(4,6) +#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) #pragma GCC optimize ("-fno-ipa-cp-clone") #endif @@ -31,26 +73,26 @@ // and inclusion of their respective header files #include "src/Core/util/MKL_support.h" -// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into -// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks -#if !EIGEN_ALIGN +// if alignment is disabled, then disable vectorization. 
Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into +// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks +#if EIGEN_MAX_ALIGN_BYTES==0 #ifndef EIGEN_DONT_VECTORIZE #define EIGEN_DONT_VECTORIZE #endif #endif -#ifdef _MSC_VER +#if EIGEN_COMP_MSVC #include // for _aligned_malloc -- need it regardless of whether vectorization is enabled - #if (_MSC_VER >= 1500) // 2008 or later + #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later // Remember that usage of defined() in a #define is undefined by the standard. // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. - #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64) + #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER #endif #endif #else // Remember that usage of defined() in a #define is undefined by the standard - #if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) ) + #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC #endif #endif @@ -82,6 +124,19 @@ #ifdef __SSE4_2__ #define EIGEN_VECTORIZE_SSE4_2 #endif + #ifdef __AVX__ + #define EIGEN_VECTORIZE_AVX + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #ifdef __AVX2__ + #define EIGEN_VECTORIZE_AVX2 + #endif + #ifdef __FMA__ + #define EIGEN_VECTORIZE_FMA + #endif // include files @@ -95,7 +150,7 @@ extern "C" { // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. // Doing so triggers some issues with ICC. 
However old gcc versions seems to not have this file, thus: - #ifdef __INTEL_COMPILER + #if EIGEN_COMP_ICC >= 1110 #include #else #include @@ -112,8 +167,20 @@ #ifdef EIGEN_VECTORIZE_SSE4_2 #include #endif + #ifdef EIGEN_VECTORIZE_AVX + #include + #endif #endif } // end extern "C" + #elif defined __VSX__ + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_VSX + #include + // We need to #undef all these ugly tokens defined in + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel #elif defined __ALTIVEC__ #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_ALTIVEC @@ -123,13 +190,18 @@ #undef bool #undef vector #undef pixel - #elif defined __ARM_NEON__ + #elif (defined __ARM_NEON) || (defined __ARM_NEON__) #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_NEON #include #endif #endif +#if defined __CUDACC__ + #define EIGEN_VECTORIZE_CUDA + #include +#endif + #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE) #define EIGEN_HAS_OPENMP #endif @@ -139,7 +211,7 @@ #endif // MSVC for windows mobile does not have the errno.h file -#if !(defined(_MSC_VER) && defined(_WIN32_WCE)) && !defined(__ARMCC_VERSION) +#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM #define EIGEN_HAS_ERRNO #endif @@ -165,23 +237,17 @@ #endif // required for __cpuid, needs to be included after cmath -#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) +#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE #include #endif -#if defined(_CPPUNWIND) || defined(__EXCEPTIONS) - #define EIGEN_EXCEPTIONS -#endif - -#ifdef EIGEN_EXCEPTIONS - #include -#endif - /** \brief Namespace containing all symbols from the %Eigen library. 
*/ namespace Eigen { inline static const char *SimdInstructionSetsInUse(void) { -#if defined(EIGEN_VECTORIZE_SSE4_2) +#if defined(EIGEN_VECTORIZE_AVX) + return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_2) return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; #elif defined(EIGEN_VECTORIZE_SSE4_1) return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; @@ -193,6 +259,8 @@ inline static const char *SimdInstructionSetsInUse(void) { return "SSE, SSE2"; #elif defined(EIGEN_VECTORIZE_ALTIVEC) return "AltiVec"; +#elif defined(EIGEN_VECTORIZE_VSX) + return "VSX"; #elif defined(EIGEN_VECTORIZE_NEON) return "ARM NEON"; #else @@ -202,34 +270,9 @@ inline static const char *SimdInstructionSetsInUse(void) { } // end namespace Eigen -#define STAGE10_FULL_EIGEN2_API 10 -#define STAGE20_RESOLVE_API_CONFLICTS 20 -#define STAGE30_FULL_EIGEN3_API 30 -#define STAGE40_FULL_EIGEN3_STRICTNESS 40 -#define STAGE99_NO_EIGEN2_SUPPORT 99 - -#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS -#elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API -#elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS -#elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API -#elif defined EIGEN2_SUPPORT - // default to stage 3, that's what it's always meant - #define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API - #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API -#else - #define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT -#endif - -#ifdef EIGEN2_SUPPORT -#undef minor +#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined 
EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT +// This will generate an error message: +#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information #endif // we use size_t frequently and we'll never remember to prepend it with std:: everytime just to @@ -249,45 +292,75 @@ using std::ptrdiff_t; */ #include "src/Core/util/Constants.h" -#include "src/Core/util/ForwardDeclarations.h" #include "src/Core/util/Meta.h" +#include "src/Core/util/ForwardDeclarations.h" #include "src/Core/util/StaticAssert.h" #include "src/Core/util/XprHelper.h" #include "src/Core/util/Memory.h" #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" +#include "src/Core/SpecialFunctions.h" #include "src/Core/GenericPacketMath.h" -#if defined EIGEN_VECTORIZE_SSE +#if defined EIGEN_VECTORIZE_AVX + // Use AVX for floats and doubles, SSE for integers + #include "src/Core/arch/SSE/PacketMath.h" + #include "src/Core/arch/SSE/Complex.h" + #include "src/Core/arch/SSE/MathFunctions.h" + #include "src/Core/arch/AVX/PacketMath.h" + #include "src/Core/arch/AVX/MathFunctions.h" + #include "src/Core/arch/AVX/Complex.h" + #include "src/Core/arch/AVX/TypeCasting.h" +#elif defined EIGEN_VECTORIZE_SSE #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/SSE/Complex.h" -#elif defined EIGEN_VECTORIZE_ALTIVEC + #include "src/Core/arch/SSE/TypeCasting.h" +#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) #include "src/Core/arch/AltiVec/PacketMath.h" + #include "src/Core/arch/AltiVec/MathFunctions.h" #include "src/Core/arch/AltiVec/Complex.h" #elif defined EIGEN_VECTORIZE_NEON #include "src/Core/arch/NEON/PacketMath.h" + #include "src/Core/arch/NEON/MathFunctions.h" #include "src/Core/arch/NEON/Complex.h" #endif +#if defined EIGEN_VECTORIZE_CUDA + #include "src/Core/arch/CUDA/PacketMath.h" + #include 
"src/Core/arch/CUDA/MathFunctions.h" +#endif + #include "src/Core/arch/Default/Settings.h" -#include "src/Core/Functors.h" +#include "src/Core/functors/BinaryFunctors.h" +#include "src/Core/functors/UnaryFunctors.h" +#include "src/Core/functors/NullaryFunctors.h" +#include "src/Core/functors/StlFunctors.h" +#include "src/Core/functors/AssignmentFunctors.h" + #include "src/Core/DenseCoeffsBase.h" #include "src/Core/DenseBase.h" #include "src/Core/MatrixBase.h" #include "src/Core/EigenBase.h" +#include "src/Core/Product.h" +#include "src/Core/CoreEvaluators.h" +#include "src/Core/AssignEvaluator.h" + #ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874 // at least confirmed with Doxygen 1.5.5 and 1.5.6 #include "src/Core/Assign.h" #endif +#include "src/Core/ArrayBase.h" #include "src/Core/util/BlasUtil.h" #include "src/Core/DenseStorage.h" #include "src/Core/NestByValue.h" -#include "src/Core/ForceAlignedAccess.h" + +// #include "src/Core/ForceAlignedAccess.h" + #include "src/Core/ReturnByValue.h" #include "src/Core/NoAlias.h" #include "src/Core/PlainObjectBase.h" @@ -300,32 +373,33 @@ using std::ptrdiff_t; #include "src/Core/SelfCwiseBinaryOp.h" #include "src/Core/Dot.h" #include "src/Core/StableNorm.h" -#include "src/Core/MapBase.h" #include "src/Core/Stride.h" +#include "src/Core/MapBase.h" #include "src/Core/Map.h" +#include "src/Core/Ref.h" #include "src/Core/Block.h" #include "src/Core/VectorBlock.h" -#include "src/Core/Ref.h" #include "src/Core/Transpose.h" #include "src/Core/DiagonalMatrix.h" #include "src/Core/Diagonal.h" #include "src/Core/DiagonalProduct.h" -#include "src/Core/PermutationMatrix.h" -#include "src/Core/Transpositions.h" #include "src/Core/Redux.h" #include "src/Core/Visitor.h" #include "src/Core/Fuzzy.h" #include "src/Core/IO.h" #include "src/Core/Swap.h" #include "src/Core/CommaInitializer.h" -#include "src/Core/Flagged.h" -#include "src/Core/ProductBase.h" #include "src/Core/GeneralProduct.h" +#include 
"src/Core/Solve.h" +#include "src/Core/Inverse.h" +#include "src/Core/SolverBase.h" +#include "src/Core/PermutationMatrix.h" +#include "src/Core/Transpositions.h" #include "src/Core/TriangularMatrix.h" #include "src/Core/SelfAdjointView.h" #include "src/Core/products/GeneralBlockPanelKernel.h" #include "src/Core/products/Parallelizer.h" -#include "src/Core/products/CoeffBasedProduct.h" +#include "src/Core/ProductEvaluators.h" #include "src/Core/products/GeneralMatrixVector.h" #include "src/Core/products/GeneralMatrixMatrix.h" #include "src/Core/SolveTriangular.h" @@ -347,7 +421,6 @@ using std::ptrdiff_t; #include "src/Core/Random.h" #include "src/Core/Replicate.h" #include "src/Core/Reverse.h" -#include "src/Core/ArrayBase.h" #include "src/Core/ArrayWrapper.h" #ifdef EIGEN_USE_BLAS @@ -369,8 +442,4 @@ using std::ptrdiff_t; #include "src/Core/util/ReenableStupidWarnings.h" -#ifdef EIGEN2_SUPPORT -#include "Eigen2Support" -#endif - #endif // EIGEN_CORE_H diff --git a/nuparu/include/Eigen/Eigen b/nuparu/include/Eigen/Eigen index 19b40ea4..654c8dc6 100644 --- a/nuparu/include/Eigen/Eigen +++ b/nuparu/include/Eigen/Eigen @@ -1,2 +1,2 @@ #include "Dense" -//#include "Sparse" +#include "Sparse" diff --git a/nuparu/include/Eigen/Eigen2Support b/nuparu/include/Eigen/Eigen2Support deleted file mode 100644 index 36156d29..00000000 --- a/nuparu/include/Eigen/Eigen2Support +++ /dev/null @@ -1,82 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN2SUPPORT_H -#define EIGEN2SUPPORT_H - -#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H)) -#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header -#endif - -#include "src/Core/util/DisableStupidWarnings.h" - -/** \ingroup Support_modules - * \defgroup Eigen2Support_Module Eigen2 support module - * This module provides a couple of deprecated functions improving the compatibility with Eigen2. - * - * To use it, define EIGEN2_SUPPORT before including any Eigen header - * \code - * #define EIGEN2_SUPPORT - * \endcode - * - */ - -#include "src/Eigen2Support/Macros.h" -#include "src/Eigen2Support/Memory.h" -#include "src/Eigen2Support/Meta.h" -#include "src/Eigen2Support/Lazy.h" -#include "src/Eigen2Support/Cwise.h" -#include "src/Eigen2Support/CwiseOperators.h" -#include "src/Eigen2Support/TriangularSolver.h" -#include "src/Eigen2Support/Block.h" -#include "src/Eigen2Support/VectorBlock.h" -#include "src/Eigen2Support/Minor.h" -#include "src/Eigen2Support/MathFunctions.h" - - -#include "src/Core/util/ReenableStupidWarnings.h" - -// Eigen2 used to include iostream -#include - -#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \ -using Eigen::Matrix##SizeSuffix##TypeSuffix; \ -using Eigen::Vector##SizeSuffix##TypeSuffix; \ -using Eigen::RowVector##SizeSuffix##TypeSuffix; - -#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \ - -#define EIGEN_USING_MATRIX_TYPEDEFS \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd) - -#define USING_PART_OF_NAMESPACE_EIGEN \ 
-EIGEN_USING_MATRIX_TYPEDEFS \ -using Eigen::Matrix; \ -using Eigen::MatrixBase; \ -using Eigen::ei_random; \ -using Eigen::ei_real; \ -using Eigen::ei_imag; \ -using Eigen::ei_conj; \ -using Eigen::ei_abs; \ -using Eigen::ei_abs2; \ -using Eigen::ei_sqrt; \ -using Eigen::ei_exp; \ -using Eigen::ei_log; \ -using Eigen::ei_sin; \ -using Eigen::ei_cos; - -#endif // EIGEN2SUPPORT_H diff --git a/nuparu/include/Eigen/Eigenvalues b/nuparu/include/Eigen/Eigenvalues index 53c5a73a..ea93eb30 100644 --- a/nuparu/include/Eigen/Eigenvalues +++ b/nuparu/include/Eigen/Eigenvalues @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_EIGENVALUES_MODULE_H #define EIGEN_EIGENVALUES_MODULE_H diff --git a/nuparu/include/Eigen/Geometry b/nuparu/include/Eigen/Geometry index efd9d450..06b736e3 100644 --- a/nuparu/include/Eigen/Geometry +++ b/nuparu/include/Eigen/Geometry @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_GEOMETRY_MODULE_H #define EIGEN_GEOMETRY_MODULE_H @@ -9,10 +16,6 @@ #include "LU" #include -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - /** \defgroup Geometry_Module Geometry module * * @@ -33,27 +36,23 @@ #include "src/Geometry/OrthoMethods.h" #include "src/Geometry/EulerAngles.h" -#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS - #include "src/Geometry/Homogeneous.h" - #include "src/Geometry/RotationBase.h" - #include "src/Geometry/Rotation2D.h" - #include "src/Geometry/Quaternion.h" - #include "src/Geometry/AngleAxis.h" - #include "src/Geometry/Transform.h" - #include "src/Geometry/Translation.h" - #include "src/Geometry/Scaling.h" - #include "src/Geometry/Hyperplane.h" - #include "src/Geometry/ParametrizedLine.h" - #include "src/Geometry/AlignedBox.h" - #include "src/Geometry/Umeyama.h" - - #if defined EIGEN_VECTORIZE_SSE - #include "src/Geometry/arch/Geometry_SSE.h" - #endif -#endif - -#ifdef EIGEN2_SUPPORT -#include "src/Eigen2Support/Geometry/All.h" +#include "src/Geometry/Homogeneous.h" +#include "src/Geometry/RotationBase.h" +#include "src/Geometry/Rotation2D.h" +#include "src/Geometry/Quaternion.h" +#include "src/Geometry/AngleAxis.h" +#include "src/Geometry/Transform.h" +#include "src/Geometry/Translation.h" +#include "src/Geometry/Scaling.h" +#include "src/Geometry/Hyperplane.h" +#include "src/Geometry/ParametrizedLine.h" +#include "src/Geometry/AlignedBox.h" +#include "src/Geometry/Umeyama.h" + +// Use the SSE optimized version whenever possible. 
At the moment the +// SSE version doesn't compile when AVX is enabled +#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX +#include "src/Geometry/arch/Geometry_SSE.h" #endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/nuparu/include/Eigen/Householder b/nuparu/include/Eigen/Householder index 6e348db5..89cd81b1 100644 --- a/nuparu/include/Eigen/Householder +++ b/nuparu/include/Eigen/Householder @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_HOUSEHOLDER_MODULE_H #define EIGEN_HOUSEHOLDER_MODULE_H diff --git a/nuparu/include/Eigen/IterativeLinearSolvers b/nuparu/include/Eigen/IterativeLinearSolvers index 0f4159dc..957d5750 100644 --- a/nuparu/include/Eigen/IterativeLinearSolvers +++ b/nuparu/include/Eigen/IterativeLinearSolvers @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H #define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H @@ -12,28 +19,29 @@ * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse. * Those solvers are accessible via the following classes: * - ConjugateGradient for selfadjoint (hermitian) matrices, + * - LeastSquaresConjugateGradient for rectangular least-square problems, * - BiCGSTAB for general square matrices. 
* * These iterative solvers are associated with some preconditioners: * - IdentityPreconditioner - not really useful - * - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices. - * - IncompleteILUT - incomplete LU factorization with dual thresholding + * - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices. + * - IncompleteLUT - incomplete LU factorization with dual thresholding * * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport. * - * \code - * #include - * \endcode + \code + #include + \endcode */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - +#include "src/IterativeLinearSolvers/SolveWithGuess.h" #include "src/IterativeLinearSolvers/IterativeSolverBase.h" #include "src/IterativeLinearSolvers/BasicPreconditioners.h" #include "src/IterativeLinearSolvers/ConjugateGradient.h" +#include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" #include "src/IterativeLinearSolvers/BiCGSTAB.h" #include "src/IterativeLinearSolvers/IncompleteLUT.h" +#include "src/IterativeLinearSolvers/IncompleteCholesky.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/nuparu/include/Eigen/Jacobi b/nuparu/include/Eigen/Jacobi index ba8a4dc3..17c1d785 100644 --- a/nuparu/include/Eigen/Jacobi +++ b/nuparu/include/Eigen/Jacobi @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_JACOBI_MODULE_H #define EIGEN_JACOBI_MODULE_H diff --git a/nuparu/include/Eigen/LU b/nuparu/include/Eigen/LU index db579550..2d70c92d 100644 --- a/nuparu/include/Eigen/LU +++ b/nuparu/include/Eigen/LU @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_LU_MODULE_H #define EIGEN_LU_MODULE_H @@ -16,7 +23,6 @@ * \endcode */ -#include "src/misc/Solve.h" #include "src/misc/Kernel.h" #include "src/misc/Image.h" #include "src/LU/FullPivLU.h" @@ -25,16 +31,14 @@ #include "src/LU/PartialPivLU_MKL.h" #endif #include "src/LU/Determinant.h" -#include "src/LU/Inverse.h" +#include "src/LU/InverseImpl.h" -#if defined EIGEN_VECTORIZE_SSE +// Use the SSE optimized version whenever possible. At the moment the +// SSE version doesn't compile when AVX is enabled +#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX #include "src/LU/arch/Inverse_SSE.h" #endif -#ifdef EIGEN2_SUPPORT - #include "src/Eigen2Support/LU.h" -#endif - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_LU_MODULE_H diff --git a/nuparu/include/Eigen/LeastSquares b/nuparu/include/Eigen/LeastSquares deleted file mode 100644 index 35137c25..00000000 --- a/nuparu/include/Eigen/LeastSquares +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef EIGEN_REGRESSION_MODULE_H -#define EIGEN_REGRESSION_MODULE_H - -#ifndef EIGEN2_SUPPORT -#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT) -#endif - -// exclude from normal eigen3-only documentation -#ifdef EIGEN2_SUPPORT - -#include "Core" - -#include "src/Core/util/DisableStupidWarnings.h" - -#include "Eigenvalues" -#include "Geometry" - -/** \defgroup LeastSquares_Module LeastSquares module - * This module provides linear regression and 
related features. - * - * \code - * #include - * \endcode - */ - -#include "src/Eigen2Support/LeastSquares.h" - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN2_SUPPORT - -#endif // EIGEN_REGRESSION_MODULE_H diff --git a/nuparu/include/Eigen/MetisSupport b/nuparu/include/Eigen/MetisSupport index 6a113f7a..85c41bf3 100644 --- a/nuparu/include/Eigen/MetisSupport +++ b/nuparu/include/Eigen/MetisSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_METISSUPPORT_MODULE_H #define EIGEN_METISSUPPORT_MODULE_H diff --git a/nuparu/include/Eigen/OrderingMethods b/nuparu/include/Eigen/OrderingMethods index 7c0f1fff..d8ea3619 100644 --- a/nuparu/include/Eigen/OrderingMethods +++ b/nuparu/include/Eigen/OrderingMethods @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_ORDERINGMETHODS_MODULE_H #define EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/nuparu/include/Eigen/PaStiXSupport b/nuparu/include/Eigen/PaStiXSupport index 7c616ee5..3411dfac 100644 --- a/nuparu/include/Eigen/PaStiXSupport +++ b/nuparu/include/Eigen/PaStiXSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_PASTIXSUPPORT_MODULE_H #define EIGEN_PASTIXSUPPORT_MODULE_H @@ -35,12 +42,8 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/PaStiXSupport/PaStiXSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_PASTIXSUPPORT_MODULE_H diff --git a/nuparu/include/Eigen/PardisoSupport b/nuparu/include/Eigen/PardisoSupport old mode 100644 new mode 100755 index 99330ce7..340edf51 --- a/nuparu/include/Eigen/PardisoSupport +++ b/nuparu/include/Eigen/PardisoSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_PARDISOSUPPORT_MODULE_H #define EIGEN_PARDISOSUPPORT_MODULE_H @@ -7,8 +14,6 @@ #include -#include - /** \ingroup Support_modules * \defgroup PardisoSupport_Module PardisoSupport module * diff --git a/nuparu/include/Eigen/QR b/nuparu/include/Eigen/QR index ac5b0269..f74f365f 100644 --- a/nuparu/include/Eigen/QR +++ b/nuparu/include/Eigen/QR @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_QR_MODULE_H #define EIGEN_QR_MODULE_H @@ -15,14 +22,15 @@ * * This module provides various QR decompositions * This module also provides some MatrixBase methods, including: - * - MatrixBase::qr(), + * - MatrixBase::householderQr() + * - MatrixBase::colPivHouseholderQr() + * - MatrixBase::fullPivHouseholderQr() * * \code * #include * \endcode */ -#include "src/misc/Solve.h" #include "src/QR/HouseholderQR.h" #include "src/QR/FullPivHouseholderQR.h" #include "src/QR/ColPivHouseholderQR.h" @@ -31,15 +39,7 @@ #include "src/QR/ColPivHouseholderQR_MKL.h" #endif -#ifdef EIGEN2_SUPPORT -#include "src/Eigen2Support/QR.h" -#endif - #include "src/Core/util/ReenableStupidWarnings.h" -#ifdef EIGEN2_SUPPORT -#include "Eigenvalues" -#endif - #endif // EIGEN_QR_MODULE_H /* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/nuparu/include/Eigen/QtAlignedMalloc b/nuparu/include/Eigen/QtAlignedMalloc index 46f7d83b..4044d5ac 100644 --- a/nuparu/include/Eigen/QtAlignedMalloc +++ b/nuparu/include/Eigen/QtAlignedMalloc @@ -1,3 +1,9 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_QTMALLOC_MODULE_H #define EIGEN_QTMALLOC_MODULE_H diff --git a/nuparu/include/Eigen/SPQRSupport b/nuparu/include/Eigen/SPQRSupport index 77016442..f9489dcd 100644 --- a/nuparu/include/Eigen/SPQRSupport +++ b/nuparu/include/Eigen/SPQRSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_SPQRSUPPORT_MODULE_H #define EIGEN_SPQRSUPPORT_MODULE_H @@ -21,8 +28,6 @@ * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" #include "src/CholmodSupport/CholmodSupport.h" #include "src/SPQRSupport/SuiteSparseQRSupport.h" diff --git a/nuparu/include/Eigen/SVD b/nuparu/include/Eigen/SVD index fd310017..b353f3f5 100644 --- a/nuparu/include/Eigen/SVD +++ b/nuparu/include/Eigen/SVD @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SVD_MODULE_H #define EIGEN_SVD_MODULE_H @@ -12,24 +19,25 @@ * * * This module provides SVD decomposition for matrices (both real and complex). - * This decomposition is accessible via the following MatrixBase method: + * Two decomposition algorithms are provided: + * - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones. + * - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems. 
+ * These decompositions are accessible via the respective classes and following MatrixBase methods: * - MatrixBase::jacobiSvd() + * - MatrixBase::bdcSvd() * * \code * #include * \endcode */ -#include "src/misc/Solve.h" +#include "src/SVD/UpperBidiagonalization.h" +#include "src/SVD/SVDBase.h" #include "src/SVD/JacobiSVD.h" +#include "src/SVD/BDCSVD.h" #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) #include "src/SVD/JacobiSVD_MKL.h" #endif -#include "src/SVD/UpperBidiagonalization.h" - -#ifdef EIGEN2_SUPPORT -#include "src/Eigen2Support/SVD.h" -#endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/nuparu/include/Eigen/Sparse b/nuparu/include/Eigen/Sparse index 7cc9c091..a2ef7a66 100644 --- a/nuparu/include/Eigen/Sparse +++ b/nuparu/include/Eigen/Sparse @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_SPARSE_MODULE_H #define EIGEN_SPARSE_MODULE_H @@ -11,9 +18,9 @@ * - \ref SparseQR_Module * - \ref IterativeLinearSolvers_Module * - * \code - * #include - * \endcode + \code + #include + \endcode */ #include "SparseCore" diff --git a/nuparu/include/Eigen/SparseCholesky b/nuparu/include/Eigen/SparseCholesky index 9f5056aa..b6a320c4 100644 --- a/nuparu/include/Eigen/SparseCholesky +++ b/nuparu/include/Eigen/SparseCholesky @@ -34,8 +34,6 @@ #error The SparseCholesky module has nothing to offer in MPL2 only mode #endif -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" #include "src/SparseCholesky/SimplicialCholesky.h" #ifndef EIGEN_MPL2_ONLY diff --git a/nuparu/include/Eigen/SparseCore b/nuparu/include/Eigen/SparseCore index 9b5be5e1..76966c4c 100644 --- a/nuparu/include/Eigen/SparseCore +++ b/nuparu/include/Eigen/SparseCore @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPARSECORE_MODULE_H #define EIGEN_SPARSECORE_MODULE_H @@ -14,7 +21,7 @@ /** * \defgroup SparseCore_Module SparseCore module * - * This module provides a sparse matrix representation, and basic associatd matrix manipulations + * This module provides a sparse matrix representation, and basic associated matrix manipulations * and operations. * * See the \ref TutorialSparse "Sparse tutorial" @@ -26,37 +33,35 @@ * This module depends on: Core. */ -namespace Eigen { - -/** The type used to identify a general sparse storage. 
*/ -struct Sparse {}; - -} - #include "src/SparseCore/SparseUtil.h" #include "src/SparseCore/SparseMatrixBase.h" +#include "src/SparseCore/SparseAssign.h" #include "src/SparseCore/CompressedStorage.h" #include "src/SparseCore/AmbiVector.h" +#include "src/SparseCore/SparseCompressedBase.h" #include "src/SparseCore/SparseMatrix.h" +#include "src/SparseCore/SparseMap.h" #include "src/SparseCore/MappedSparseMatrix.h" #include "src/SparseCore/SparseVector.h" -#include "src/SparseCore/SparseBlock.h" -#include "src/SparseCore/SparseTranspose.h" +#include "src/SparseCore/SparseRef.h" #include "src/SparseCore/SparseCwiseUnaryOp.h" #include "src/SparseCore/SparseCwiseBinaryOp.h" +#include "src/SparseCore/SparseTranspose.h" +#include "src/SparseCore/SparseBlock.h" #include "src/SparseCore/SparseDot.h" -#include "src/SparseCore/SparsePermutation.h" #include "src/SparseCore/SparseRedux.h" -#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/SparseView.h" +#include "src/SparseCore/SparseDiagonalProduct.h" #include "src/SparseCore/ConservativeSparseSparseProduct.h" #include "src/SparseCore/SparseSparseProductWithPruning.h" #include "src/SparseCore/SparseProduct.h" #include "src/SparseCore/SparseDenseProduct.h" -#include "src/SparseCore/SparseDiagonalProduct.h" -#include "src/SparseCore/SparseTriangularView.h" #include "src/SparseCore/SparseSelfAdjointView.h" +#include "src/SparseCore/SparseTriangularView.h" #include "src/SparseCore/TriangularSolver.h" -#include "src/SparseCore/SparseView.h" +#include "src/SparseCore/SparsePermutation.h" +#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/SparseSolverBase.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/nuparu/include/Eigen/SparseLU b/nuparu/include/Eigen/SparseLU index 8527a49b..38b38b53 100644 --- a/nuparu/include/Eigen/SparseLU +++ b/nuparu/include/Eigen/SparseLU @@ -20,9 +20,6 @@ * Please, see the documentation of the SparseLU class for more details. 
*/ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - // Ordering interface #include "OrderingMethods" diff --git a/nuparu/include/Eigen/SparseQR b/nuparu/include/Eigen/SparseQR index 4ee42065..a6f3b7f7 100644 --- a/nuparu/include/Eigen/SparseQR +++ b/nuparu/include/Eigen/SparseQR @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPARSEQR_MODULE_H #define EIGEN_SPARSEQR_MODULE_H @@ -21,9 +28,6 @@ * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "OrderingMethods" #include "src/SparseCore/SparseColEtree.h" #include "src/SparseQR/SparseQR.h" diff --git a/nuparu/include/Eigen/StdDeque b/nuparu/include/Eigen/StdDeque index f2723477..be3a7f82 100644 --- a/nuparu/include/Eigen/StdDeque +++ b/nuparu/include/Eigen/StdDeque @@ -14,7 +14,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ #define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) diff --git a/nuparu/include/Eigen/StdList b/nuparu/include/Eigen/StdList index 225c1e18..07ba1297 100644 --- a/nuparu/include/Eigen/StdList +++ b/nuparu/include/Eigen/StdList @@ -13,7 +13,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ #define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) 
diff --git a/nuparu/include/Eigen/StdVector b/nuparu/include/Eigen/StdVector index 6b22627f..fdfc3776 100644 --- a/nuparu/include/Eigen/StdVector +++ b/nuparu/include/Eigen/StdVector @@ -14,7 +14,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ #define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) diff --git a/nuparu/include/Eigen/SuperLUSupport b/nuparu/include/Eigen/SuperLUSupport index 575e14fb..113f58ee 100644 --- a/nuparu/include/Eigen/SuperLUSupport +++ b/nuparu/include/Eigen/SuperLUSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SUPERLUSUPPORT_MODULE_H #define EIGEN_SUPERLUSUPPORT_MODULE_H @@ -36,6 +43,8 @@ namespace Eigen { struct SluMatrix; } * - class SuperLU: a supernodal sequential LU factorization. * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods). * + * \warning This wrapper is only for the 4.x versions of SuperLU. The 3.x and 5.x versions are not supported. + * * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting. 
* * \code @@ -48,12 +57,8 @@ namespace Eigen { struct SluMatrix; } * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/SuperLUSupport/SuperLUSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_SUPERLUSUPPORT_MODULE_H diff --git a/nuparu/include/Eigen/UmfPackSupport b/nuparu/include/Eigen/UmfPackSupport index 984f64a8..4a9f46a1 100644 --- a/nuparu/include/Eigen/UmfPackSupport +++ b/nuparu/include/Eigen/UmfPackSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_UMFPACKSUPPORT_MODULE_H #define EIGEN_UMFPACKSUPPORT_MODULE_H @@ -26,9 +33,6 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/UmfPackSupport/UmfPackSupport.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/nuparu/include/Eigen/src/Cholesky/LDLT.h b/nuparu/include/Eigen/src/Cholesky/LDLT.h index d19cb396..6fcae01f 100644 --- a/nuparu/include/Eigen/src/Cholesky/LDLT.h +++ b/nuparu/include/Eigen/src/Cholesky/LDLT.h @@ -16,7 +16,10 @@ namespace Eigen { namespace internal { -template struct LDLT_Traits; + template struct LDLT_Traits; + + // PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef + enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite }; } /** \ingroup Cholesky_Module @@ -40,7 +43,7 @@ template struct LDLT_Traits; * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky * decomposition to determine whether a system of equations has a solution. 
* - * \sa MatrixBase::ldlt(), class LLT + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT */ template class LDLT { @@ -56,7 +59,8 @@ template class LDLT }; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; - typedef typename MatrixType::Index Index; + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + typedef typename MatrixType::StorageIndex StorageIndex; typedef Matrix TmpMatrixType; typedef Transpositions TranspositionType; @@ -69,7 +73,12 @@ template class LDLT * The default constructor is useful in cases in which the user intends to * perform decompositions via LDLT::compute(const MatrixType&). */ - LDLT() : m_matrix(), m_transpositions(), m_isInitialized(false) {} + LDLT() + : m_matrix(), + m_transpositions(), + m_sign(internal::ZeroSign), + m_isInitialized(false) + {} /** \brief Default Constructor with memory preallocation * @@ -77,10 +86,11 @@ template class LDLT * according to the specified problem \a size. * \sa LDLT() */ - LDLT(Index size) + explicit LDLT(Index size) : m_matrix(size, size), m_transpositions(size), m_temporary(size), + m_sign(internal::ZeroSign), m_isInitialized(false) {} @@ -89,13 +99,15 @@ template class LDLT * This calculates the decomposition for the input \a matrix. 
* \sa LDLT(Index size) */ - LDLT(const MatrixType& matrix) + template + explicit LDLT(const EigenBase& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_transpositions(matrix.rows()), m_temporary(matrix.rows()), + m_sign(internal::ZeroSign), m_isInitialized(false) { - compute(matrix); + compute(matrix.derived()); } /** Clear any existing decomposition @@ -139,21 +151,14 @@ template class LDLT inline bool isPositive() const { eigen_assert(m_isInitialized && "LDLT is not initialized."); - return m_sign == 1; - } - - #ifdef EIGEN2_SUPPORT - inline bool isPositiveDefinite() const - { - return isPositive(); + return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign; } - #endif /** \returns true if the matrix is negative (semidefinite) */ inline bool isNegative(void) const { eigen_assert(m_isInitialized && "LDLT is not initialized."); - return m_sign == -1; + return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign; } /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A. @@ -169,31 +174,23 @@ template class LDLT * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function * computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular. 
* - * \sa MatrixBase::ldlt() + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt() */ template - inline const internal::solve_retval + inline const Solve solve(const MatrixBase& b) const { eigen_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_matrix.rows()==b.rows() && "LDLT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); + return Solve(*this, b.derived()); } - #ifdef EIGEN2_SUPPORT - template - bool solve(const MatrixBase& b, ResultType *result) const - { - *result = this->solve(b); - return true; - } - #endif - template bool solveInPlace(MatrixBase &bAndX) const; - LDLT& compute(const MatrixType& matrix); + template + LDLT& compute(const EigenBase& matrix); template LDLT& rankUpdate(const MatrixBase& w, const RealScalar& alpha=1); @@ -223,8 +220,19 @@ template class LDLT eigen_assert(m_isInitialized && "LDLT is not initialized."); return Success; } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } /** \internal * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U. 
@@ -235,7 +243,7 @@ template class LDLT MatrixType m_matrix; TranspositionType m_transpositions; TmpMatrixType m_temporary; - int m_sign; + internal::SignMatrix m_sign; bool m_isInitialized; }; @@ -246,49 +254,32 @@ template struct ldlt_inplace; template<> struct ldlt_inplace { template - static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0) + static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign) { using std::abs; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; + typedef typename TranspositionType::StorageIndex IndexType; eigen_assert(mat.rows()==mat.cols()); const Index size = mat.rows(); if (size <= 1) { transpositions.setIdentity(); - if(sign) - *sign = numext::real(mat.coeff(0,0))>0 ? 1:-1; + if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef; + else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef; + else sign = ZeroSign; return true; } - RealScalar cutoff(0), biggest_in_corner; - for (Index k = 0; k < size; ++k) { // Find largest diagonal element Index index_of_biggest_in_corner; - biggest_in_corner = mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); + mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); index_of_biggest_in_corner += k; - if(k == 0) - { - // The biggest overall is the point of reference to which further diagonals - // are compared; if any diagonal is negligible compared - // to the largest overall, the algorithm bails. - cutoff = abs(NumTraits::epsilon() * biggest_in_corner); - } - - // Finish early if the matrix is not full rank. 
- if(biggest_in_corner < cutoff) - { - for(Index i = k; i < size; i++) transpositions.coeffRef(i) = i; - if(sign) *sign = 0; - break; - } - - transpositions.coeffRef(k) = index_of_biggest_in_corner; + transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner); if(k != index_of_biggest_in_corner) { // apply the transposition while taking care to consider only @@ -297,7 +288,7 @@ template<> struct ldlt_inplace mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k)); mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s)); std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner)); - for(int i=k+1;i struct ldlt_inplace if(k>0) { - temp.head(k) = mat.diagonal().head(k).asDiagonal() * A10.adjoint(); + temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint(); mat.coeffRef(k,k) -= (A10 * temp.head(k)).value(); if(rs>0) A21.noalias() -= A20 * temp.head(k); } - if((rs>0) && (abs(mat.coeffRef(k,k)) > cutoff)) - A21 /= mat.coeffRef(k,k); - if(sign) - { - // LDLT is not guaranteed to work for indefinite matrices, but let's try to get the sign right - int newSign = numext::real(mat.diagonal().coeff(index_of_biggest_in_corner)) > 0; - if(k == 0) - *sign = newSign; - else if(*sign != newSign) - *sign = 0; + // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot + // was smaller than the cutoff value. However, since LDLT is not rank-revealing + // we should only make sure that we do not introduce INF or NaN values. + // Remark that LAPACK also uses 0 as the cutoff value. 
+ RealScalar realAkk = numext::real(mat.coeffRef(k,k)); + if((rs>0) && (abs(realAkk) > RealScalar(0))) + A21 /= realAkk; + + if (sign == PositiveSemiDef) { + if (realAkk < 0) sign = Indefinite; + } else if (sign == NegativeSemiDef) { + if (realAkk > 0) sign = Indefinite; + } else if (sign == ZeroSign) { + if (realAkk > 0) sign = PositiveSemiDef; + else if (realAkk < 0) sign = NegativeSemiDef; } } @@ -353,7 +349,6 @@ template<> struct ldlt_inplace using numext::isfinite; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; const Index size = mat.rows(); eigen_assert(mat.cols() == size && w.size()==size); @@ -399,7 +394,7 @@ template<> struct ldlt_inplace template<> struct ldlt_inplace { template - static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0) + static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign) { Transpose matt(mat); return ldlt_inplace::unblocked(matt, transpositions, temp, sign); @@ -417,16 +412,16 @@ template struct LDLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } }; template struct LDLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } - static inline MatrixU getU(const MatrixType& m) { return m; } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } }; } // end namespace internal @@ 
-434,18 +429,22 @@ template struct LDLT_Traits /** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix */ template -LDLT& LDLT::compute(const MatrixType& a) +template +LDLT& LDLT::compute(const EigenBase& a) { + check_template_parameters(); + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); - m_matrix = a; + m_matrix = a.derived(); m_transpositions.resize(size); m_isInitialized = false; m_temporary.resize(size); + m_sign = internal::ZeroSign; - internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, &m_sign); + internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, m_sign); m_isInitialized = true; return *this; @@ -458,8 +457,9 @@ LDLT& LDLT::compute(const MatrixType& a) */ template template -LDLT& LDLT::rankUpdate(const MatrixBase& w, const typename NumTraits::Real& sigma) +LDLT& LDLT::rankUpdate(const MatrixBase& w, const typename LDLT::RealScalar& sigma) { + typedef typename TranspositionType::StorageIndex IndexType; const Index size = w.rows(); if (m_isInitialized) { @@ -471,9 +471,9 @@ LDLT& LDLT::rankUpdate(const MatrixBase=0 ? 1 : -1; + m_sign = sigma>=0 ? 
internal::PositiveSemiDef : internal::NegativeSemiDef; m_isInitialized = true; } @@ -482,48 +482,45 @@ LDLT& LDLT::rankUpdate(const MatrixBase -struct solve_retval, Rhs> - : solve_retval_base, Rhs> +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const { - typedef LDLT<_MatrixType,_UpLo> LDLTType; - EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs) - - template void evalTo(Dest& dst) const + eigen_assert(rhs.rows() == rows()); + // dst = P b + dst = m_transpositions * rhs; + + // dst = L^-1 (P b) + matrixL().solveInPlace(dst); + + // dst = D^-1 (L^-1 P b) + // more precisely, use pseudo-inverse of D (see bug 241) + using std::abs; + const typename Diagonal::RealReturnType vecD(vectorD()); + // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon + // as motivated by LAPACK's xGELSS: + // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); + // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest + // diagonal element is not well justified and leads to numerical issues in some cases. + // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. 
+ RealScalar tolerance = RealScalar(1) / NumTraits::highest(); + + for (Index i = 0; i < vecD.size(); ++i) { - eigen_assert(rhs().rows() == dec().matrixLDLT().rows()); - // dst = P b - dst = dec().transpositionsP() * rhs(); - - // dst = L^-1 (P b) - dec().matrixL().solveInPlace(dst); - - // dst = D^-1 (L^-1 P b) - // more precisely, use pseudo-inverse of D (see bug 241) - using std::abs; - using std::max; - typedef typename LDLTType::MatrixType MatrixType; - typedef typename LDLTType::Scalar Scalar; - typedef typename LDLTType::RealScalar RealScalar; - const Diagonal vectorD = dec().vectorD(); - RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() * NumTraits::epsilon(), - RealScalar(1) / NumTraits::highest()); // motivated by LAPACK's xGELSS - for (Index i = 0; i < vectorD.size(); ++i) { - if(abs(vectorD(i)) > tolerance) - dst.row(i) /= vectorD(i); - else - dst.row(i).setZero(); - } + if(abs(vecD(i)) > tolerance) + dst.row(i) /= vecD(i); + else + dst.row(i).setZero(); + } - // dst = L^-T (D^-1 L^-1 P b) - dec().matrixU().solveInPlace(dst); + // dst = L^-T (D^-1 L^-1 P b) + matrixU().solveInPlace(dst); - // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b - dst = dec().transpositionsP().transpose() * dst; - } -}; + // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b + dst = m_transpositions.transpose() * dst; } +#endif /** \internal use x = ldlt_object.solve(x); * @@ -566,7 +563,7 @@ MatrixType LDLT::reconstructedMatrix() const // L^* P res = matrixU() * res; // D(L^*P) - res = vectorD().asDiagonal() * res; + res = vectorD().real().asDiagonal() * res; // L(DL^*P) res = matrixL() * res; // P^T (LDL^*P) @@ -575,8 +572,10 @@ MatrixType LDLT::reconstructedMatrix() const return res; } +#ifndef __CUDACC__ /** \cholesky_module * \returns the Cholesky decomposition with full pivoting without square root of \c *this + * \sa MatrixBase::ldlt() */ template inline const LDLT::PlainObject, UpLo> @@ -587,6 +586,7 @@ SelfAdjointView::ldlt() const /** \cholesky_module * \returns the 
Cholesky decomposition with full pivoting without square root of \c *this + * \sa SelfAdjointView::ldlt() */ template inline const LDLT::PlainObject> @@ -594,6 +594,7 @@ MatrixBase::ldlt() const { return LDLT(derived()); } +#endif // __CUDACC__ } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Cholesky/LLT.h b/nuparu/include/Eigen/src/Cholesky/LLT.h index 2e6189f7..1f0091f3 100644 --- a/nuparu/include/Eigen/src/Cholesky/LLT.h +++ b/nuparu/include/Eigen/src/Cholesky/LLT.h @@ -41,7 +41,7 @@ template struct LLT_Traits; * Example: \include LLT_example.cpp * Output: \verbinclude LLT_example.out * - * \sa MatrixBase::llt(), class LDLT + * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT */ /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH) * Note that during the decomposition, only the upper triangular part of A is considered. Therefore, @@ -59,7 +59,8 @@ template class LLT }; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; - typedef typename MatrixType::Index Index; + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + typedef typename MatrixType::StorageIndex StorageIndex; enum { PacketSize = internal::packet_traits::size, @@ -83,14 +84,15 @@ template class LLT * according to the specified problem \a size. 
* \sa LLT() */ - LLT(Index size) : m_matrix(size, size), + explicit LLT(Index size) : m_matrix(size, size), m_isInitialized(false) {} - LLT(const MatrixType& matrix) + template + explicit LLT(const EigenBase& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_isInitialized(false) { - compute(matrix); + compute(matrix.derived()); } /** \returns a view of the upper triangular matrix U */ @@ -115,33 +117,23 @@ template class LLT * Example: \include LLT_solve.cpp * Output: \verbinclude LLT_solve.out * - * \sa solveInPlace(), MatrixBase::llt() + * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt() */ template - inline const internal::solve_retval + inline const Solve solve(const MatrixBase& b) const { eigen_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_matrix.rows()==b.rows() && "LLT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); + return Solve(*this, b.derived()); } - #ifdef EIGEN2_SUPPORT - template - bool solve(const MatrixBase& b, ResultType *result) const - { - *result = this->solve(b); - return true; - } - - bool isPositiveDefinite() const { return true; } - #endif - template void solveInPlace(MatrixBase &bAndX) const; - LLT& compute(const MatrixType& matrix); + template + LLT& compute(const EigenBase& matrix); /** \returns the LLT decomposition matrix * @@ -172,8 +164,20 @@ template class LLT template LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1); + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + /** \internal * Used to compute and store L * The strict upper part is not used and even not initialized. 
@@ -188,12 +192,11 @@ namespace internal { template struct llt_inplace; template -static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) +static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) { using std::sqrt; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; typedef typename MatrixType::ColXpr ColXpr; typedef typename internal::remove_all::type ColXprCleaned; typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; @@ -262,10 +265,9 @@ template struct llt_inplace { typedef typename NumTraits::Real RealScalar; template - static typename MatrixType::Index unblocked(MatrixType& mat) + static Index unblocked(MatrixType& mat) { using std::sqrt; - typedef typename MatrixType::Index Index; eigen_assert(mat.rows()==mat.cols()); const Index size = mat.rows(); @@ -283,15 +285,14 @@ template struct llt_inplace return k; mat.coeffRef(k,k) = x = sqrt(x); if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint(); - if (rs>0) A21 *= RealScalar(1)/x; + if (rs>0) A21 /= x; } return -1; } template - static typename MatrixType::Index blocked(MatrixType& m) + static Index blocked(MatrixType& m) { - typedef typename MatrixType::Index Index; eigen_assert(m.rows()==m.cols()); Index size = m.rows(); if(size<32) @@ -322,7 +323,7 @@ template struct llt_inplace } template - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } @@ -333,19 +334,19 @@ template struct llt_inplace typedef typename NumTraits::Real RealScalar; template - static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat) + static 
EIGEN_STRONG_INLINE Index unblocked(MatrixType& mat) { Transpose matt(mat); return llt_inplace::unblocked(matt); } template - static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat) + static EIGEN_STRONG_INLINE Index blocked(MatrixType& mat) { Transpose matt(mat); return llt_inplace::blocked(matt); } template - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) { Transpose matt(mat); return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); @@ -356,8 +357,8 @@ template struct LLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } static bool inplace_decomposition(MatrixType& m) { return llt_inplace::blocked(m)==-1; } }; @@ -366,8 +367,8 @@ template struct LLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } - static inline MatrixU getU(const MatrixType& m) { return m; } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } static bool inplace_decomposition(MatrixType& m) { return llt_inplace::blocked(m)==-1; } }; @@ -382,12 +383,15 @@ template struct LLT_Traits * Output: \verbinclude TutorialLinAlgComputeTwice.out */ template -LLT& LLT::compute(const MatrixType& a) +template +LLT& LLT::compute(const EigenBase& a) { + check_template_parameters(); + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); m_matrix.resize(size, size); - m_matrix = a; + 
m_matrix = a.derived(); m_isInitialized = true; bool ok = Traits::inplace_decomposition(m_matrix); @@ -415,22 +419,16 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c return *this; } - -namespace internal { -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const { - typedef LLT<_MatrixType,UpLo> LLTType; - EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs) - - template void evalTo(Dest& dst) const - { - dst = rhs(); - dec().solveInPlace(dst); - } -}; + dst = rhs; + solveInPlace(dst); } +#endif /** \internal use x = llt_object.solve(x); * @@ -465,8 +463,10 @@ MatrixType LLT::reconstructedMatrix() const return matrixL() * matrixL().adjoint().toDenseMatrix(); } +#ifndef __CUDACC__ /** \cholesky_module * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() */ template inline const LLT::PlainObject> @@ -477,6 +477,7 @@ MatrixBase::llt() const /** \cholesky_module * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() */ template inline const LLT::PlainObject, UpLo> @@ -484,7 +485,8 @@ SelfAdjointView::llt() const { return LLT(m_matrix); } - +#endif // __CUDACC__ + } // end namespace Eigen #endif // EIGEN_LLT_H diff --git a/nuparu/include/Eigen/src/Cholesky/LLT_MKL.h b/nuparu/include/Eigen/src/Cholesky/LLT_MKL.h index 64daa445..0d42cb5b 100644 --- a/nuparu/include/Eigen/src/Cholesky/LLT_MKL.h +++ b/nuparu/include/Eigen/src/Cholesky/LLT_MKL.h @@ -46,7 +46,7 @@ template struct mkl_llt; template<> struct mkl_llt \ { \ template \ - static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \ + static inline Index potrf(MatrixType& m, char uplo) \ { \ lapack_int matrix_order; \ lapack_int size, lda, info, StorageOrder; \ @@ -60,30 +60,30 @@ template<> struct mkl_llt \ lda = m.outerStride(); \ \ info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, 
(MKLTYPE*)a, lda ); \ - info = (info==0) ? Success : NumericalIssue; \ + info = (info==0) ? -1 : info>0 ? info-1 : size; \ return info; \ } \ }; \ template<> struct llt_inplace \ { \ template \ - static typename MatrixType::Index blocked(MatrixType& m) \ + static Index blocked(MatrixType& m) \ { \ return mkl_llt::potrf(m, 'L'); \ } \ template \ - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \ }; \ template<> struct llt_inplace \ { \ template \ - static typename MatrixType::Index blocked(MatrixType& m) \ + static Index blocked(MatrixType& m) \ { \ return mkl_llt::potrf(m, 'U'); \ } \ template \ - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ { \ Transpose matt(mat); \ return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); \ diff --git a/nuparu/include/Eigen/src/CholmodSupport/CholmodSupport.h b/nuparu/include/Eigen/src/CholmodSupport/CholmodSupport.h index 783324b0..06421d5e 100644 --- a/nuparu/include/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/nuparu/include/Eigen/src/CholmodSupport/CholmodSupport.h @@ -48,8 +48,8 @@ void cholmod_configure_matrix(CholmodType& mat) /** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object. * Note that the data are shared. 
*/ -template -cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) +template +cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat) { cholmod_sparse res; res.nzmax = mat.nonZeros(); @@ -58,10 +58,12 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) res.p = mat.outerIndexPtr(); res.i = mat.innerIndexPtr(); res.x = mat.valuePtr(); + res.z = 0; res.sorted = 1; if(mat.isCompressed()) { res.packed = 1; + res.nz = 0; } else { @@ -72,11 +74,11 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) res.dtype = 0; res.stype = -1; - if (internal::is_same<_Index,int>::value) + if (internal::is_same<_StorageIndex,int>::value) { res.itype = CHOLMOD_INT; } - else if (internal::is_same<_Index,UF_long>::value) + else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value) { res.itype = CHOLMOD_LONG; } @@ -103,7 +105,7 @@ const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& /** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix. * The data are not copied but shared. */ template -cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) +cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) { cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived()); @@ -136,12 +138,12 @@ cholmod_dense viewAsCholmod(MatrixBase& mat) /** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix. * The data are not copied but shared. 
*/ -template -MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) +template +MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) { - return MappedSparseMatrix - (cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], - static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); + return MappedSparseMatrix + (cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], + static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); } enum CholmodMode { @@ -155,26 +157,35 @@ enum CholmodMode { * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT */ template -class CholmodBase : internal::noncopyable +class CholmodBase : public SparseSolverBase { + protected: + typedef SparseSolverBase Base; + using Base::derived; + using Base::m_isInitialized; public: typedef _MatrixType MatrixType; enum { UpLo = _UpLo }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef MatrixType CholMatrixType; - typedef typename MatrixType::Index Index; + typedef typename MatrixType::StorageIndex StorageIndex; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: CholmodBase() - : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) + : m_cholmodFactor(0), m_info(Success) { + m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); cholmod_start(&m_cholmod); } - CholmodBase(const MatrixType& matrix) - : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) + explicit CholmodBase(const MatrixType& matrix) + : m_cholmodFactor(0), m_info(Success) { m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); cholmod_start(&m_cholmod); @@ -188,11 +199,8 @@ class CholmodBase : internal::noncopyable cholmod_finish(&m_cholmod); } - inline Index cols() const { return m_cholmodFactor->n; } - inline Index rows() const { return m_cholmodFactor->n; } - - Derived& derived() { return *static_cast(this); } - const Derived& derived() const { return *static_cast(this); } + 
inline StorageIndex cols() const { return internal::convert_index(m_cholmodFactor->n); } + inline StorageIndex rows() const { return internal::convert_index(m_cholmodFactor->n); } /** \brief Reports whether previous computation was successful. * @@ -213,35 +221,7 @@ class CholmodBase : internal::noncopyable return derived(); } - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::solve_retval - solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval - solve(const SparseMatrixBase& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval(*this, b.derived()); - } - - /** Performs a symbolic decomposition on the sparcity of \a matrix. + /** Performs a symbolic decomposition on the sparsity pattern of \a matrix. * * This function is particularly useful when solving for several problems having the same structure. * @@ -265,7 +245,7 @@ class CholmodBase : internal::noncopyable /** Performs a numeric decomposition of \a matrix * - * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed. + * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed. 
* * \sa analyzePattern() */ @@ -287,7 +267,7 @@ class CholmodBase : internal::noncopyable #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal */ template - void _solve(const MatrixBase &b, MatrixBase &dest) const + void _solve_impl(const MatrixBase &b, MatrixBase &dest) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); const Index size = m_cholmodFactor->n; @@ -301,15 +281,16 @@ class CholmodBase : internal::noncopyable if(!x_cd) { this->m_info = NumericalIssue; + return; } - // TODO optimize this copy by swapping when possible (be carreful with alignment, etc.) + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) dest = Matrix::Map(reinterpret_cast(x_cd->x),b.rows(),b.cols()); cholmod_free_dense(&x_cd, &m_cholmod); } /** \internal */ template - void _solve(const SparseMatrix &b, SparseMatrix &dest) const + void _solve_impl(const SparseMatrix &b, SparseMatrix &dest) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); const Index size = m_cholmodFactor->n; @@ -322,8 +303,9 @@ class CholmodBase : internal::noncopyable if(!x_cs) { this->m_info = NumericalIssue; + return; } - // TODO optimize this copy by swapping when possible (be carreful with alignment, etc.) + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) 
dest = viewAsEigen(*x_cs); cholmod_free_sparse(&x_cs, &m_cholmod); } @@ -354,7 +336,6 @@ class CholmodBase : internal::noncopyable cholmod_factor* m_cholmodFactor; RealScalar m_shiftOffset[2]; mutable ComputationInfo m_info; - bool m_isInitialized; int m_factorizationIsOk; int m_analysisIsOk; }; @@ -365,14 +346,16 @@ class CholmodBase : internal::noncopyable * * This class allows to solve for A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization * using the Cholmod library. - * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Thefore, it has little practical interest. - * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices + * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical interest. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices * X and B can be either dense or sparse. * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT @@ -392,7 +375,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl CholmodSimplicialLLT(const MatrixType& matrix) : Base() { init(); - compute(matrix); + this->compute(matrix); } ~CholmodSimplicialLLT() {} @@ -412,14 +395,16 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl * * This class allows to solve for A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization * using the Cholmod library. 
- * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Thefore, it has little practical interest. - * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices + * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical interest. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices * X and B can be either dense or sparse. * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT @@ -439,7 +424,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp CholmodSimplicialLDLT(const MatrixType& matrix) : Base() { init(); - compute(matrix); + this->compute(matrix); } ~CholmodSimplicialLDLT() {} @@ -458,13 +443,15 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp * This class allows to solve for A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization * using the Cholmod library. * This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM. - * The sparse matrix A must be selfajoint and positive definite. The vectors or matrices + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices * X and B can be either dense or sparse. * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. 
* + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * * \sa \ref TutorialSparseDirectSolvers @@ -484,7 +471,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper CholmodSupernodalLLT(const MatrixType& matrix) : Base() { init(); - compute(matrix); + this->compute(matrix); } ~CholmodSupernodalLLT() {} @@ -501,7 +488,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper * \brief A general Cholesky factorization and solver based on Cholmod * * This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization - * using the Cholmod library. The sparse matrix A must be selfajoint and positive definite. The vectors or matrices + * using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices * X and B can be either dense or sparse. * * This variant permits to change the underlying Cholesky method at runtime. @@ -512,6 +499,8 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. 
* * \sa \ref TutorialSparseDirectSolvers @@ -531,7 +520,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom CholmodDecomposition(const MatrixType& matrix) : Base() { init(); - compute(matrix); + this->compute(matrix); } ~CholmodDecomposition() {} @@ -569,36 +558,6 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom } }; -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_CHOLMODSUPPORT_H diff --git a/nuparu/include/Eigen/src/Core/Array.h b/nuparu/include/Eigen/src/Core/Array.h index 497efff6..e38eda72 100644 --- a/nuparu/include/Eigen/src/Core/Array.h +++ b/nuparu/include/Eigen/src/Core/Array.h @@ -24,6 +24,9 @@ namespace Eigen { * API for the %Matrix class provides easy access to linear-algebra * operations. * + * See documentation of class Matrix for detailed information on the template parameters + * storage layout. + * * This class can be extended with the help of the plugin mechanism described on the page * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. * @@ -69,11 +72,27 @@ class Array * the usage of 'using'. This should be done only for operator=. */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array& operator=(const EigenBase &other) { return Base::operator=(other); } + /** Set all the entries to \a value. 
+ * \sa DenseBase::setConstant(), DenseBase::fill() + */ + /* This overload is needed because the usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const Scalar &value) + { + Base::setConstant(value); + return *this; + } + /** Copies the value of the expression \a other into \c *this with automatic resizing. * * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), @@ -84,7 +103,8 @@ class Array * remain row-vectors and vectors remain vectors. */ template - EIGEN_STRONG_INLINE Array& operator=(const ArrayBase& other) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const DenseBase& other) { return Base::_set(other); } @@ -92,11 +112,12 @@ class Array /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array& operator=(const Array& other) { return Base::_set(other); } - + /** Default constructor. * * For fixed-size matrices, does nothing. @@ -107,6 +128,7 @@ class Array * * \sa resize(Index,Index) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array() : Base() { Base::_check_template_params(); @@ -116,6 +138,7 @@ class Array #ifndef EIGEN_PARSED_BY_DOXYGEN // FIXME is it still needed ?? /** \internal */ + EIGEN_DEVICE_FUNC Array(internal::constructor_without_unaligned_array_assert) : Base(internal::constructor_without_unaligned_array_assert()) { @@ -124,41 +147,64 @@ class Array } #endif - /** Constructs a vector or row-vector with given dimension. \only_for_vectors - * - * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, - * it is redundant to pass the dimension here, so it makes more sense to use the default - * constructor Matrix() instead. 
- */ - EIGEN_STRONG_INLINE explicit Array(Index dim) - : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim) +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + Array(Array&& other) + : Base(std::move(other)) { Base::_check_template_params(); - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array) - eigen_assert(dim >= 0); - eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim); - EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) + Base::_set_noalias(other); } + EIGEN_DEVICE_FUNC + Array& operator=(Array&& other) + { + other.swap(*this); + return *this; + } +#endif #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Array(const T& x) + { + Base::_check_template_params(); + Base::template _init1(x); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1) { Base::_check_template_params(); this->template _init2(val0, val1); } #else - /** constructs an uninitialized matrix with \a rows rows and \a cols columns. + /** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC explicit Array(const Scalar *data); + /** Constructs a vector or row-vector with given dimension. \only_for_vectors * - * This is useful for dynamic-size matrices. For fixed-size matrices, + * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass the dimension here, so it makes more sense to use the default + * constructor Array() instead. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Array(Index dim); + /** constructs an initialized 1x1 Array with the given coefficient */ + Array(const Scalar& value); + /** constructs an uninitialized array with \a rows rows and \a cols columns. + * + * This is useful for dynamic-size arrays. 
For fixed-size arrays, * it is redundant to pass these parameters, so one should use the default constructor - * Matrix() instead. */ + * Array() instead. */ Array(Index rows, Index cols); /** constructs an initialized 2D vector with given coefficients */ Array(const Scalar& val0, const Scalar& val1); #endif /** constructs an initialized 3D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2) { Base::_check_template_params(); @@ -168,6 +214,7 @@ class Array m_storage.data()[2] = val2; } /** constructs an initialized 4D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3) { Base::_check_template_params(); @@ -178,51 +225,21 @@ class Array m_storage.data()[3] = val3; } - explicit Array(const Scalar *data); - - /** Constructor copying the value of the expression \a other */ - template - EIGEN_STRONG_INLINE Array(const ArrayBase& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } /** Copy constructor */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Array& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } - /** Copy constructor with in-place evaluation */ - template - EIGEN_STRONG_INLINE Array(const ReturnByValue& other) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - other.evalTo(*this); - } + : Base(other) + { } /** \sa MatrixBase::operator=(const EigenBase&) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const EigenBase &other) - : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - *this = other; - } - - /** 
Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the - * data pointers. - */ - template - void swap(ArrayBase const & other) - { this->_swap(other.derived()); } + : Base(other.derived()) + { } - inline Index innerStride() const { return 1; } - inline Index outerStride() const { return this->innerSize(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } #ifdef EIGEN_ARRAY_PLUGIN #include EIGEN_ARRAY_PLUGIN diff --git a/nuparu/include/Eigen/src/Core/ArrayBase.h b/nuparu/include/Eigen/src/Core/ArrayBase.h index 38852600..b4c24a27 100644 --- a/nuparu/include/Eigen/src/Core/ArrayBase.h +++ b/nuparu/include/Eigen/src/Core/ArrayBase.h @@ -46,16 +46,14 @@ template class ArrayBase typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; - using internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real>::operator*; - typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; typedef DenseBase Base; + using Base::operator*; + using Base::operator/; using Base::RowsAtCompileTime; using Base::ColsAtCompileTime; using Base::SizeAtCompileTime; @@ -64,8 +62,7 @@ template class ArrayBase using Base::MaxSizeAtCompileTime; using Base::IsVectorAtCompileTime; using Base::Flags; - using Base::CoeffReadCost; - + using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -85,22 +82,10 @@ template class ArrayBase #endif // not EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal the plain matrix type corresponding to this expression. 
Note that is not necessarily - * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const - * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either - * PlainObject or const PlainObject&. - */ - typedef Array::Scalar, - internal::traits::RowsAtCompileTime, - internal::traits::ColsAtCompileTime, - AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), - internal::traits::MaxRowsAtCompileTime, - internal::traits::MaxColsAtCompileTime - > PlainObject; - + typedef typename Base::PlainObject PlainObject; /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase @@ -118,40 +103,57 @@ template class ArrayBase /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ + EIGEN_DEVICE_FUNC Derived& operator=(const ArrayBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } - - Derived& operator+=(const Scalar& scalar) - { return *this = derived() + scalar; } - Derived& operator-=(const Scalar& scalar) - { return *this = derived() - scalar; } + + /** Set all the entries to \a value. 
+ * \sa DenseBase::setConstant(), DenseBase::fill() */ + EIGEN_DEVICE_FUNC + Derived& operator=(const Scalar &value) + { Base::setConstant(value); return derived(); } + + EIGEN_DEVICE_FUNC + Derived& operator+=(const Scalar& scalar); + EIGEN_DEVICE_FUNC + Derived& operator-=(const Scalar& scalar); template + EIGEN_DEVICE_FUNC Derived& operator+=(const ArrayBase& other); template + EIGEN_DEVICE_FUNC Derived& operator-=(const ArrayBase& other); template + EIGEN_DEVICE_FUNC Derived& operator*=(const ArrayBase& other); template + EIGEN_DEVICE_FUNC Derived& operator/=(const ArrayBase& other); public: + EIGEN_DEVICE_FUNC ArrayBase& array() { return *this; } + EIGEN_DEVICE_FUNC const ArrayBase& array() const { return *this; } /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array * \sa MatrixBase::array() */ - MatrixWrapper matrix() { return derived(); } - const MatrixWrapper matrix() const { return derived(); } + EIGEN_DEVICE_FUNC + MatrixWrapper matrix() { return MatrixWrapper(derived()); } + EIGEN_DEVICE_FUNC + const MatrixWrapper matrix() const { return MatrixWrapper(derived()); } // template // inline void evalTo(Dest& dst) const { dst = matrix(); } protected: + EIGEN_DEVICE_FUNC ArrayBase() : Base() {} private: @@ -176,8 +178,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator-=(const ArrayBase &other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -190,8 +191,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator+=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -204,8 +204,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator*=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - 
tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::mul_assign_op()); return derived(); } @@ -218,8 +217,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator/=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::div_assign_op()); return derived(); } diff --git a/nuparu/include/Eigen/src/Core/ArrayWrapper.h b/nuparu/include/Eigen/src/Core/ArrayWrapper.h index a791bc35..4e484f29 100644 --- a/nuparu/include/Eigen/src/Core/ArrayWrapper.h +++ b/nuparu/include/Eigen/src/Core/ArrayWrapper.h @@ -29,6 +29,11 @@ struct traits > : public traits::type > { typedef ArrayXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits::type >::Flags, + Flags = Flags0 & ~NestByRefBit + }; }; } @@ -39,6 +44,7 @@ class ArrayWrapper : public ArrayBase > typedef ArrayBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper) + typedef typename internal::remove_all::type NestedExpression; typedef typename internal::conditional< internal::is_lvalue::value, @@ -46,43 +52,56 @@ class ArrayWrapper : public ArrayBase > const Scalar >::type ScalarWithConstIfNotLvalue; - typedef typename internal::nested::type NestedExpressionType; + typedef typename internal::ref_selector::type NestedExpressionType; - inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC + explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return 
m_expression.const_cast_derived().data(); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index rowId, Index colId) const { return m_expression.coeff(rowId, colId); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index rowId, Index colId) { return m_expression.const_cast_derived().coeffRef(rowId, colId); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return m_expression.const_cast_derived().coeffRef(rowId, colId); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { return m_expression.const_cast_derived().coeffRef(index); @@ -113,9 +132,11 @@ class ArrayWrapper : public ArrayBase > } template + EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const { dst = m_expression; } const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC nestedExpression() const { return m_expression; @@ -123,10 +144,12 @@ class ArrayWrapper : public ArrayBase > /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index) */ + EIGEN_DEVICE_FUNC void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); } /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index,Index)*/ - void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); } + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); } protected: NestedExpressionType m_expression; @@ -149,6 +172,11 @@ struct traits > : public traits::type > { typedef MatrixXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits::type >::Flags, + Flags = Flags0 & ~NestByRefBit + }; }; } @@ -159,6 
+187,7 @@ class MatrixWrapper : public MatrixBase > typedef MatrixBase > Base; EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper) + typedef typename internal::remove_all::type NestedExpression; typedef typename internal::conditional< internal::is_lvalue::value, @@ -166,43 +195,56 @@ class MatrixWrapper : public MatrixBase > const Scalar >::type ScalarWithConstIfNotLvalue; - typedef typename internal::nested::type NestedExpressionType; + typedef typename internal::ref_selector::type NestedExpressionType; - inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {} + EIGEN_DEVICE_FUNC + explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index rowId, Index colId) const { return m_expression.coeff(rowId, colId); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index rowId, Index colId) { return m_expression.const_cast_derived().coeffRef(rowId, colId); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return m_expression.derived().coeffRef(rowId, colId); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { return 
m_expression.const_cast_derived().coeffRef(index); @@ -232,6 +274,7 @@ class MatrixWrapper : public MatrixBase > m_expression.const_cast_derived().template writePacket(index, val); } + EIGEN_DEVICE_FUNC const typename internal::remove_all::type& nestedExpression() const { @@ -240,10 +283,12 @@ class MatrixWrapper : public MatrixBase > /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index) */ + EIGEN_DEVICE_FUNC void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); } /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index,Index)*/ - void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); } + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); } protected: NestedExpressionType m_expression; diff --git a/nuparu/include/Eigen/src/Core/Assign.h b/nuparu/include/Eigen/src/Core/Assign.h index 1dccc2f4..53806ba3 100644 --- a/nuparu/include/Eigen/src/Core/Assign.h +++ b/nuparu/include/Eigen/src/Core/Assign.h @@ -14,471 +14,6 @@ namespace Eigen { -namespace internal { - -/*************************************************************************** -* Part 1 : the logic deciding a strategy for traversal and unrolling * -***************************************************************************/ - -template -struct assign_traits -{ -public: - enum { - DstIsAligned = Derived::Flags & AlignedBit, - DstHasDirectAccess = Derived::Flags & DirectAccessBit, - SrcIsAligned = OtherDerived::Flags & AlignedBit, - JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned - }; - -private: - enum { - InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) - : int(Derived::RowsAtCompileTime), - InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? 
int(Derived::MaxSizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime) - : int(Derived::MaxRowsAtCompileTime), - MaxSizeAtCompileTime = Derived::SizeAtCompileTime, - PacketSize = packet_traits::size - }; - - enum { - StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), - MightVectorize = StorageOrdersAgree - && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), - MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 - && int(DstIsAligned) && int(SrcIsAligned), - MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), - MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess - && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), - /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, - so it's only good for large enough sizes. */ - MaySliceVectorize = MightVectorize && DstHasDirectAccess - && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) - /* slice vectorization can be slow, so we only want it if the slices are big, which is - indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block - in a fixed-size matrix */ - }; - -public: - enum { - Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) - : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) - : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) - : int(MayLinearize) ? int(LinearTraversal) - : int(DefaultTraversal), - Vectorized = int(Traversal) == InnerVectorizedTraversal - || int(Traversal) == LinearVectorizedTraversal - || int(Traversal) == SliceVectorizedTraversal - }; - -private: - enum { - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 
int(PacketSize) : 1), - MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), - MayUnrollInner = int(InnerSize) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit) - }; - -public: - enum { - Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) - ? ( - int(MayUnrollCompletely) ? int(CompleteUnrolling) - : int(MayUnrollInner) ? int(InnerUnrolling) - : int(NoUnrolling) - ) - : int(Traversal) == int(LinearVectorizedTraversal) - ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) - : int(Traversal) == int(LinearTraversal) - ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) - : int(NoUnrolling) - }; - -#ifdef EIGEN_DEBUG_ASSIGN - static void debug() - { - EIGEN_DEBUG_VAR(DstIsAligned) - EIGEN_DEBUG_VAR(SrcIsAligned) - EIGEN_DEBUG_VAR(JointAlignment) - EIGEN_DEBUG_VAR(InnerSize) - EIGEN_DEBUG_VAR(InnerMaxSize) - EIGEN_DEBUG_VAR(PacketSize) - EIGEN_DEBUG_VAR(StorageOrdersAgree) - EIGEN_DEBUG_VAR(MightVectorize) - EIGEN_DEBUG_VAR(MayLinearize) - EIGEN_DEBUG_VAR(MayInnerVectorize) - EIGEN_DEBUG_VAR(MayLinearVectorize) - EIGEN_DEBUG_VAR(MaySliceVectorize) - EIGEN_DEBUG_VAR(Traversal) - EIGEN_DEBUG_VAR(UnrollingLimit) - EIGEN_DEBUG_VAR(MayUnrollCompletely) - EIGEN_DEBUG_VAR(MayUnrollInner) - EIGEN_DEBUG_VAR(Unrolling) - } -#endif -}; - -/*************************************************************************** -* Part 2 : meta-unrollers -***************************************************************************/ - -/************************ -*** Default traversal *** -************************/ - -template -struct assign_DefaultTraversal_CompleteUnrolling -{ - enum { - outer = Index / 
Derived1::InnerSizeAtCompileTime, - inner = Index % Derived1::InnerSizeAtCompileTime - }; - - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.copyCoeffByOuterInner(outer, inner, src); - assign_DefaultTraversal_CompleteUnrolling::run(dst, src); - } -}; - -template -struct assign_DefaultTraversal_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -template -struct assign_DefaultTraversal_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) - { - dst.copyCoeffByOuterInner(outer, Index, src); - assign_DefaultTraversal_InnerUnrolling::run(dst, src, outer); - } -}; - -template -struct assign_DefaultTraversal_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} -}; - -/*********************** -*** Linear traversal *** -***********************/ - -template -struct assign_LinearTraversal_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.copyCoeff(Index, src); - assign_LinearTraversal_CompleteUnrolling::run(dst, src); - } -}; - -template -struct assign_LinearTraversal_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -/************************** -*** Inner vectorization *** -**************************/ - -template -struct assign_innervec_CompleteUnrolling -{ - enum { - outer = Index / Derived1::InnerSizeAtCompileTime, - inner = Index % Derived1::InnerSizeAtCompileTime, - JointAlignment = assign_traits::JointAlignment - }; - - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.template copyPacketByOuterInner(outer, inner, src); - assign_innervec_CompleteUnrolling::size, Stop>::run(dst, src); - } -}; - -template -struct assign_innervec_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - 
-template -struct assign_innervec_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) - { - dst.template copyPacketByOuterInner(outer, Index, src); - assign_innervec_InnerUnrolling::size, Stop>::run(dst, src, outer); - } -}; - -template -struct assign_innervec_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} -}; - -/*************************************************************************** -* Part 3 : implementation of all cases -***************************************************************************/ - -template::Traversal, - int Unrolling = assign_traits::Unrolling, - int Version = Specialized> -struct assign_impl; - -/************************ -*** Default traversal *** -************************/ - -template -struct assign_impl -{ - static inline void run(Derived1 &, const Derived2 &) { } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; ++inner) - dst.copyCoeffByOuterInner(outer, inner, src); - } -}; - -template -struct assign_impl -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_DefaultTraversal_CompleteUnrolling - ::run(dst, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - assign_DefaultTraversal_InnerUnrolling - ::run(dst, src, outer); - } -}; - -/*********************** -*** Linear traversal *** -***********************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; 
- static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index size = dst.size(); - for(Index i = 0; i < size; ++i) - dst.copyCoeff(i, src); - } -}; - -template -struct assign_impl -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_LinearTraversal_CompleteUnrolling - ::run(dst, src); - } -}; - -/************************** -*** Inner vectorization *** -**************************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index packetSize = packet_traits::size; - for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; inner+=packetSize) - dst.template copyPacketByOuterInner(outer, inner, src); - } -}; - -template -struct assign_impl -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_innervec_CompleteUnrolling - ::run(dst, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - assign_innervec_InnerUnrolling - ::run(dst, src, outer); - } -}; - -/*************************** -*** Linear vectorization *** -***************************/ - -template -struct unaligned_assign_impl -{ - template - static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {} -}; - -template <> -struct unaligned_assign_impl -{ - // MSVC must not inline this functions. If it does, it fails to optimize the - // packet access path. 
-#ifdef _MSC_VER - template - static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) -#else - template - static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) -#endif - { - for (typename Derived::Index index = start; index < end; ++index) - dst.copyCoeff(index, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index size = dst.size(); - typedef packet_traits PacketTraits; - enum { - packetSize = PacketTraits::size, - dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits::DstIsAligned) , - srcAlignment = assign_traits::JointAlignment - }; - const Index alignedStart = assign_traits::DstIsAligned ? 0 - : internal::first_aligned(&dst.coeffRef(0), size); - const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; - - unaligned_assign_impl::DstIsAligned!=0>::run(src,dst,0,alignedStart); - - for(Index index = alignedStart; index < alignedEnd; index += packetSize) - { - dst.template copyPacket(index, src); - } - - unaligned_assign_impl<>::run(src,dst,alignedEnd,size); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - enum { size = Derived1::SizeAtCompileTime, - packetSize = packet_traits::size, - alignedSize = (size/packetSize)*packetSize }; - - assign_innervec_CompleteUnrolling::run(dst, src); - assign_DefaultTraversal_CompleteUnrolling::run(dst, src); - } -}; - -/************************** -*** Slice vectorization *** -***************************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - typedef packet_traits PacketTraits; - enum 
{ - packetSize = PacketTraits::size, - alignable = PacketTraits::AlignedOnScalar, - dstAlignment = alignable ? Aligned : int(assign_traits::DstIsAligned) , - srcAlignment = assign_traits::JointAlignment - }; - const Index packetAlignedMask = packetSize - 1; - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0; - Index alignedStart = ((!alignable) || assign_traits::DstIsAligned) ? 0 - : internal::first_aligned(&dst.coeffRef(0,0), innerSize); - - for(Index outer = 0; outer < outerSize; ++outer) - { - const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); - // do the non-vectorizable part of the assignment - for(Index inner = 0; inner(outer, inner, src); - - // do the non-vectorizable part of the assignment - for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); - } - } -}; - -} // end namespace internal - -/*************************************************************************** -* Part 4 : implementation of DenseBase methods -***************************************************************************/ - template template EIGEN_STRONG_INLINE Derived& DenseBase @@ -492,90 +27,62 @@ EIGEN_STRONG_INLINE Derived& DenseBase EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived) EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) -#ifdef EIGEN_DEBUG_ASSIGN - internal::assign_traits::debug(); -#endif eigen_assert(rows() == other.rows() && cols() == other.cols()); - internal::assign_impl::Traversal) - : int(InvalidTraversal)>::run(derived(),other.derived()); -#ifndef EIGEN_NO_DEBUG - checkTransposeAliasing(other.derived()); -#endif + internal::call_assignment_no_alias(derived(),other.derived()); + return derived(); } -namespace internal { - -template::Flags) & 
EvalBeforeAssigningBit) != 0, - bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1) - | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". - // revert to || as soon as not needed anymore. - (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1)) - && int(Derived::SizeAtCompileTime) != 1> -struct assign_selector; - -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } - template - static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; } -}; -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); } -}; -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); } - template - static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose dstTrans(dst); other.evalTo(dstTrans); return dst; } -}; -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } -}; - -} // end namespace internal - template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template 
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const MatrixBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const EigenBase& other) { - return internal::assign_selector::evalTo(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const ReturnByValue& other) { - return internal::assign_selector::evalTo(derived(), other.derived()); + other.derived().evalTo(derived()); + return derived(); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/AssignEvaluator.h b/nuparu/include/Eigen/src/Core/AssignEvaluator.h new file mode 100755 index 00000000..9dfffbcc --- /dev/null +++ b/nuparu/include/Eigen/src/Core/AssignEvaluator.h @@ -0,0 +1,810 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_ASSIGN_EVALUATOR_H +#define EIGEN_ASSIGN_EVALUATOR_H + +namespace Eigen { + +// This implementation is based on Assign.h + +namespace internal { + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for traversal and unrolling * +***************************************************************************/ + +// copy_using_evaluator_traits is based on assign_traits + +template +struct copy_using_evaluator_traits +{ + typedef typename DstEvaluator::XprType Dst; + typedef typename Dst::Scalar DstScalar; + // TODO distinguish between linear traversal and inner-traversals + typedef typename find_best_packet::type PacketType; + + enum { + DstFlags = DstEvaluator::Flags, + SrcFlags = SrcEvaluator::Flags, + RequiredAlignment = unpacket_traits::alignment + }; + +public: + enum { + DstAlignment = DstEvaluator::Alignment, + SrcAlignment = SrcEvaluator::Alignment, + DstHasDirectAccess = DstFlags & DirectAccessBit, + JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) + }; + +private: + enum { + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? 
int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + OuterStride = int(outer_stride_at_compile_time::ret), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime, + PacketSize = unpacket_traits::size + }; + + enum { + DstIsRowMajor = DstFlags&RowMajorBit, + SrcIsRowMajor = SrcFlags&RowMajorBit, + StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), + MightVectorize = StorageOrdersAgree + && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) + && (functor_traits::PacketAccess), + MayInnerVectorize = MightVectorize + && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 + && int(OuterStride)!=Dynamic && int(OuterStride)%int(PacketSize)==0 + && int(JointAlignment)>=int(RequiredAlignment), + MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), + MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess + && ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), + /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. */ + MaySliceVectorize = MightVectorize && DstHasDirectAccess + && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix */ + }; + +public: + enum { + Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) + : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(MayLinearize) ? int(LinearTraversal) + : int(DefaultTraversal), + Vectorized = int(Traversal) == InnerVectorizedTraversal + || int(Traversal) == LinearVectorizedTraversal + || int(Traversal) == SliceVectorizedTraversal + }; + +private: + enum { + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? 
int(PacketSize) : 1), + MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic + && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), + MayUnrollInner = int(InnerSize) != Dynamic + && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) + }; + +public: + enum { + Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) + ? ( + int(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) + ) + : int(Traversal) == int(LinearVectorizedTraversal) + ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling) + : int(NoUnrolling) ) + : int(Traversal) == int(LinearTraversal) + ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(NoUnrolling) ) + : int(NoUnrolling) + }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; + std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl; + std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl; + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(DstAlignment) + EIGEN_DEBUG_VAR(SrcAlignment) + EIGEN_DEBUG_VAR(RequiredAlignment) + EIGEN_DEBUG_VAR(JointAlignment) + EIGEN_DEBUG_VAR(InnerSize) + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(StorageOrdersAgree) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearize) + EIGEN_DEBUG_VAR(MayInnerVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; + 
EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(MayUnrollCompletely) + EIGEN_DEBUG_VAR(MayUnrollInner) + std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; + std::cerr << std::endl; + } +#endif +}; + +/*************************************************************************** +* Part 2 : meta-unrollers +***************************************************************************/ + +/************************ +*** Default traversal *** +************************/ + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime + }; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + kernel.assignCoeffByOuterInner(outer, inner); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) + { + kernel.assignCoeffByOuterInner(outer, Index_); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) + { + 
kernel.assignCoeff(Index); + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime, + JointAlignment = Kernel::AssignmentTraits::JointAlignment + }; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + kernel.template assignPacketByOuterInner(outer, inner); + enum { NextIndex = Index + unpacket_traits::size }; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + typedef typename Kernel::PacketType PacketType; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) + { + kernel.template assignPacketByOuterInner(outer, Index_); + enum { NextIndex = Index_ + unpacket_traits::size }; + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases 
+***************************************************************************/ + +// dense_assignment_loop is based on assign_impl + +template +struct dense_assignment_loop; + +/************************ +*** Default traversal *** +************************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel) + { + for(Index outer = 0; outer < kernel.outerSize(); ++outer) { + for(Index inner = 0; inner < kernel.innerSize(); ++inner) { + kernel.assignCoeffByOuterInner(outer, inner); + } + } + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + + const Index outerSize = kernel.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +/*************************** +*** Linear vectorization *** +***************************/ + + +// The goal of unaligned_dense_assignment_loop is simply to factorize the handling +// of the non vectorizable beginning and ending parts + +template +struct unaligned_dense_assignment_loop +{ + // if IsAligned = true, then do nothing + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {} +}; + +template <> +struct unaligned_dense_assignment_loop +{ + // MSVC must not inline this functions. If it does, it fails to optimize the + // packet access path. 
+ // FIXME check which version exhibits this issue +#if EIGEN_COMP_MSVC + template + static EIGEN_DONT_INLINE void run(Kernel &kernel, + Index start, + Index end) +#else + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, + Index start, + Index end) +#endif + { + for (Index index = start; index < end; ++index) + kernel.assignCoeff(index); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index size = kernel.size(); + typedef typename Kernel::Scalar Scalar; + typedef typename Kernel::PacketType PacketType; + enum { + requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment, + packetSize = unpacket_traits::size, + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), + dstAlignment = packet_traits::AlignedOnScalar ? int(requestedAlignment) + : int(Kernel::AssignmentTraits::DstAlignment), + srcAlignment = Kernel::AssignmentTraits::JointAlignment + }; + const Index alignedStart = dstIsAligned ? 
0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size); + const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; + + unaligned_dense_assignment_loop::run(kernel, 0, alignedStart); + + for(Index index = alignedStart; index < alignedEnd; index += packetSize) + kernel.template assignPacket(index); + + unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + + enum { size = DstXprType::SizeAtCompileTime, + packetSize = packet_traits::size, + alignedSize = (size/packetSize)*packetSize }; + + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct dense_assignment_loop +{ + typedef typename Kernel::PacketType PacketType; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index packetSize = unpacket_traits::size; + for(Index outer = 0; outer < outerSize; ++outer) + for(Index inner = 0; inner < innerSize; inner+=packetSize) + kernel.template assignPacketByOuterInner(outer, inner); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + const Index outerSize = kernel.outerSize(); + for(Index outer = 0; outer < outerSize; 
++outer) + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index size = kernel.size(); + for(Index i = 0; i < size; ++i) + kernel.assignCoeff(i); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } +}; + +/************************** +*** Slice vectorization *** +***************************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + { + typedef typename Kernel::Scalar Scalar; + typedef typename Kernel::PacketType PacketType; + enum { + packetSize = unpacket_traits::size, + requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment), + alignable = packet_traits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar), + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), + dstAlignment = alignable ? int(requestedAlignment) + : int(Kernel::AssignmentTraits::DstAlignment) + }; + const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0); + if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0) + { + // the pointer is not aligend-on scalar, so alignment is not possible + return dense_assignment_loop::run(kernel); + } + const Index packetAlignedMask = packetSize - 1; + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; + Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 
0 : internal::first_aligned(dst_ptr, innerSize); + + for(Index outer = 0; outer < outerSize; ++outer) + { + const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); + // do the non-vectorizable part of the assignment + for(Index inner = 0; inner(outer, inner); + + // do the non-vectorizable part of the assignment + for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); + } + } +}; + +/*************************************************************************** +* Part 4 : Generic dense assignment kernel +***************************************************************************/ + +// This class generalize the assignment of a coefficient (or packet) from one dense evaluator +// to another dense writable evaluator. +// It is parametrized by the two evaluators, and the actual assignment functor. +// This abstraction level permits to keep the evaluation loops as simple and as generic as possible. +// One can customize the assignment using this generic dense_assignment_kernel with different +// functors, or by completely overloading it, by-passing a functor. 
+template +class generic_dense_assignment_kernel +{ +protected: + typedef typename DstEvaluatorTypeT::XprType DstXprType; + typedef typename SrcEvaluatorTypeT::XprType SrcXprType; +public: + + typedef DstEvaluatorTypeT DstEvaluatorType; + typedef SrcEvaluatorTypeT SrcEvaluatorType; + typedef typename DstEvaluatorType::Scalar Scalar; + typedef copy_using_evaluator_traits AssignmentTraits; + typedef typename AssignmentTraits::PacketType PacketType; + + + EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) + { + #ifdef EIGEN_DEBUG_ASSIGN + AssignmentTraits::debug(); + #endif + } + + EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } + EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } + EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } + + EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } + EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } + + /// Assign src(row,col) to dst(row,col) through the assignment functor. 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) + { + m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) + { + m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignCoeff(row, col); + } + + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) + { + m_functor.template assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) + { + m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignPacket(row, col); + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) + { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::RowsAtCompileTime) == 1 ? 0 + : int(Traits::ColsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? outer + : inner; + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) + { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::ColsAtCompileTime) == 1 ? 0 + : int(Traits::RowsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? 
inner + : outer; + } + +protected: + DstEvaluatorType& m_dst; + const SrcEvaluatorType& m_src; + const Functor &m_functor; + // TODO find a way to avoid the needs of the original expression + DstXprType& m_dstExpr; +}; + +/*************************************************************************** +* Part 5 : Entry point for dense rectangular assignment +***************************************************************************/ + +template +EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +{ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + typedef evaluator DstEvaluatorType; + typedef evaluator SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + typedef generic_dense_assignment_kernel Kernel; + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + dense_assignment_loop::run(kernel); +} + +template +EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) +{ + call_dense_assignment_loop(dst, src, internal::assign_op()); +} + +/*************************************************************************** +* Part 6 : Generic assignment +***************************************************************************/ + +// Based on the respective shapes of the destination and source, +// the class AssignmentKind determine the kind of assignment mechanism. +// AssignmentKind must define a Kind typedef. 
+template struct AssignmentKind; + +// Assignement kind defined in this file: +struct Dense2Dense {}; +struct EigenBase2EigenBase {}; + +template struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; +template<> struct AssignmentKind { typedef Dense2Dense Kind; }; + +// This is the main assignment class +template< typename DstXprType, typename SrcXprType, typename Functor, + typename Kind = typename AssignmentKind< typename evaluator_traits::Shape , typename evaluator_traits::Shape >::Kind, + typename Scalar = typename DstXprType::Scalar> +struct Assignment; + + +// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition. +// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes thing quite complicated. +// So this intermediate function removes everything related to AssumeAliasing such that Assignment +// does not has to bother about these annoying details. + +template +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} +template +EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} + +// Deal with AssumeAliasing +template +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if::AssumeAliasing==1, void*>::type = 0) +{ + typename plain_matrix_type::type tmp(src); + call_assignment_no_alias(dst, tmp, func); +} + +template +EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if::AssumeAliasing==0, void*>::type = 0) +{ + call_assignment_no_alias(dst, src, func); +} + +// by-pass AssumeAliasing +// When there is no aliasing, we require that 'dst' has been properly resized +template class StorageBase, typename Src, typename Func> +EIGEN_DEVICE_FUNC void call_assignment(NoAlias& dst, const Src& src, const Func& func) +{ + 
call_assignment_no_alias(dst.expression(), src, func); +} + + +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) +{ + enum { + NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) + || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1) + ) && int(Dst::SizeAtCompileTime) != 1 + }; + + Index dstRows = NeedToTranspose ? src.cols() : src.rows(); + Index dstCols = NeedToTranspose ? src.rows() : src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + typedef typename internal::conditional, Dst>::type ActualDstTypeCleaned; + typedef typename internal::conditional, Dst&>::type ActualDstType; + ActualDstType actualDst(dst); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); + + Assignment::run(actualDst, src, func); +} +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src) +{ + call_assignment_no_alias(dst, src, internal::assign_op()); +} + +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) +{ + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src) + + Assignment::run(dst, src, func); +} +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) +{ + call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); +} + +// forward declaration +template void check_for_aliasing(const Dst &dst, const Src &src); + +// Generic 
Dense to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment +{ + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + +#ifndef EIGEN_NO_DEBUG + internal::check_for_aliasing(dst, src); +#endif + + call_dense_assignment_loop(dst, src, func); + } +}; + +// Generic assignment through evalTo. +// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment +{ + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.evalTo(dst); + } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_EVALUATOR_H diff --git a/nuparu/include/Eigen/src/Core/Assign_MKL.h b/nuparu/include/Eigen/src/Core/Assign_MKL.h old mode 100644 new mode 100755 index 7772951b..897187a3 --- a/nuparu/include/Eigen/src/Core/Assign_MKL.h +++ b/nuparu/include/Eigen/src/Core/Assign_MKL.h @@ -1,6 +1,7 @@ /* Copyright (c) 2011, Intel Corporation. All rights reserved. - + Copyright (C) 2015 Gael Guennebaud + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -37,17 +38,13 @@ namespace Eigen { namespace internal { -template struct vml_call -{ enum { IsSupported = 0 }; }; - -template +template class vml_assign_traits { private: enum { DstHasDirectAccess = Dst::Flags & DirectAccessBit, SrcHasDirectAccess = Src::Flags & DirectAccessBit, - StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) : int(Dst::Flags)&RowMajorBit ? 
int(Dst::ColsAtCompileTime) @@ -57,165 +54,118 @@ class vml_assign_traits : int(Dst::MaxRowsAtCompileTime), MaxSizeAtCompileTime = Dst::SizeAtCompileTime, - MightEnableVml = vml_call::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess - && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, + MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, - LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD, - MayEnableVml = MightEnableVml && LargeEnough, - MayLinearize = MayEnableVml && MightLinearize + LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD }; public: enum { - Traversal = MayLinearize ? LinearVectorizedTraversal - : MayEnableVml ? InnerVectorizedTraversal - : DefaultTraversal + EnableVml = MightEnableVml && LargeEnough, + Traversal = MightLinearize ? LinearTraversal : DefaultTraversal }; }; -template::Traversal > -struct vml_assign_impl - : assign_impl,Traversal,Unrolling,BuiltIn> -{ -}; - -template -struct vml_assign_impl -{ - typedef typename Derived1::Scalar Scalar; - typedef typename Derived1::Index Index; - static inline void run(Derived1& dst, const CwiseUnaryOp& src) - { - // in case we want to (or have to) skip VML at runtime we can call: - // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) { - const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : - &(src.nestedExpression().coeffRef(0, outer)); - Scalar *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); - vml_call::run(src.functor(), innerSize, src_ptr, dst_ptr ); - } - } -}; - -template -struct vml_assign_impl -{ - static inline void run(Derived1& dst, const CwiseUnaryOp& src) - { - // in case we want to (or have to) skip VML at runtime we can call: - // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); - vml_call::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() ); - } -}; - -// Macroses - -#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \ - template \ - struct assign_impl, TRAVERSAL, UNROLLING, Specialized> { \ - static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp &src) { \ - vml_assign_impl::run(dst, src); \ - } \ - }; - -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling) - - +#define EIGEN_PP_EXPAND(ARG) ARG #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) -#define EIGEN_MKL_VML_MODE VML_HA +#define EIGEN_VMLMODE_EXPAND_LA , VML_HA #else -#define EIGEN_MKL_VML_MODE VML_LA +#define EIGEN_VMLMODE_EXPAND_LA , VML_LA #endif -#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& 
/*func*/, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \ - } \ +#define EIGEN_VMLMODE_EXPAND__ + +#define EIGEN_VMLMODE_PREFIX_LA vm +#define EIGEN_VMLMODE_PREFIX__ v +#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested> \ + struct Assignment, SrcXprNested>, assign_op, \ + Dense2Dense, typename enable_if::EnableVml,EIGENTYPE>::type> { \ + typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + if(vml_assign_traits::Traversal==LinearTraversal) { \ + VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ + &(src.nestedExpression().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ + }; \ + + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) + + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA) +// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _) + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _) 
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) + +#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested> \ + struct Assignment, SrcXprNested>, assign_op, \ + Dense2Dense, typename enable_if::EnableVml,EIGENTYPE>::type> { \ + typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + VMLTYPE exponent = reinterpret_cast(src.functor().m_exponent); \ + if(vml_assign_traits::Traversal==LinearTraversal) \ + { \ + VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(), exponent, \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ + &(src.nestedExpression().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ }; - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& /*func*/, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ - VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \ - } \ - }; - -#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& func, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - EIGENTYPE exponent = func.m_exponent; \ - MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ - VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \ - (VMLTYPE*)dst, &vmlMode); \ - } \ - }; - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) - - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double) - -#define 
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) - - -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan) -//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt) - -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr) - -// The vm*powx functions are not avaibale in the windows version of MKL. -#ifndef _WIN32 -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16) -#endif + +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA) } // end namespace internal diff --git a/nuparu/include/Eigen/src/Core/BandMatrix.h b/nuparu/include/Eigen/src/Core/BandMatrix.h index ffd7fe8b..87c124fd 100644 --- a/nuparu/include/Eigen/src/Core/BandMatrix.h +++ b/nuparu/include/Eigen/src/Core/BandMatrix.h @@ -32,7 +32,7 @@ class BandMatrixBase : public EigenBase }; typedef typename internal::traits::Scalar Scalar; typedef Matrix DenseMatrixType; - typedef typename 
DenseMatrixType::Index Index; + typedef typename DenseMatrixType::StorageIndex StorageIndex; typedef typename internal::traits::CoefficientsType CoefficientsType; typedef EigenBase Base; @@ -179,7 +179,7 @@ struct traits > { typedef _Scalar Scalar; typedef Dense StorageKind; - typedef DenseIndex Index; + typedef Eigen::Index StorageIndex; enum { CoeffReadCost = NumTraits::ReadCost, RowsAtCompileTime = _Rows, @@ -201,10 +201,10 @@ class BandMatrix : public BandMatrixBase::Scalar Scalar; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; typedef typename internal::traits::CoefficientsType CoefficientsType; - inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) + explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) : m_coeffs(1+supers+subs,cols), m_rows(rows), m_supers(supers), m_subs(subs) { @@ -241,7 +241,7 @@ struct traits::CoeffReadCost, RowsAtCompileTime = _Rows, @@ -264,9 +264,9 @@ class BandMatrixWrapper : public BandMatrixBase::Scalar Scalar; typedef typename internal::traits::CoefficientsType CoefficientsType; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; - inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) + explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) : m_coeffs(coeffs), m_rows(rows), m_supers(supers), m_subs(subs) { @@ -312,9 +312,9 @@ template class TridiagonalMatrix : public BandMatrix { typedef BandMatrix Base; - typedef typename Base::Index Index; + typedef typename Base::StorageIndex StorageIndex; public: - TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {} + explicit TridiagonalMatrix(Index size = Size) : 
Base(size,size,Options&SelfAdjoint?0:1,1) {} inline typename Base::template DiagonalIntReturnType<1>::Type super() { return Base::template diagonal<1>(); } @@ -327,6 +327,25 @@ class TridiagonalMatrix : public BandMatrix +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + } // end namespace internal } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/Block.h b/nuparu/include/Eigen/src/Core/Block.h index 358b3188..3748e259 100644 --- a/nuparu/include/Eigen/src/Core/Block.h +++ b/nuparu/include/Eigen/src/Core/Block.h @@ -21,6 +21,9 @@ namespace Eigen { * \param XprType the type of the expression in which we are taking a block * \param BlockRows the number of rows of the block we are taking at compile time (optional) * \param BlockCols the number of columns of the block we are taking at compile time (optional) + * \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or + * to set of columns of a column major matrix (optional). The parameter allows to determine + * at compile time whether aligned access is possible on the block expression. * * This class represents an expression of either a fixed-size or dynamic-size block. 
It is the return * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block(Index,Index) and @@ -52,7 +55,7 @@ struct traits > : traits::Scalar Scalar; typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; - typedef typename nested::type XprTypeNested; + typedef typename ref_selector::type XprTypeNested; typedef typename remove_reference::type _XprTypeNested; enum{ MatrixRows = traits::RowsAtCompileTime, @@ -65,6 +68,7 @@ struct traits > : traits::MaxColsAtCompileTime), + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0, IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -77,18 +81,16 @@ struct traits > : traits::ret) : int(inner_stride_at_compile_time::ret), - MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) - && (InnerStrideAtCompileTime == 1) - ? PacketAccessBit : 0, - MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0, - FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0, + + // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, - Flags0 = traits::Flags & ( (HereditaryBits & ~RowMajorBit) | - DirectAccessBit | - MaskPacketAccessBit | - MaskAlignedBit), - Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit + Flags = (traits::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit, + // FIXME DirectAccessBit should not be handled by expressions + // + // Alignment is needed by MapBase's assertions + // We can sefely set it to false here. 
Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator + Alignment = 0 }; }; @@ -108,9 +110,12 @@ template class typedef Impl Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Block) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) + + typedef typename internal::remove_all::type NestedExpression; /** Column or Row constructor */ + EIGEN_DEVICE_FUNC inline Block(XprType& xpr, Index i) : Impl(xpr,i) { eigen_assert( (i>=0) && ( @@ -120,25 +125,27 @@ template class /** Fixed-size constructor */ - inline Block(XprType& xpr, Index a_startRow, Index a_startCol) - : Impl(xpr, a_startRow, a_startCol) + EIGEN_DEVICE_FUNC + inline Block(XprType& xpr, Index startRow, Index startCol) + : Impl(xpr, startRow, startCol) { EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE) - eigen_assert(a_startRow >= 0 && BlockRows >= 1 && a_startRow + BlockRows <= xpr.rows() - && a_startCol >= 0 && BlockCols >= 1 && a_startCol + BlockCols <= xpr.cols()); + eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows() + && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols()); } /** Dynamic-size constructor */ + EIGEN_DEVICE_FUNC inline Block(XprType& xpr, - Index a_startRow, Index a_startCol, + Index startRow, Index startCol, Index blockRows, Index blockCols) - : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) { eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows) && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols)); - eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow <= xpr.rows() - blockRows - && a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols); + eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows + && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols); } }; @@ -149,14 +156,15 @@ class 
BlockImpl : public internal::BlockImpl_dense { typedef internal::BlockImpl_dense Impl; - typedef typename XprType::Index Index; + typedef typename XprType::StorageIndex StorageIndex; public: typedef Impl Base; EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) - inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {} - inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {} - inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols) - : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {} + EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {} + EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {} + EIGEN_DEVICE_FUNC + inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) {} }; namespace internal { @@ -172,10 +180,11 @@ template::type& nestedExpression() const + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& nestedExpression() const { return m_xpr; } - Index startRow() const + EIGEN_DEVICE_FUNC + StorageIndex startRow() const { return m_startRow.value(); } - Index startCol() const + EIGEN_DEVICE_FUNC + StorageIndex startCol() const { return m_startCol.value(); } @@ -302,10 +322,10 @@ template m_startRow; - const internal::variable_if_dynamic m_startCol; - const internal::variable_if_dynamic m_blockRows; - const internal::variable_if_dynamic m_blockCols; + const internal::variable_if_dynamic m_startRow; + const internal::variable_if_dynamic m_startCol; + const internal::variable_if_dynamic m_blockRows; + const internal::variable_if_dynamic m_blockCols; }; /** \internal Internal implementation of dense Blocks in the direct access case.*/ @@ -314,6 +334,9 @@ class BlockImpl_dense : public MapBase > { typedef Block BlockType; + enum { + XprTypeIsRowMajor = 
(int(traits::Flags)&RowMajorBit) != 0 + }; public: typedef MapBase Base; @@ -322,10 +345,10 @@ class BlockImpl_dense /** Column or Row constructor */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index i) - : Base(internal::const_cast_ptr(&xpr.coeffRef( - (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0, - (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)), + : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) + || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()), BlockRows==1 ? 1 : xpr.rows(), BlockCols==1 ? 1 : xpr.cols()), m_xpr(xpr) @@ -335,29 +358,34 @@ class BlockImpl_dense /** Fixed-size constructor */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) - : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr) + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)), + m_xpr(xpr) { init(); } /** Dynamic-size constructor */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) - : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols), + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols), m_xpr(xpr) { init(); } - const typename internal::remove_all::type& nestedExpression() const + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& nestedExpression() const { return m_xpr; } /** \sa MapBase::innerStride() */ + EIGEN_DEVICE_FUNC inline Index innerStride() const { return internal::traits::HasSameStorageOrderAsXprType @@ -366,6 +394,7 @@ class BlockImpl_dense } /** \sa MapBase::outerStride() */ + EIGEN_DEVICE_FUNC inline Index 
outerStride() const { return m_outerStride; @@ -379,6 +408,7 @@ class BlockImpl_dense #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal used by allowAligned() */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols) : Base(data, blockRows, blockCols), m_xpr(xpr) { @@ -387,6 +417,7 @@ class BlockImpl_dense #endif protected: + EIGEN_DEVICE_FUNC void init() { m_outerStride = internal::traits::HasSameStorageOrderAsXprType diff --git a/nuparu/include/Eigen/src/Core/BooleanRedux.h b/nuparu/include/Eigen/src/Core/BooleanRedux.h index 6e37e031..8409d874 100644 --- a/nuparu/include/Eigen/src/Core/BooleanRedux.h +++ b/nuparu/include/Eigen/src/Core/BooleanRedux.h @@ -17,9 +17,10 @@ namespace internal { template struct all_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Derived::RowsAtCompileTime, - row = (UnrollCount-1) % Derived::RowsAtCompileTime + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) @@ -29,9 +30,9 @@ struct all_unroller }; template -struct all_unroller +struct all_unroller { - static inline bool run(const Derived &mat) { return mat.coeff(0, 0); } + static inline bool run(const Derived &/*mat*/) { return true; } }; template @@ -43,11 +44,12 @@ struct all_unroller template struct any_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Derived::RowsAtCompileTime, - row = (UnrollCount-1) % Derived::RowsAtCompileTime + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; - + static inline bool run(const Derived &mat) { return any_unroller::run(mat) || mat.coeff(row, col); @@ -55,9 +57,9 @@ struct any_unroller }; template -struct any_unroller +struct any_unroller { - static inline bool run(const Derived &mat) { return mat.coeff(0, 0); } + static inline bool run(const 
Derived & /*mat*/) { return false; } }; template @@ -78,19 +80,19 @@ struct any_unroller template inline bool DenseBase::all() const { + typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && CoeffReadCost != Dynamic - && NumTraits::AddCost != Dynamic - && SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; + Evaluator evaluator(derived()); if(unroll) - return internal::all_unroller::run(derived()); + return internal::all_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if (!coeff(i, j)) return false; + if (!evaluator.coeff(i, j)) return false; return true; } } @@ -102,19 +104,19 @@ inline bool DenseBase::all() const template inline bool DenseBase::any() const { + typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && CoeffReadCost != Dynamic - && NumTraits::AddCost != Dynamic - && SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; + Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller::run(derived()); + return internal::any_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if (coeff(i, j)) return true; + if (evaluator.coeff(i, j)) return true; return false; } } @@ -124,7 +126,7 @@ inline bool DenseBase::any() const * \sa all(), any() */ template -inline typename DenseBase::Index DenseBase::count() const +inline Eigen::Index DenseBase::count() const { return derived().template cast().template cast().sum(); } @@ -136,7 +138,11 @@ inline typename DenseBase::Index DenseBase::count() const template inline bool DenseBase::hasNaN() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return 
derived().array().isNaN().any(); +#else return !((derived().array()==derived().array()).all()); +#endif } /** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values. @@ -146,7 +152,11 @@ inline bool DenseBase::hasNaN() const template inline bool DenseBase::allFinite() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isFinite().all(); +#else return !((derived()-derived()).hasNaN()); +#endif } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/CMakeLists.txt b/nuparu/include/Eigen/src/Core/CMakeLists.txt index 2346fc2b..38c3afde 100644 --- a/nuparu/include/Eigen/src/Core/CMakeLists.txt +++ b/nuparu/include/Eigen/src/Core/CMakeLists.txt @@ -8,3 +8,4 @@ INSTALL(FILES ADD_SUBDIRECTORY(products) ADD_SUBDIRECTORY(util) ADD_SUBDIRECTORY(arch) +ADD_SUBDIRECTORY(functors) diff --git a/nuparu/include/Eigen/src/Core/CommaInitializer.h b/nuparu/include/Eigen/src/Core/CommaInitializer.h index a96867af..89bcd750 100644 --- a/nuparu/include/Eigen/src/Core/CommaInitializer.h +++ b/nuparu/include/Eigen/src/Core/CommaInitializer.h @@ -28,8 +28,8 @@ template struct CommaInitializer { typedef typename XprType::Scalar Scalar; - typedef typename XprType::Index Index; + EIGEN_DEVICE_FUNC inline CommaInitializer(XprType& xpr, const Scalar& s) : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1) { @@ -37,13 +37,27 @@ struct CommaInitializer } template + EIGEN_DEVICE_FUNC inline CommaInitializer(XprType& xpr, const DenseBase& other) : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows()) { m_xpr.block(0, 0, other.rows(), other.cols()) = other; } + /* Copy/Move constructor which transfers ownership. This is crucial in + * absence of return value optimization to avoid assertions during destruction. 
*/ + // FIXME in C++11 mode this could be replaced by a proper RValue constructor + EIGEN_DEVICE_FUNC + inline CommaInitializer(const CommaInitializer& o) + : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) { + // Mark original object as finished. In absence of R-value references we need to const_cast: + const_cast(o).m_row = m_xpr.rows(); + const_cast(o).m_col = m_xpr.cols(); + const_cast(o).m_currentBlockRows = 0; + } + /* inserts a scalar value in the target matrix */ + EIGEN_DEVICE_FUNC CommaInitializer& operator,(const Scalar& s) { if (m_col==m_xpr.cols()) @@ -63,6 +77,7 @@ struct CommaInitializer /* inserts a matrix expression in the target matrix */ template + EIGEN_DEVICE_FUNC CommaInitializer& operator,(const DenseBase& other) { if(other.cols()==0 || other.rows()==0) @@ -88,7 +103,11 @@ struct CommaInitializer return *this; } + EIGEN_DEVICE_FUNC inline ~CommaInitializer() +#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS + EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception) +#endif { eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows() && m_col == m_xpr.cols() @@ -102,9 +121,10 @@ struct CommaInitializer * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished()); * \endcode */ + EIGEN_DEVICE_FUNC inline XprType& finished() { return m_xpr; } - XprType& m_xpr; // target expression + XprType& m_xpr; // target expression Index m_row; // current row id Index m_col; // current col id Index m_currentBlockRows; // current block height diff --git a/nuparu/include/Eigen/src/Core/CoreEvaluators.h b/nuparu/include/Eigen/src/Core/CoreEvaluators.h new file mode 100644 index 00000000..f97dc33d --- /dev/null +++ b/nuparu/include/Eigen/src/Core/CoreEvaluators.h @@ -0,0 +1,1376 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_COREEVALUATORS_H +#define EIGEN_COREEVALUATORS_H + +namespace Eigen { + +namespace internal { + +// This class returns the evaluator kind from the expression storage kind. +// Default assumes index based accessors +template +struct storage_kind_to_evaluator_kind { + typedef IndexBased Kind; +}; + +// This class returns the evaluator shape from the expression storage kind. +// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc. +template struct storage_kind_to_shape; + +template<> struct storage_kind_to_shape { typedef DenseShape Shape; }; +template<> struct storage_kind_to_shape { typedef SolverShape Shape; }; +template<> struct storage_kind_to_shape { typedef PermutationShape Shape; }; +template<> struct storage_kind_to_shape { typedef TranspositionsShape Shape; }; + +// Evaluators have to be specialized with respect to various criteria such as: +// - storage/structure/shape +// - scalar type +// - etc. +// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators. +// We currently distinguish the following kind of evaluators: +// - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) +// - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. 
+// - mapbase_evaluator for Map, Block, Ref +// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) + +template< typename T, + typename LhsKind = typename evaluator_traits::Kind, + typename RhsKind = typename evaluator_traits::Kind, + typename LhsScalar = typename traits::Scalar, + typename RhsScalar = typename traits::Scalar> struct binary_evaluator; + +template< typename T, + typename Kind = typename evaluator_traits::Kind, + typename Scalar = typename T::Scalar> struct unary_evaluator; + +// evaluator_traits contains traits for evaluator + +template +struct evaluator_traits_base +{ + // by default, get evaluator kind and shape from storage + typedef typename storage_kind_to_evaluator_kind::StorageKind>::Kind Kind; + typedef typename storage_kind_to_shape::StorageKind>::Shape Shape; + + // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a + // temporary; 0 if not. + static const int AssumeAliasing = 0; +}; + +// Default evaluator traits +template +struct evaluator_traits : public evaluator_traits_base +{ +}; + + +// By default, we assume a unary expression: +template +struct evaluator : public unary_evaluator +{ + typedef unary_evaluator Base; + EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {} +}; + + +// TODO: Think about const-correctness +template +struct evaluator + : evaluator +{ + EIGEN_DEVICE_FUNC + explicit evaluator(const T& xpr) : evaluator(xpr) {} +}; + +// ---------- base class for all evaluators ---------- + +template +struct evaluator_base : public noncopyable +{ + // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. + typedef traits ExpressionTraits; + + enum { + Alignment = 0 + }; +}; + +// -------------------- Matrix and Array -------------------- +// +// evaluator is a common base class for the +// Matrix and Array evaluators. 
+// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, +// so no need for more sophisticated dispatching. + +template +struct evaluator > + : evaluator_base +{ + typedef PlainObjectBase PlainObjectType; + typedef typename PlainObjectType::Scalar Scalar; + typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = PlainObjectType::IsRowMajor, + IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime, + RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, + ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, + + CoeffReadCost = NumTraits::ReadCost, + Flags = traits::EvaluatorFlags, + Alignment = traits::Alignment + }; + + EIGEN_DEVICE_FUNC evaluator() + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) + : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 
0 : m.outerStride()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + if (IsRowMajor) + return m_data[row * m_outerStride.value() + col]; + else + return m_data[row + col * m_outerStride.value()]; + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_data[index]; + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + if (IsRowMajor) + return const_cast(m_data)[row * m_outerStride.value() + col]; + else + return const_cast(m_data)[row + col * m_outerStride.value()]; + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return const_cast(m_data)[index]; + } + + template + PacketType packet(Index row, Index col) const + { + if (IsRowMajor) + return ploadt(m_data + row * m_outerStride.value() + col); + else + return ploadt(m_data + row + col * m_outerStride.value()); + } + + template + PacketType packet(Index index) const + { + return ploadt(m_data + index); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + if (IsRowMajor) + return pstoret + (const_cast(m_data) + row * m_outerStride.value() + col, x); + else + return pstoret + (const_cast(m_data) + row + col * m_outerStride.value(), x); + } + + template + void writePacket(Index index, const PacketType& x) + { + return pstoret(const_cast(m_data) + index, x); + } + +protected: + const Scalar *m_data; + + // We do not need to know the outer stride for vectors + variable_if_dynamic m_outerStride; +}; + +template +struct evaluator > + : evaluator > > +{ + typedef Matrix XprType; + + EIGEN_DEVICE_FUNC evaluator() {} + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) + { } +}; + +template +struct evaluator > + : evaluator > > +{ + typedef Array XprType; + + EIGEN_DEVICE_FUNC evaluator() {} + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) + { } +}; + +// -------------------- Transpose 
-------------------- + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef Transpose XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags ^ RowMajorBit, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(col, row); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(col, row); + } + + EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(col, row); + } + + template + PacketType packet(Index index) const + { + return m_argImpl.template packet(index); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + m_argImpl.template writePacket(col, row, x); + } + + template + void writePacket(Index index, const PacketType& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + evaluator m_argImpl; +}; + +// -------------------- CwiseNullaryOp -------------------- +// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator. +// Likewise, there is not need to more sophisticated dispatching here. + +template +struct evaluator > + : evaluator_base > +{ + typedef CwiseNullaryOp XprType; + typedef typename internal::remove_all::type PlainObjectTypeCleaned; + + enum { + CoeffReadCost = internal::functor_traits::Cost, + + Flags = (evaluator::Flags + & ( HereditaryBits + | (functor_has_linear_access::ret ? 
LinearAccessBit : 0) + | (functor_traits::PacketAccess ? PacketAccessBit : 0))) + | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), + Alignment = AlignedMax + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) + : m_functor(n.functor()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(row, col); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(index); + } + + template + PacketType packet(Index row, Index col) const + { + return m_functor.template packetOp(row, col); + } + + template + PacketType packet(Index index) const + { + return m_functor.template packetOp(index); + } + +protected: + const NullaryOp m_functor; +}; + +// -------------------- CwiseUnaryOp -------------------- + +template +struct unary_evaluator, IndexBased > + : evaluator_base > +{ + typedef CwiseUnaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = evaluator::Flags + & (HereditaryBits | LinearAccessBit | (functor_traits::PacketAccess ? 
PacketAccessBit : 0)), + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_argImpl.coeff(index)); + } + + template + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_argImpl.template packet(row, col)); + } + + template + PacketType packet(Index index) const + { + return m_functor.packetOp(m_argImpl.template packet(index)); + } + +protected: + const UnaryOp m_functor; + evaluator m_argImpl; +}; + +// -------------------- CwiseBinaryOp -------------------- + +// this is a binary expression +template +struct evaluator > + : public binary_evaluator > +{ + typedef CwiseBinaryOp XprType; + typedef binary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct binary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseBinaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + LhsFlags = evaluator::Flags, + RhsFlags = evaluator::Flags, + SameType = is_same::value, + StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( + HereditaryBits + | (int(LhsFlags) & int(RhsFlags) & + ( (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? 
PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment,evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); + } + + template + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_lhsImpl.template packet(row, col), + m_rhsImpl.template packet(row, col)); + } + + template + PacketType packet(Index index) const + { + return m_functor.packetOp(m_lhsImpl.template packet(index), + m_rhsImpl.template packet(index)); + } + +protected: + const BinaryOp m_functor; + evaluator m_lhsImpl; + evaluator m_rhsImpl; +}; + +// -------------------- CwiseUnaryView -------------------- + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef CwiseUnaryView XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = (evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)), + + Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... 
+ }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_unaryOp(op.functor()), + m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_unaryOp(m_argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_unaryOp(m_argImpl.coeff(index)); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_unaryOp(m_argImpl.coeffRef(row, col)); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_unaryOp(m_argImpl.coeffRef(index)); + } + +protected: + const UnaryOp m_unaryOp; + evaluator m_argImpl; +}; + +// -------------------- Map -------------------- + +// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ? 
+// but that might complicate template specialization +template +struct mapbase_evaluator; + +template +struct mapbase_evaluator : evaluator_base +{ + typedef Derived XprType; + typedef typename XprType::PointerType PointerType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::RowsAtCompileTime, + ColsAtCompileTime = XprType::ColsAtCompileTime, + CoeffReadCost = NumTraits::ReadCost + }; + + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) + : m_data(const_cast(map.data())), + m_xpr(map) + { + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_data[index * m_xpr.innerStride()]; + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_data[index * m_xpr.innerStride()]; + } + + template + PacketType packet(Index row, Index col) const + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); + return internal::ploadt(ptr); + } + + template + PacketType packet(Index index) const + { + return internal::ploadt(m_data + index * m_xpr.innerStride()); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); + return internal::pstoret(ptr, x); + } + + template + void writePacket(Index index, const PacketType& x) + { + internal::pstoret(m_data + index * m_xpr.innerStride(), x); + } + +protected: + 
PointerType m_data; + const XprType& m_xpr; +}; + +template +struct evaluator > + : public mapbase_evaluator, PlainObjectType> +{ + typedef Map XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + + PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit), + LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? 
~int(0) : ~int(LinearAccessBit), + Flags = int( evaluator::Flags) & (LinearAccessMask&PacketAccessMask), + + Alignment = int(MapOptions)&int(AlignedMask) + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) + : mapbase_evaluator(map) + { } +}; + +// -------------------- Ref -------------------- + +template +struct evaluator > + : public mapbase_evaluator, PlainObjectType> +{ + typedef Ref XprType; + + enum { + Flags = evaluator >::Flags, + Alignment = evaluator >::Alignment + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) + : mapbase_evaluator(ref) + { } +}; + +// -------------------- Block -------------------- + +template::ret> struct block_evaluator; + +template +struct evaluator > + : block_evaluator +{ + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime, + + ArgTypeIsRowMajor = (int(evaluator::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ArgTypeIsRowMajor, + HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsArgType + ? 
int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) + && (InnerStrideAtCompileTime == 1) + ? PacketAccessBit : 0, + + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + FlagsRowMajorBit = XprType::Flags&RowMajorBit, + Flags0 = evaluator::Flags & ( (HereditaryBits & ~RowMajorBit) | + DirectAccessBit | + MaskPacketAccessBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, + + PacketAlignment = unpacket_traits::alignment, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) + }; + typedef block_evaluator block_evaluator_type; + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } +}; + +// no direct-access => dispatch to a unary evaluator +template +struct block_evaluator + : unary_evaluator > +{ + typedef Block XprType; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : unary_evaluator(block) + {} +}; + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef Block XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) + : m_argImpl(block.nestedExpression()), + m_startRow(block.startRow()), + m_startCol(block.startCol()) + { } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + RowsAtCompileTime = XprType::RowsAtCompileTime + }; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC 
CoeffReturnType coeff(Index index) const + { + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + template + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(m_startRow.value() + row, m_startCol.value() + col); + } + + template + PacketType packet(Index index) const + { + return packet(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + return m_argImpl.template writePacket(m_startRow.value() + row, m_startCol.value() + col, x); + } + + template + void writePacket(Index index, const PacketType& x) + { + return writePacket(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, + x); + } + +protected: + evaluator m_argImpl; + const variable_if_dynamic m_startRow; + const variable_if_dynamic m_startCol; +}; + +// TODO: This evaluator does not actually use the child evaluator; +// all action is via the data() as returned by the Block expression. + +template +struct block_evaluator + : mapbase_evaluator, + typename Block::PlainObject> +{ + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : mapbase_evaluator(block) + { + // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime + eigen_assert(((size_t(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned"); + } +}; + + +// -------------------- Select -------------------- +// NOTE shall we introduce a ternary_evaluator? 
+ +// TODO enable vectorization for Select +template +struct evaluator > + : evaluator_base > +{ + typedef Select XprType; + enum { + CoeffReadCost = evaluator::CoeffReadCost + + EIGEN_PLAIN_ENUM_MAX(evaluator::CoeffReadCost, + evaluator::CoeffReadCost), + + Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits, + + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment) + }; + + inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) + : m_conditionImpl(select.conditionMatrix()), + m_thenImpl(select.thenMatrix()), + m_elseImpl(select.elseMatrix()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + if (m_conditionImpl.coeff(row, col)) + return m_thenImpl.coeff(row, col); + else + return m_elseImpl.coeff(row, col); + } + + inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + if (m_conditionImpl.coeff(index)) + return m_thenImpl.coeff(index); + else + return m_elseImpl.coeff(index); + } + +protected: + evaluator m_conditionImpl; + evaluator m_thenImpl; + evaluator m_elseImpl; +}; + + +// -------------------- Replicate -------------------- + +template +struct unary_evaluator > + : evaluator_base > +{ + typedef Replicate XprType; + typedef typename XprType::CoeffReturnType CoeffReturnType; + enum { + Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor + }; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + LinearAccessMask = XprType::IsVectorAtCompileTime ? 
LinearAccessBit : 0, + Flags = (evaluator::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits::Flags & RowMajorBit), + + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) + : m_arg(replicate.nestedExpression()), + m_argImpl(m_arg), + m_rows(replicate.nestedExpression().rows()), + m_cols(replicate.nestedExpression().cols()) + {} + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows.value(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? col + : col % m_cols.value(); + + return m_argImpl.coeff(actual_row, actual_col); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_index = internal::traits::RowsAtCompileTime==1 + ? (ColFactor==1 ? index : index%m_cols.value()) + : (RowFactor==1 ? index : index%m_rows.value()); + + return m_argImpl.coeff(actual_index); + } + + template + PacketType packet(Index row, Index col) const + { + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows.value(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? col + : col % m_cols.value(); + + return m_argImpl.template packet(actual_row, actual_col); + } + + template + PacketType packet(Index index) const + { + const Index actual_index = internal::traits::RowsAtCompileTime==1 + ? (ColFactor==1 ? index : index%m_cols.value()) + : (RowFactor==1 ? 
index : index%m_rows.value()); + + return m_argImpl.template packet(actual_index); + } + +protected: + const ArgTypeNested m_arg; + evaluator m_argImpl; + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + + +// -------------------- PartialReduxExpr -------------------- + +template< typename ArgType, typename MemberOp, int Direction> +struct evaluator > + : evaluator_base > +{ + typedef PartialReduxExpr XprType; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + typedef typename ArgType::Scalar InputScalar; + typedef typename XprType::Scalar Scalar; + enum { + TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) + }; + typedef typename MemberOp::template Cost CostOpType; + enum { + CoeffReadCost = TraversalSize==Dynamic ? HugeCost + : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value), + + Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&(HereditaryBits&(~RowMajorBit))), + + Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) + : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? 
HugeCost : int(CostOpType::value)); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(j)); + else + return m_functor(m_arg.row(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(index)); + else + return m_functor(m_arg.row(index)); + } + +protected: + const ArgTypeNested m_arg; + const MemberOp m_functor; +}; + + +// -------------------- MatrixWrapper and ArrayWrapper -------------------- +// +// evaluator_wrapper_base is a common base class for the +// MatrixWrapper and ArrayWrapper evaluators. + +template +struct evaluator_wrapper_base + : evaluator_base +{ + typedef typename remove_all::type ArgType; + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + + typedef typename ArgType::Scalar Scalar; + typedef typename ArgType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(row, col); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(row, col); + } + + template + PacketType packet(Index index) const + { + return m_argImpl.template packet(index); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + m_argImpl.template 
writePacket(row, col, x); + } + + template + void writePacket(Index index, const PacketType& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + evaluator m_argImpl; +}; + +template +struct unary_evaluator > + : evaluator_wrapper_base > +{ + typedef MatrixWrapper XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) + { } +}; + +template +struct unary_evaluator > + : evaluator_wrapper_base > +{ + typedef ArrayWrapper XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) + { } +}; + + +// -------------------- Reverse -------------------- + +// defined in Reverse.h: +template struct reverse_packet_cond; + +template +struct unary_evaluator > + : evaluator_base > +{ + typedef Reverse XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::IsRowMajor, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + ReversePacket = (Direction == BothDirections) + || ((Direction == Vertical) && IsColMajor) + || ((Direction == Horizontal) && IsRowMajor), + + CoeffReadCost = evaluator::CoeffReadCost, + + // let's enable LinearAccess only with vectorization because of the product overhead + // FIXME enable DirectAccess with negative strides? + Flags0 = evaluator::Flags, + LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) + || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1)) + ? LinearAccessBit : 0, + + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess), + + Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f. 
+ }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse) + : m_argImpl(reverse.nestedExpression()), + m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1), + m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1) + { } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, + ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, + ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); + } + + template + PacketType packet(Index row, Index col) const + { + enum { + PacketSize = unpacket_traits::size, + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 + }; + typedef internal::reverse_packet_cond reverse_packet; + return reverse_packet::run(m_argImpl.template packet( + ReverseRow ? m_rows.value() - row - OffsetRow : row, + ReverseCol ? m_cols.value() - col - OffsetCol : col)); + } + + template + PacketType packet(Index index) const + { + enum { PacketSize = unpacket_traits::size }; + return preverse(m_argImpl.template packet(m_rows.value() * m_cols.value() - index - PacketSize)); + } + + template + void writePacket(Index row, Index col, const PacketType& x) + { + // FIXME we could factorize some code with packet(i,j) + enum { + PacketSize = unpacket_traits::size, + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 + }; + typedef internal::reverse_packet_cond reverse_packet; + m_argImpl.template writePacket( + ReverseRow ? 
m_rows.value() - row - OffsetRow : row, + ReverseCol ? m_cols.value() - col - OffsetCol : col, + reverse_packet::run(x)); + } + + template + void writePacket(Index index, const PacketType& x) + { + enum { PacketSize = unpacket_traits::size }; + m_argImpl.template writePacket + (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x)); + } + +protected: + evaluator m_argImpl; + + // If we do not reverse rows, then we do not need to know the number of rows; same for columns + // Nonetheless, in this case it is important to set to 1 such that the coeff(index) method works fine for vectors. + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + + +// -------------------- Diagonal -------------------- + +template +struct evaluator > + : evaluator_base > +{ + typedef Diagonal XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + Flags = (unsigned int)evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit, + + Alignment = 0 + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) + : m_argImpl(diagonal.nestedExpression()), + m_index(diagonal.index()) + { } + + typedef typename XprType::Scalar Scalar; + // FIXME having to check whether ArgType is sparse here i not very nice. 
+ typedef typename internal::conditional::value, + typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType; + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const + { + return m_argImpl.coeff(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index + rowOffset(), index + colOffset()); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index) + { + return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); + } + +protected: + evaluator m_argImpl; + const internal::variable_if_dynamicindex m_index; + +private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } +}; + + +//---------------------------------------------------------------------- +// deprecated code +//---------------------------------------------------------------------- + +// -------------------- EvalToTemp -------------------- + +// expression class for evaluating nested expression to a temporary + +template class EvalToTemp; + +template +struct traits > + : public traits +{ }; + +template +class EvalToTemp + : public dense_xpr_base >::type +{ + public: + + typedef typename dense_xpr_base::type Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) + + explicit EvalToTemp(const ArgType& arg) + : m_arg(arg) + { } + + const ArgType& arg() const + { + return m_arg; + } + + Index rows() const + { + return m_arg.rows(); + } + + Index cols() const + { + return m_arg.cols(); + } + + private: + const ArgType& m_arg; +}; + +template +struct evaluator > + : public evaluator +{ + typedef EvalToTemp XprType; + typedef typename ArgType::PlainObject PlainObject; + typedef evaluator Base; + + 
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.arg()) + { + ::new (static_cast(this)) Base(m_result); + } + + // This constructor is used when nesting an EvalTo evaluator in another evaluator + EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) + : m_result(arg) + { + ::new (static_cast(this)) Base(m_result); + } + +protected: + PlainObject m_result; +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COREEVALUATORS_H diff --git a/nuparu/include/Eigen/src/Core/CoreIterators.h b/nuparu/include/Eigen/src/Core/CoreIterators.h index 6da4683d..4eb42b93 100644 --- a/nuparu/include/Eigen/src/Core/CoreIterators.h +++ b/nuparu/include/Eigen/src/Core/CoreIterators.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,47 +15,113 @@ namespace Eigen { /* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core */ -/** \ingroup SparseCore_Module - * \class InnerIterator - * \brief An InnerIterator allows to loop over the element of a sparse (or dense) matrix or expression - * - * todo +namespace internal { + +template +class inner_iterator_selector; + +} + +/** \class InnerIterator + * \brief An InnerIterator allows to loop over the element of any matrix expression. + * + * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed. 
+ * + * TODO: add a usage example */ +template +class InnerIterator +{ +protected: + typedef internal::inner_iterator_selector::Kind> IteratorType; + typedef internal::evaluator EvaluatorType; + typedef typename internal::traits::Scalar Scalar; +public: + /** Construct an iterator over the \a outerId -th row or column of \a xpr */ + InnerIterator(const XprType &xpr, const Index &outerId) + : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize()) + {} + + /// \returns the value of the current coefficient. + EIGEN_STRONG_INLINE Scalar value() const { return m_iter.value(); } + /** Increment the iterator \c *this to the next non-zero coefficient. + * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView + */ + EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; } + /// \returns the column or row index of the current coefficient. + EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } + /// \returns the row index of the current coefficient. + EIGEN_STRONG_INLINE Index row() const { return m_iter.row(); } + /// \returns the column index of the current coefficient. + EIGEN_STRONG_INLINE Index col() const { return m_iter.col(); } + /// \returns \c true if the iterator \c *this still references a valid coefficient. 
+ EIGEN_STRONG_INLINE operator bool() const { return m_iter; } + +protected: + EvaluatorType m_eval; + IteratorType m_iter; +private: + // If you get here, then you're not using the right InnerIterator type, e.g.: + // SparseMatrix A; + // SparseMatrix::InnerIterator it(A,0); + template InnerIterator(const EigenBase&,Index outer); +}; + +namespace internal { -// generic version for dense matrix and expressions -template class DenseBase::InnerIterator +// Generic inner iterator implementation for dense objects +template +class inner_iterator_selector { - protected: - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; - - enum { IsRowMajor = (Derived::Flags&RowMajorBit)==RowMajorBit }; - public: - EIGEN_STRONG_INLINE InnerIterator(const Derived& expr, Index outer) - : m_expression(expr), m_inner(0), m_outer(outer), m_end(expr.innerSize()) - {} - - EIGEN_STRONG_INLINE Scalar value() const - { - return (IsRowMajor) ? m_expression.coeff(m_outer, m_inner) - : m_expression.coeff(m_inner, m_outer); - } - - EIGEN_STRONG_INLINE InnerIterator& operator++() { m_inner++; return *this; } - - EIGEN_STRONG_INLINE Index index() const { return m_inner; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? index() : m_outer; } - - EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } - - protected: - const Derived& m_expression; - Index m_inner; - const Index m_outer; - const Index m_end; +protected: + typedef evaluator EvaluatorType; + typedef typename traits::Scalar Scalar; + enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit }; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize) + : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize) + {} + + EIGEN_STRONG_INLINE Scalar value() const + { + return (IsRowMajor) ? 
m_eval.coeff(m_outer, m_inner) + : m_eval.coeff(m_inner, m_outer); + } + + EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; } + + EIGEN_STRONG_INLINE Index index() const { return m_inner; } + inline Index row() const { return IsRowMajor ? m_outer : index(); } + inline Index col() const { return IsRowMajor ? index() : m_outer; } + + EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } + +protected: + const EvaluatorType& m_eval; + Index m_inner; + const Index m_outer; + const Index m_end; }; +// For iterator-based evaluator, inner-iterator is already implemented as +// evaluator<>::InnerIterator +template +class inner_iterator_selector + : public evaluator::InnerIterator +{ +protected: + typedef typename evaluator::InnerIterator Base; + typedef evaluator EvaluatorType; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/) + : Base(eval, outerId) + {} +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_COREITERATORS_H diff --git a/nuparu/include/Eigen/src/Core/CwiseBinaryOp.h b/nuparu/include/Eigen/src/Core/CwiseBinaryOp.h index 586f77aa..e42c3031 100644 --- a/nuparu/include/Eigen/src/Core/CwiseBinaryOp.h +++ b/nuparu/include/Eigen/src/Core/CwiseBinaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -56,72 +56,51 @@ struct traits > typename Rhs::Scalar ) >::type Scalar; - typedef typename promote_storage_type::StorageKind, - typename traits::StorageKind>::ret StorageKind; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; + typedef typename cwise_promote_storage_type::StorageKind, + typename traits::StorageKind, + BinaryOp>::ret StorageKind; + typedef typename promote_index_type::StorageIndex, + typename traits::StorageIndex>::type StorageIndex; typedef typename Lhs::Nested LhsNested; typedef typename Rhs::Nested RhsNested; typedef typename remove_reference::type _LhsNested; typedef typename remove_reference::type _RhsNested; enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - SameType = is_same::value, - StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit), - Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( - HereditaryBits - | (int(LhsFlags) & int(RhsFlags) & - ( AlignedBit - | (StorageOrdersAgree ? LinearAccessBit : 0) - | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) - ) - ) - ), - Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), - CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits::Cost + Flags = _LhsNested::Flags & RowMajorBit }; }; } // end namespace internal -// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor -// that would take two operands of different types. If there were such an example, then this check should be -// moved to the BinaryOp functors, on a per-case basis. 
This would however require a change in the BinaryOp functors, as -// currently they take only one typename Scalar template parameter. -// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. -// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to -// add together a float matrix and a double matrix. -#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ - EIGEN_STATIC_ASSERT((internal::functor_is_product_like::ret \ - ? int(internal::scalar_product_traits::Defined) \ - : int(internal::is_same::value)), \ - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - template class CwiseBinaryOpImpl; -template -class CwiseBinaryOp : internal::no_assignment_operator, +template +class CwiseBinaryOp : public CwiseBinaryOpImpl< - BinaryOp, Lhs, Rhs, - typename internal::promote_storage_type::StorageKind, - typename internal::traits::StorageKind>::ret> + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>, + internal::no_assignment_operator { public: + + typedef typename internal::remove_all::type Lhs; + typedef typename internal::remove_all::type Rhs; typedef typename CwiseBinaryOpImpl< - BinaryOp, Lhs, Rhs, - typename internal::promote_storage_type::StorageKind, - typename internal::traits::StorageKind>::ret>::Base Base; + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) - typedef typename internal::nested::type LhsNested; - typedef typename internal::nested::type RhsNested; + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; typedef typename 
internal::remove_reference::type _LhsNested; typedef typename internal::remove_reference::type _RhsNested; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp()) : m_lhs(aLhs), m_rhs(aRhs), m_functor(func) { @@ -131,6 +110,7 @@ class CwiseBinaryOp : internal::no_assignment_operator, eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { // return the fixed size type if available to enable compile time optimizations if (internal::traits::type>::RowsAtCompileTime==Dynamic) @@ -138,6 +118,7 @@ class CwiseBinaryOp : internal::no_assignment_operator, else return m_lhs.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { // return the fixed size type if available to enable compile time optimizations if (internal::traits::type>::ColsAtCompileTime==Dynamic) @@ -147,10 +128,13 @@ class CwiseBinaryOp : internal::no_assignment_operator, } /** \returns the left hand side nested expression */ + EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; } /** \returns the right hand side nested expression */ + EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; } /** \returns the functor representing the binary operation */ + EIGEN_DEVICE_FUNC const BinaryOp& functor() const { return m_functor; } protected: @@ -159,41 +143,13 @@ class CwiseBinaryOp : internal::no_assignment_operator, const BinaryOp m_functor; }; -template -class CwiseBinaryOpImpl - : public internal::dense_xpr_base >::type +// Generic API dispatcher +template +class CwiseBinaryOpImpl + : public internal::generic_xpr_base >::type { - typedef CwiseBinaryOp Derived; - public: - - typedef typename internal::dense_xpr_base >::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE( Derived ) - - EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const - { - return derived().functor()(derived().lhs().coeff(rowId, colId), - 
derived().rhs().coeff(rowId, colId)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return derived().functor().packetOp(derived().lhs().template packet(rowId, colId), - derived().rhs().template packet(rowId, colId)); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return derived().functor()(derived().lhs().coeff(index), - derived().rhs().coeff(index)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return derived().functor().packetOp(derived().lhs().template packet(index), - derived().rhs().template packet(index)); - } +public: + typedef typename internal::generic_xpr_base >::type Base; }; /** replaces \c *this by \c *this - \a other. @@ -205,8 +161,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator-=(const MatrixBase &other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -219,11 +174,11 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator+=(const MatrixBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } } // end namespace Eigen #endif // EIGEN_CWISE_BINARY_OP_H + diff --git a/nuparu/include/Eigen/src/Core/CwiseNullaryOp.h b/nuparu/include/Eigen/src/Core/CwiseNullaryOp.h index a93bab2d..2bc6933d 100644 --- a/nuparu/include/Eigen/src/Core/CwiseNullaryOp.h +++ b/nuparu/include/Eigen/src/Core/CwiseNullaryOp.h @@ -35,37 +35,35 @@ template struct traits > : traits { enum { - Flags = (traits::Flags - & ( HereditaryBits - | (functor_has_linear_access::ret ? LinearAccessBit : 0) - | (functor_traits::PacketAccess ? PacketAccessBit : 0))) - | (functor_traits::IsRepeatable ? 
0 : EvalBeforeNestingBit), - CoeffReadCost = functor_traits::Cost + Flags = traits::Flags & RowMajorBit }; }; } template -class CwiseNullaryOp : internal::no_assignment_operator, - public internal::dense_xpr_base< CwiseNullaryOp >::type +class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp >::type, internal::no_assignment_operator { public: typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp) - CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp()) - : m_rows(nbRows), m_cols(nbCols), m_functor(func) + EIGEN_DEVICE_FUNC + CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp()) + : m_rows(rows), m_cols(cols), m_functor(func) { - eigen_assert(nbRows >= 0 - && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows) - && nbCols >= 0 - && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols)); + eigen_assert(rows >= 0 + && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) + && cols >= 0 + && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const { return m_functor(rowId, colId); @@ -77,6 +75,7 @@ class CwiseNullaryOp : internal::no_assignment_operator, return m_functor.packetOp(rowId, colId); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const { return m_functor(index); @@ -89,6 +88,7 @@ class CwiseNullaryOp : internal::no_assignment_operator, } /** \returns the functor representing the nullary operation */ + EIGEN_DEVICE_FUNC const NullaryOp& functor() const { return m_functor; } protected: @@ -113,10 +113,10 @@ class CwiseNullaryOp : internal::no_assignment_operator, */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp 
+EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) { - return CwiseNullaryOp(rows, cols, func); + return CwiseNullaryOp(rows, cols, func); } /** \returns an expression of a matrix defined by a custom functor \a func @@ -132,16 +132,19 @@ DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f * * The template parameter \a CustomNullaryOp is the type of the functor. * + * Here is an example with C++11 random generators: \include random_cpp11.cpp + * Output: \verbinclude random_cpp11.out + * * \sa class CwiseNullaryOp */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); - else return CwiseNullaryOp(size, 1, func); + if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); + else return CwiseNullaryOp(size, 1, func); } /** \returns an expression of a matrix defined by a custom functor \a func @@ -155,19 +158,19 @@ DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(const CustomNullaryOp& func) { - return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); + return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); } /** \returns an expression of a constant matrix of value \a value * - * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this DenseBase type. * * This variant is meant to be used for dynamic-size matrix types. 
For fixed-size types, - * it is redundant to pass \a nbRows and \a nbCols as arguments, so Zero() should be used + * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used * instead. * * The template parameter \a CustomNullaryOp is the type of the functor. @@ -176,9 +179,9 @@ DenseBase::NullaryExpr(const CustomNullaryOp& func) */ template EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType -DenseBase::Constant(Index nbRows, Index nbCols, const Scalar& value) +DenseBase::Constant(Index rows, Index cols, const Scalar& value) { - return DenseBase::NullaryExpr(nbRows, nbCols, internal::scalar_constant_op(value)); + return DenseBase::NullaryExpr(rows, cols, internal::scalar_constant_op(value)); } /** \returns an expression of a constant matrix of value \a value @@ -242,7 +245,7 @@ EIGEN_STRONG_INLINE const typename DenseBase::SequentialLinSpacedReturn DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); } /** @@ -255,7 +258,7 @@ DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) - return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); } /** @@ -276,7 +279,7 @@ EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedRetu DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); } /** @@ -289,7 
+292,7 @@ DenseBase::LinSpaced(const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) - return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); } /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ @@ -297,9 +300,10 @@ template bool DenseBase::isApproxToConstant (const Scalar& val, const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if(!internal::isApprox(this->coeff(i, j), val, prec)) + if(!internal::isApprox(self.coeff(i, j), val, prec)) return false; return true; } @@ -353,8 +357,8 @@ PlainObjectBase::setConstant(Index size, const Scalar& val) /** Resizes to the given size, and sets all coefficients in this expression to the given \a value. 
* - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * \param val the value to which all coefficients are set * * Example: \include Matrix_setConstant_int_int.cpp @@ -364,9 +368,9 @@ PlainObjectBase::setConstant(Index size, const Scalar& val) */ template EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setConstant(Index nbRows, Index nbCols, const Scalar& val) +PlainObjectBase::setConstant(Index rows, Index cols, const Scalar& val) { - resize(nbRows, nbCols); + resize(rows, cols); return setConstant(val); } @@ -387,7 +391,7 @@ template EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); + return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); } /** @@ -425,9 +429,9 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, */ template EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType -DenseBase::Zero(Index nbRows, Index nbCols) +DenseBase::Zero(Index rows, Index cols) { - return Constant(nbRows, nbCols, Scalar(0)); + return Constant(rows, cols, Scalar(0)); } /** \returns an expression of a zero vector. @@ -481,9 +485,10 @@ DenseBase::Zero() template bool DenseBase::isZero(const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) return false; return true; } @@ -520,8 +525,8 @@ PlainObjectBase::setZero(Index newSize) /** Resizes to the given size, and sets all coefficients in this expression to zero. 
* - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setZero_int_int.cpp * Output: \verbinclude Matrix_setZero_int_int.out @@ -530,9 +535,9 @@ PlainObjectBase::setZero(Index newSize) */ template EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setZero(Index nbRows, Index nbCols) +PlainObjectBase::setZero(Index rows, Index cols) { - resize(nbRows, nbCols); + resize(rows, cols); return setConstant(Scalar(0)); } @@ -540,7 +545,7 @@ PlainObjectBase::setZero(Index nbRows, Index nbCols) /** \returns an expression of a matrix where all coefficients equal one. * - * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this MatrixBase type. * * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, @@ -554,9 +559,9 @@ PlainObjectBase::setZero(Index nbRows, Index nbCols) */ template EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType -DenseBase::Ones(Index nbRows, Index nbCols) +DenseBase::Ones(Index rows, Index cols) { - return Constant(nbRows, nbCols, Scalar(1)); + return Constant(rows, cols, Scalar(1)); } /** \returns an expression of a vector where all coefficients equal one. @@ -646,8 +651,8 @@ PlainObjectBase::setOnes(Index newSize) /** Resizes to the given size, and sets all coefficients in this expression to one. 
* - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setOnes_int_int.cpp * Output: \verbinclude Matrix_setOnes_int_int.out @@ -656,9 +661,9 @@ PlainObjectBase::setOnes(Index newSize) */ template EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setOnes(Index nbRows, Index nbCols) +PlainObjectBase::setOnes(Index rows, Index cols) { - resize(nbRows, nbCols); + resize(rows, cols); return setConstant(Scalar(1)); } @@ -666,7 +671,7 @@ PlainObjectBase::setOnes(Index nbRows, Index nbCols) /** \returns an expression of the identity matrix (not necessarily square). * - * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this MatrixBase type. * * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, @@ -680,9 +685,9 @@ PlainObjectBase::setOnes(Index nbRows, Index nbCols) */ template EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType -MatrixBase::Identity(Index nbRows, Index nbCols) +MatrixBase::Identity(Index rows, Index cols) { - return DenseBase::NullaryExpr(nbRows, nbCols, internal::scalar_identity_op()); + return DenseBase::NullaryExpr(rows, cols, internal::scalar_identity_op()); } /** \returns an expression of the identity matrix (not necessarily square). 
@@ -716,18 +721,19 @@ template bool MatrixBase::isIdentity (const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) { for(Index i = 0; i < rows(); ++i) { if(i == j) { - if(!internal::isApprox(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isApprox(self.coeff(i, j), static_cast(1), prec)) return false; } else { - if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) return false; } } @@ -740,6 +746,7 @@ namespace internal { template=16)> struct setIdentity_impl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Derived& run(Derived& m) { return m = Derived::Identity(m.rows(), m.cols()); @@ -749,7 +756,7 @@ struct setIdentity_impl template struct setIdentity_impl { - typedef typename Derived::Index Index; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Derived& run(Derived& m) { m.setZero(); @@ -776,8 +783,8 @@ EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() /** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this. 
* - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setIdentity_int_int.cpp * Output: \verbinclude Matrix_setIdentity_int_int.out @@ -785,9 +792,9 @@ EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity() */ template -EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index nbRows, Index nbCols) +EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index rows, Index cols) { - derived().resize(nbRows, nbCols); + derived().resize(rows, cols); return setIdentity(); } diff --git a/nuparu/include/Eigen/src/Core/CwiseUnaryOp.h b/nuparu/include/Eigen/src/Core/CwiseUnaryOp.h index f2de749f..da1d1992 100644 --- a/nuparu/include/Eigen/src/Core/CwiseUnaryOp.h +++ b/nuparu/include/Eigen/src/Core/CwiseUnaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -44,10 +44,7 @@ struct traits > typedef typename XprType::Nested XprTypeNested; typedef typename remove_reference::type _XprTypeNested; enum { - Flags = _XprTypeNested::Flags & ( - HereditaryBits | LinearAccessBit | AlignedBit - | (functor_traits::PacketAccess ? 
PacketAccessBit : 0)), - CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits::Cost + Flags = _XprTypeNested::Flags & RowMajorBit }; }; } @@ -56,28 +53,34 @@ template class CwiseUnaryOpImpl; template -class CwiseUnaryOp : internal::no_assignment_operator, - public CwiseUnaryOpImpl::StorageKind> +class CwiseUnaryOp : public CwiseUnaryOpImpl::StorageKind>, internal::no_assignment_operator { public: typedef typename CwiseUnaryOpImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) + typedef typename internal::remove_all::type NestedExpression; - inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + EIGEN_DEVICE_FUNC + explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) : m_xpr(xpr), m_functor(func) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); } /** \returns the functor representing the unary operation */ + EIGEN_DEVICE_FUNC const UnaryOp& functor() const { return m_functor; } /** \returns the nested expression */ + EIGEN_DEVICE_FUNC const typename internal::remove_all::type& nestedExpression() const { return m_xpr; } /** \returns the nested expression */ + EIGEN_DEVICE_FUNC typename internal::remove_all::type& nestedExpression() { return m_xpr.const_cast_derived(); } @@ -86,39 +89,13 @@ class CwiseUnaryOp : internal::no_assignment_operator, const UnaryOp m_functor; }; -// This is the generic implementation for dense storage. -// It can be used for any expression types implementing the dense concept. 
-template -class CwiseUnaryOpImpl - : public internal::dense_xpr_base >::type +// Generic API dispatcher +template +class CwiseUnaryOpImpl + : public internal::generic_xpr_base >::type { - public: - - typedef CwiseUnaryOp Derived; - typedef typename internal::dense_xpr_base >::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) - - EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const - { - return derived().functor()(derived().nestedExpression().coeff(rowId, colId)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return derived().functor().packetOp(derived().nestedExpression().template packet(rowId, colId)); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return derived().functor()(derived().nestedExpression().coeff(index)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return derived().functor().packetOp(derived().nestedExpression().template packet(index)); - } +public: + typedef typename internal::generic_xpr_base >::type Base; }; } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/CwiseUnaryView.h b/nuparu/include/Eigen/src/Core/CwiseUnaryView.h index b2638d32..72244751 100644 --- a/nuparu/include/Eigen/src/Core/CwiseUnaryView.h +++ b/nuparu/include/Eigen/src/Core/CwiseUnaryView.h @@ -37,8 +37,8 @@ struct traits > typedef typename MatrixType::Nested MatrixTypeNested; typedef typename remove_all::type _MatrixTypeNested; enum { - Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)), - CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits::Cost, + FlagsLvalueBit = is_lvalue::value ? 
LvalueBit : 0, + Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions MatrixTypeInnerStride = inner_stride_at_compile_time::ret, // need to cast the sizeof's from size_t to int explicitly, otherwise: // "error: no integral type can represent all of the enumerator values @@ -62,8 +62,9 @@ class CwiseUnaryView : public CwiseUnaryViewImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) + typedef typename internal::remove_all::type NestedExpression; - inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp()) + explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) : m_matrix(mat), m_functor(func) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView) @@ -83,11 +84,19 @@ class CwiseUnaryView : public CwiseUnaryViewImpl::type m_matrix; + typename internal::ref_selector::type m_matrix; ViewOp m_functor; }; +// Generic API dispatcher +template +class CwiseUnaryViewImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; +}; + template class CwiseUnaryViewImpl : public internal::dense_xpr_base< CwiseUnaryView >::type @@ -100,38 +109,18 @@ class CwiseUnaryViewImpl EIGEN_DENSE_PUBLIC_INTERFACE(Derived) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) - inline Scalar* data() { return &coeffRef(0); } - inline const Scalar* data() const { return &coeff(0); } + EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return 
derived().nestedExpression().outerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); } - - EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const - { - return derived().functor()(derived().nestedExpression().coeff(row, col)); - } - - EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return derived().functor()(derived().nestedExpression().coeff(index)); - } - - EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) - { - return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col)); - } - - EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index)); - } }; } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/DenseBase.h b/nuparu/include/Eigen/src/Core/DenseBase.h index c5800f6c..e181dafa 100644 --- a/nuparu/include/Eigen/src/Core/DenseBase.h +++ b/nuparu/include/Eigen/src/Core/DenseBase.h @@ -40,31 +40,43 @@ static inline void check_DenseIndex_is_signed() { */ template class DenseBase #ifndef EIGEN_PARSED_BY_DOXYGEN - : public internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real> + : public internal::special_scalar_op_base::Scalar, + typename NumTraits::Scalar>::Real, + DenseCoeffsBase > #else : public DenseCoeffsBase #endif // not EIGEN_PARSED_BY_DOXYGEN { public: - using internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real>::operator*; - class InnerIterator; + /** Inner iterator type to iterate over the coefficients of a row or column. + * \sa class InnerIterator + */ + typedef Eigen::InnerIterator InnerIterator; typedef typename internal::traits::StorageKind StorageKind; - /** \brief The type of indices - * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. - * \sa \ref TopicPreprocessorDirectives. 
- */ - typedef typename internal::traits::Index Index; + /** + * \brief The type used to store indices + * \details This typedef is relevant for types that store multiple indices such as + * PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index + * \sa \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase. + */ + typedef typename internal::traits::StorageIndex StorageIndex; + /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex, etc. */ typedef typename internal::traits::Scalar Scalar; - typedef typename internal::packet_traits::type PacketScalar; + + /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex, etc. + * + * It is an alias for the Scalar type */ + typedef Scalar value_type; + typedef typename NumTraits::Real RealScalar; + typedef internal::special_scalar_op_base > Base; - typedef DenseCoeffsBase Base; + using Base::operator*; + using Base::operator/; using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -74,16 +86,6 @@ template class DenseBase using Base::colIndexByOuterInner; using Base::coeff; using Base::coeffByOuterInner; - using Base::packet; - using Base::packetByOuterInner; - using Base::writePacket; - using Base::writePacketByOuterInner; - using Base::coeffRef; - using Base::coeffRefByOuterInner; - using Base::copyCoeff; - using Base::copyCoeffByOuterInner; - using Base::copyPacket; - using Base::copyPacketByOuterInner; using Base::operator(); using Base::operator[]; using Base::x; @@ -169,30 +171,54 @@ template class DenseBase InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime) : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime), - CoeffReadCost = internal::traits::CoeffReadCost, - /**< This is a rough measure of how expensive it is to read one coefficient from - * this expression. 
- */ - InnerStrideAtCompileTime = internal::inner_stride_at_compile_time::ret, OuterStrideAtCompileTime = internal::outer_stride_at_compile_time::ret }; + + typedef typename internal::find_best_packet::type PacketScalar; - enum { ThisConstantIsPrivateInPlainObjectBase }; + enum { IsPlainObjectBase = 0 }; + + /** The plain matrix type corresponding to this expression. + * \sa PlainObject */ + typedef Matrix::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainMatrix; + + /** The plain array type corresponding to this expression. + * \sa PlainObject */ + typedef Array::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainArray; + + /** \brief The plain matrix or array type corresponding to this expression. + * + * This is not necessarily exactly the return type of eval(). In the case of plain matrices, + * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed + * that the return type of eval() is either PlainObject or const PlainObject&. + */ + typedef typename internal::conditional::XprKind,MatrixXpr >::value, + PlainMatrix, PlainArray>::type PlainObject; /** \returns the number of nonzero coefficients which is in practice the number * of stored coefficients. */ + EIGEN_DEVICE_FUNC inline Index nonZeros() const { return size(); } - /** \returns true if either the number of rows or the number of columns is equal to 1. - * In other words, this function returns - * \code rows()==1 || cols()==1 \endcode - * \sa rows(), cols(), IsVectorAtCompileTime. */ /** \returns the outer size. * * \note For a vector, this returns just 1. 
For a matrix (non-vector), this is the major dimension * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a * column-major matrix, and the number of rows for a row-major matrix. */ + EIGEN_DEVICE_FUNC Index outerSize() const { return IsVectorAtCompileTime ? 1 @@ -204,6 +230,7 @@ template class DenseBase * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a * column-major matrix, and the number of columns for a row-major matrix. */ + EIGEN_DEVICE_FUNC Index innerSize() const { return IsVectorAtCompileTime ? this->size() @@ -214,6 +241,7 @@ template class DenseBase * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does * nothing else. */ + EIGEN_DEVICE_FUNC void resize(Index newSize) { EIGEN_ONLY_USED_FOR_DEBUG(newSize); @@ -224,22 +252,22 @@ template class DenseBase * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does * nothing else. */ - void resize(Index nbRows, Index nbCols) + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { - EIGEN_ONLY_USED_FOR_DEBUG(nbRows); - EIGEN_ONLY_USED_FOR_DEBUG(nbCols); - eigen_assert(nbRows == this->rows() && nbCols == this->cols() + EIGEN_ONLY_USED_FOR_DEBUG(rows); + EIGEN_ONLY_USED_FOR_DEBUG(cols); + eigen_assert(rows == this->rows() && cols == this->cols() && "DenseBase::resize() does not actually allow to resize."); } #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. 
*/ - typedef CwiseNullaryOp,Derived> SequentialLinSpacedReturnType; + typedef CwiseNullaryOp,PlainObject> SequentialLinSpacedReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ - typedef CwiseNullaryOp,Derived> RandomAccessLinSpacedReturnType; + typedef CwiseNullaryOp,PlainObject> RandomAccessLinSpacedReturnType; /** \internal the return type of MatrixBase::eigenvalues() */ typedef Matrix::Scalar>::Real, internal::traits::ColsAtCompileTime, 1> EigenvaluesReturnType; @@ -247,110 +275,122 @@ template class DenseBase /** Copies \a other into *this. \returns a reference to *this. */ template + EIGEN_DEVICE_FUNC Derived& operator=(const DenseBase& other); /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ + EIGEN_DEVICE_FUNC Derived& operator=(const DenseBase& other); template + EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase &other); template + EIGEN_DEVICE_FUNC Derived& operator+=(const EigenBase &other); template + EIGEN_DEVICE_FUNC Derived& operator-=(const EigenBase &other); template + EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue& func); -#ifndef EIGEN_PARSED_BY_DOXYGEN - /** Copies \a other into *this without evaluating other. \returns a reference to *this. */ + /** \ínternal + * Copies \a other into *this without evaluating other. \returns a reference to *this. 
+ * \deprecated */ template + EIGEN_DEVICE_FUNC Derived& lazyAssign(const DenseBase& other); -#endif // not EIGEN_PARSED_BY_DOXYGEN + EIGEN_DEVICE_FUNC CommaInitializer operator<< (const Scalar& s); + /** \deprecated it now returns \c *this */ template - const Flagged flagged() const; + EIGEN_DEPRECATED + const Derived& flagged() const + { return derived(); } template + EIGEN_DEVICE_FUNC CommaInitializer operator<< (const DenseBase& other); - Eigen::Transpose transpose(); - typedef typename internal::add_const >::type ConstTransposeReturnType; + typedef Transpose TransposeReturnType; + EIGEN_DEVICE_FUNC + TransposeReturnType transpose(); + typedef typename internal::add_const >::type ConstTransposeReturnType; + EIGEN_DEVICE_FUNC ConstTransposeReturnType transpose() const; + EIGEN_DEVICE_FUNC void transposeInPlace(); -#ifndef EIGEN_NO_DEBUG - protected: - template - void checkTransposeAliasing(const OtherDerived& other) const; - public: -#endif - - static const ConstantReturnType + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index rows, Index cols, const Scalar& value); - static const ConstantReturnType + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index size, const Scalar& value); - static const ConstantReturnType + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(const Scalar& value); - static const SequentialLinSpacedReturnType + EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high); - static const RandomAccessLinSpacedReturnType + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Index size, const Scalar& low, const Scalar& high); - static const SequentialLinSpacedReturnType + EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType LinSpaced(Sequential_t, const Scalar& low, const Scalar& high); - static const RandomAccessLinSpacedReturnType + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType 
LinSpaced(const Scalar& low, const Scalar& high); - template - static const CwiseNullaryOp + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func); - template - static const CwiseNullaryOp + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp NullaryExpr(Index size, const CustomNullaryOp& func); - template - static const CwiseNullaryOp + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp NullaryExpr(const CustomNullaryOp& func); - static const ConstantReturnType Zero(Index rows, Index cols); - static const ConstantReturnType Zero(Index size); - static const ConstantReturnType Zero(); - static const ConstantReturnType Ones(Index rows, Index cols); - static const ConstantReturnType Ones(Index size); - static const ConstantReturnType Ones(); - - void fill(const Scalar& value); - Derived& setConstant(const Scalar& value); - Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high); - Derived& setLinSpaced(const Scalar& low, const Scalar& high); - Derived& setZero(); - Derived& setOnes(); - Derived& setRandom(); - - template + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size); + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(); + + EIGEN_DEVICE_FUNC void fill(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setZero(); + EIGEN_DEVICE_FUNC Derived& setOnes(); + EIGEN_DEVICE_FUNC Derived& setRandom(); + + template EIGEN_DEVICE_FUNC 
bool isApprox(const DenseBase& other, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const RealScalar& other, const RealScalar& prec = NumTraits::dummy_precision()) const; - template + template EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const DenseBase& other, const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isZero(const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isOnes(const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits::dummy_precision()) const; inline bool hasNaN() const; inline bool allFinite() const; + EIGEN_DEVICE_FUNC inline Derived& operator*=(const Scalar& other); + EIGEN_DEVICE_FUNC inline Derived& operator/=(const Scalar& other); typedef typename internal::add_const_on_value_type::type>::type EvalReturnType; @@ -358,7 +398,10 @@ template class DenseBase * * Notice that in the case of a plain matrix or vector (not an expression) this function just returns * a const reference, in order to avoid a useless copy. + * + * \warning Be carefull with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink. 
*/ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvalReturnType eval() const { // Even though MSVC does not honor strong inlining when the return type @@ -366,61 +409,68 @@ template class DenseBase // size types on MSVC. return typename internal::eval::type(derived()); } - + /** swaps *this with the expression \a other. * */ template - void swap(const DenseBase& other, - int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase) + EIGEN_DEVICE_FUNC + void swap(const DenseBase& other) { - SwapWrapper(derived()).lazyAssign(other.derived()); + EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); } /** swaps *this with the matrix or array \a other. * */ template + EIGEN_DEVICE_FUNC void swap(PlainObjectBase& other) { - SwapWrapper(derived()).lazyAssign(other.derived()); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.derived(), internal::swap_assign_op()); } + EIGEN_DEVICE_FUNC inline const NestByValue nestByValue() const; + EIGEN_DEVICE_FUNC inline const ForceAlignedAccess forceAlignedAccess() const; + EIGEN_DEVICE_FUNC inline ForceAlignedAccess forceAlignedAccess(); + template EIGEN_DEVICE_FUNC + inline const typename internal::conditional,Derived&>::type forceAlignedAccessIf() const; + template EIGEN_DEVICE_FUNC + inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); - inline const NestByValue nestByValue() const; - inline const ForceAlignedAccess forceAlignedAccess() const; - inline ForceAlignedAccess forceAlignedAccess(); - template inline const typename internal::conditional,Derived&>::type forceAlignedAccessIf() const; - template inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); + EIGEN_DEVICE_FUNC Scalar sum() const; + EIGEN_DEVICE_FUNC Scalar mean() const; + 
EIGEN_DEVICE_FUNC Scalar trace() const; - Scalar sum() const; - Scalar mean() const; - Scalar trace() const; + EIGEN_DEVICE_FUNC Scalar prod() const; - Scalar prod() const; + EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff() const; + EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff() const; - typename internal::traits::Scalar minCoeff() const; - typename internal::traits::Scalar maxCoeff() const; - - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff(IndexType* row, IndexType* col) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff(IndexType* row, IndexType* col) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff(IndexType* index) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff(IndexType* index) const; template - typename internal::result_of::Scalar)>::type - redux(const BinaryOp& func) const; + EIGEN_DEVICE_FUNC + Scalar redux(const BinaryOp& func) const; template + EIGEN_DEVICE_FUNC void visit(Visitor& func) const; inline const WithFormat format(const IOFormat& fmt) const; /** \returns the unique coefficient of a 1x1 expression */ + EIGEN_DEVICE_FUNC CoeffReturnType value() const { EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) @@ -428,8 +478,8 @@ template class DenseBase return derived().coeff(0,0); } - bool all(void) const; - bool any(void) const; + bool all() const; + bool any() const; Index count() const; typedef VectorwiseOp RowwiseReturnType; @@ -437,14 +487,35 @@ template class DenseBase typedef VectorwiseOp ColwiseReturnType; typedef const VectorwiseOp ConstColwiseReturnType; - ConstRowwiseReturnType rowwise() const; - RowwiseReturnType rowwise(); - ConstColwiseReturnType colwise() const; - ColwiseReturnType colwise(); + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_rowwise.cpp + * Output: \verbinclude 
MatrixBase_rowwise.out + * + * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const { + return ConstRowwiseReturnType(derived()); + } + EIGEN_DEVICE_FUNC RowwiseReturnType rowwise(); + + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_colwise.cpp + * Output: \verbinclude MatrixBase_colwise.out + * + * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const { + return ConstColwiseReturnType(derived()); + } + EIGEN_DEVICE_FUNC ColwiseReturnType colwise(); - static const CwiseNullaryOp,Derived> Random(Index rows, Index cols); - static const CwiseNullaryOp,Derived> Random(Index size); - static const CwiseNullaryOp,Derived> Random(); + typedef CwiseNullaryOp,PlainObject> RandomReturnType; + static const RandomReturnType Random(Index rows, Index cols); + static const RandomReturnType Random(Index size); + static const RandomReturnType Random(); template const Select @@ -462,14 +533,33 @@ template class DenseBase template RealScalar lpNorm() const; template + EIGEN_DEVICE_FUNC const Replicate replicate() const; - const Replicate replicate(Index rowFacor,Index colFactor) const; + /** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate_int_int.cpp + * Output: \verbinclude MatrixBase_replicate_int_int.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate + */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC + const Replicate replicate(Index rowFactor, Index colFactor) const + { + return Replicate(derived(), rowFactor, colFactor); + } typedef Reverse ReverseReturnType; typedef const Reverse ConstReverseReturnType; - ReverseReturnType reverse(); - 
ConstReverseReturnType reverse() const; - void reverseInPlace(); + EIGEN_DEVICE_FUNC ReverseReturnType reverse(); + /** This is the const version of reverse(). */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const + { + return ConstReverseReturnType(derived()); + } + EIGEN_DEVICE_FUNC void reverseInPlace(); #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase # include "../plugins/BlockMethods.h" @@ -478,27 +568,18 @@ template class DenseBase # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS -#ifdef EIGEN2_SUPPORT - - Block corner(CornerType type, Index cRows, Index cCols); - const Block corner(CornerType type, Index cRows, Index cCols) const; - template - Block corner(CornerType type); - template - const Block corner(CornerType type) const; - -#endif // EIGEN2_SUPPORT - // disable the use of evalTo for dense objects with a nice compilation error - template inline void evalTo(Dest& ) const + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& ) const { EIGEN_STATIC_ASSERT((internal::is_same::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS); } protected: /** Default constructor. Do nothing. */ - DenseBase() + EIGEN_DEVICE_FUNC DenseBase() { /* Just checks for self-consistency of the flags. 
* Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down @@ -511,9 +592,9 @@ template class DenseBase } private: - explicit DenseBase(int); - DenseBase(int,int); - template explicit DenseBase(const DenseBase&); + EIGEN_DEVICE_FUNC explicit DenseBase(int); + EIGEN_DEVICE_FUNC DenseBase(int,int); + template EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase&); }; } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/DenseCoeffsBase.h b/nuparu/include/Eigen/src/Core/DenseCoeffsBase.h index 3c890f21..820a90e6 100644 --- a/nuparu/include/Eigen/src/Core/DenseCoeffsBase.h +++ b/nuparu/include/Eigen/src/Core/DenseCoeffsBase.h @@ -35,7 +35,6 @@ class DenseCoeffsBase : public EigenBase { public: typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; @@ -61,6 +60,7 @@ class DenseCoeffsBase : public EigenBase using Base::size; using Base::derived; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const { return int(Derived::RowsAtCompileTime) == 1 ? 0 @@ -69,6 +69,7 @@ class DenseCoeffsBase : public EigenBase : inner; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const { return int(Derived::ColsAtCompileTime) == 1 ? 
0 @@ -91,13 +92,15 @@ class DenseCoeffsBase : public EigenBase * * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().coeff(row, col); + && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeff(row,col); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { return coeff(rowIndexByOuterInner(outer, inner), @@ -108,11 +111,12 @@ class DenseCoeffsBase : public EigenBase * * \sa operator()(Index,Index), operator[](Index) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); - return derived().coeff(row, col); + return coeff(row, col); } /** Short version: don't use this function, use @@ -130,11 +134,14 @@ class DenseCoeffsBase : public EigenBase * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); - return derived().coeff(index); + return internal::evaluator(derived()).coeff(index); } @@ -146,15 +153,14 @@ class DenseCoeffsBase : public EigenBase * z() const, w() const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator[](Index index) const { - #ifndef EIGEN2_SUPPORT EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) - #endif eigen_assert(index >= 0 && index < size()); - return derived().coeff(index); + return coeff(index); } /** \returns the coefficient at 
given index. @@ -167,30 +173,35 @@ class DenseCoeffsBase : public EigenBase * z() const, w() const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index index) const { eigen_assert(index >= 0 && index < size()); - return derived().coeff(index); + return coeff(index); } /** equivalent to operator[](0). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType x() const { return (*this)[0]; } /** equivalent to operator[](1). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType y() const { return (*this)[1]; } /** equivalent to operator[](2). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType z() const { return (*this)[2]; } /** equivalent to operator[](3). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType w() const { return (*this)[3]; } @@ -207,9 +218,9 @@ class DenseCoeffsBase : public EigenBase template EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().template packet(row,col); + typedef typename internal::packet_traits::type DefaultPacketType; + eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return internal::evaluator(derived()).template packet(row,col); } @@ -234,8 +245,11 @@ class DenseCoeffsBase : public EigenBase template EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + typedef typename internal::packet_traits::type DefaultPacketType; eigen_internal_assert(index >= 0 && index < size()); - return derived().template packet(index); + return internal::evaluator(derived()).template packet(index); } protected: @@ -278,7 +292,6 @@ class DenseCoeffsBase : public DenseCoeffsBase Base; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; 
typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; @@ -311,13 +324,15 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().coeffRef(row, col); + && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeffRef(row,col); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRefByOuterInner(Index outer, Index inner) { @@ -330,12 +345,13 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() && col >= 0 && col < cols()); - return derived().coeffRef(row, col); + return coeffRef(row, col); } @@ -354,11 +370,14 @@ class DenseCoeffsBase : public DenseCoeffsBase::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); - return derived().coeffRef(index); + return internal::evaluator(derived()).coeffRef(index); } /** \returns a reference to the coefficient at given index. @@ -368,15 +387,14 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); - return derived().coeffRef(index); + return coeffRef(index); } /** \returns a reference to the coefficient at given index. @@ -388,167 +406,37 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); - return derived().coeffRef(index); + return coeffRef(index); } /** equivalent to operator[](0). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& x() { return (*this)[0]; } /** equivalent to operator[](1). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& y() { return (*this)[1]; } /** equivalent to operator[](2). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& z() { return (*this)[2]; } /** equivalent to operator[](3). 
*/ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& w() { return (*this)[3]; } - - /** \internal - * Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility - * to ensure that a packet really starts there. This method is only available on expressions having the - * PacketAccessBit. - * - * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select - * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets - * starting at an address which is a multiple of the packet size. - */ - - template - EIGEN_STRONG_INLINE void writePacket - (Index row, Index col, const typename internal::packet_traits::type& val) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().template writePacket(row,col,val); - } - - - /** \internal */ - template - EIGEN_STRONG_INLINE void writePacketByOuterInner - (Index outer, Index inner, const typename internal::packet_traits::type& val) - { - writePacket(rowIndexByOuterInner(outer, inner), - colIndexByOuterInner(outer, inner), - val); - } - - /** \internal - * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility - * to ensure that a packet really starts there. This method is only available on expressions having the - * PacketAccessBit and the LinearAccessBit. - * - * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select - * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets - * starting at an address which is a multiple of the packet size. 
- */ - template - EIGEN_STRONG_INLINE void writePacket - (Index index, const typename internal::packet_traits::type& val) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().template writePacket(index,val); - } - -#ifndef EIGEN_PARSED_BY_DOXYGEN - - /** \internal Copies the coefficient at position (row,col) of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase& other) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().coeffRef(row, col) = other.derived().coeff(row, col); - } - - /** \internal Copies the coefficient at the given index of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase& other) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().coeffRef(index) = other.derived().coeff(index); - } - - - template - EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase& other) - { - const Index row = rowIndexByOuterInner(outer,inner); - const Index col = colIndexByOuterInner(outer,inner); - // derived() is important here: copyCoeff() may be reimplemented in Derived! - derived().copyCoeff(row, col, other); - } - - /** \internal Copies the packet at position (row,col) of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. 
- * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase& other) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().template writePacket(row, col, - other.derived().template packet(row, col)); - } - - /** \internal Copies the packet at the given index of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase& other) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().template writePacket(index, - other.derived().template packet(index)); - } - - /** \internal */ - template - EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase& other) - { - const Index row = rowIndexByOuterInner(outer,inner); - const Index col = colIndexByOuterInner(outer,inner); - // derived() is important here: copyCoeff() may be reimplemented in Derived! - derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other); - } -#endif - }; /** \brief Base class providing direct read-only coefficient access to matrices and arrays. 
@@ -568,7 +456,6 @@ class DenseCoeffsBase : public DenseCoeffsBase Base; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename NumTraits::Real RealScalar; @@ -581,6 +468,7 @@ class DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase public: typedef DenseCoeffsBase Base; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename NumTraits::Real RealScalar; @@ -652,6 +542,7 @@ class DenseCoeffsBase * * \sa outerStride(), rowStride(), colStride() */ + EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().innerStride(); @@ -662,6 +553,7 @@ class DenseCoeffsBase * * \sa innerStride(), rowStride(), colStride() */ + EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().outerStride(); @@ -677,6 +569,7 @@ class DenseCoeffsBase * * \sa innerStride(), outerStride(), colStride() */ + EIGEN_DEVICE_FUNC inline Index rowStride() const { return Derived::IsRowMajor ? outerStride() : innerStride(); @@ -686,6 +579,7 @@ class DenseCoeffsBase * * \sa innerStride(), outerStride(), rowStride() */ + EIGEN_DEVICE_FUNC inline Index colStride() const { return Derived::IsRowMajor ? 
innerStride() : outerStride(); @@ -694,33 +588,42 @@ class DenseCoeffsBase namespace internal { -template +template struct first_aligned_impl { - static inline typename Derived::Index run(const Derived&) + static inline Index run(const Derived&) { return 0; } }; -template -struct first_aligned_impl +template +struct first_aligned_impl { - static inline typename Derived::Index run(const Derived& m) + static inline Index run(const Derived& m) { - return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size()); + return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size()); } }; -/** \internal \returns the index of the first element of the array that is well aligned for vectorization. +/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization. + * + * \tparam Alignment requested alignment in Bytes. * * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more * documentation. 
*/ +template +static inline Index first_aligned(const DenseBase& m) +{ + enum { ReturnZero = (int(evaluator::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) }; + return first_aligned_impl::run(m.derived()); +} + template -static inline typename Derived::Index first_aligned(const Derived& m) +static inline Index first_default_aligned(const DenseBase& m) { - return first_aligned_impl - - ::run(m); + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type DefaultPacketType; + return internal::first_aligned::alignment),Derived>(m); } template::ret> diff --git a/nuparu/include/Eigen/src/Core/DenseStorage.h b/nuparu/include/Eigen/src/Core/DenseStorage.h index 3e7f9c1b..34048461 100644 --- a/nuparu/include/Eigen/src/Core/DenseStorage.h +++ b/nuparu/include/Eigen/src/Core/DenseStorage.h @@ -3,7 +3,7 @@ // // Copyright (C) 2008 Gael Guennebaud // Copyright (C) 2006-2009 Benoit Jacob -// Copyright (C) 2010 Hauke Heibel +// Copyright (C) 2010-2013 Hauke Heibel // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -24,26 +24,37 @@ namespace internal { struct constructor_without_unaligned_array_assert {}; +template +EIGEN_DEVICE_FUNC +void check_static_allocation_size() +{ + // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit + #if EIGEN_STACK_ALLOCATION_LIMIT + EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + #endif +} + /** \internal * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: * to 16 bytes boundary if the total size is a multiple of 16 bytes. 
*/ template + : compute_default_alignment::value > struct plain_array { T array[Size]; - plain_array() + EIGEN_DEVICE_FUNC + plain_array() { - EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + check_static_allocation_size(); } - plain_array(constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) { - EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + check_static_allocation_size(); } }; @@ -56,41 +67,100 @@ struct plain_array template EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; } #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((reinterpret_cast(eigen_unaligned_array_assert_workaround_gcc47(array)) & sizemask) == 0 \ + eigen_assert((reinterpret_cast(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \ && "this assertion is explained here: " \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ " **** READ THIS WEB PAGE !!! ****"); #else #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((reinterpret_cast(array) & sizemask) == 0 \ + eigen_assert((reinterpret_cast(array) & (sizemask)) == 0 \ && "this assertion is explained here: " \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ " **** READ THIS WEB PAGE !!! 
****"); #endif +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + template struct plain_array { - EIGEN_USER_ALIGN16 T array[Size]; + EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size]; + EIGEN_DEVICE_FUNC plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf); - EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); + check_static_allocation_size(); } + EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) { - EIGEN_STATIC_ASSERT(Size * sizeof(T) <= 128 * 128 * 8, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + check_static_allocation_size(); } }; template struct plain_array { - EIGEN_USER_ALIGN16 T array[1]; - plain_array() {} - plain_array(constructor_without_unaligned_array_assert) {} + T array[1]; + EIGEN_DEVICE_FUNC plain_array() {} + EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {} }; } // end namespace internal @@ -114,33 +184,50 @@ template class DenseSt { internal::plain_array m_data; public: - inline DenseStorage() {} - inline 
DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() {} + EIGEN_DEVICE_FUNC + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()) {} - inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {} - inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); } - static inline DenseIndex rows(void) {return _Rows;} - static inline DenseIndex cols(void) {return _Cols;} - inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {} - inline void resize(DenseIndex,DenseIndex,DenseIndex) {} - inline const T *data() const { return m_data.array; } - inline T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC + DenseStorage(const DenseStorage& other) : m_data(other.m_data) {} + EIGEN_DEVICE_FUNC + DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) m_data = other.m_data; + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols); + EIGEN_UNUSED_VARIABLE(size); + EIGEN_UNUSED_VARIABLE(rows); + EIGEN_UNUSED_VARIABLE(cols); + } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); } + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } }; // null matrix template class DenseStorage { public: - inline DenseStorage() {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) {} - inline DenseStorage(DenseIndex,DenseIndex,DenseIndex) {} - inline void swap(DenseStorage& ) {} - static inline DenseIndex 
rows(void) {return _Rows;} - static inline DenseIndex cols(void) {return _Cols;} - inline void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {} - inline void resize(DenseIndex,DenseIndex,DenseIndex) {} - inline const T *data() const { return 0; } - inline T *data() { return 0; } + EIGEN_DEVICE_FUNC DenseStorage() {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; } + EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {} + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC const T *data() const { return 0; } + EIGEN_DEVICE_FUNC T *data() { return 0; } }; // more specializations for null matrices; these are necessary to resolve ambiguities @@ -157,86 +244,157 @@ template class DenseStorage class DenseStorage { internal::plain_array m_data; - DenseIndex m_rows; - DenseIndex m_cols; + Index m_rows; + Index m_cols; public: - inline DenseStorage() : m_rows(0), m_cols(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {} - inline DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {} - inline void swap(DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& 
other) + { + if (this != &other) + { + m_data = other.m_data; + m_rows = other.m_rows; + m_cols = other.m_cols; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } - inline DenseIndex rows() const {return m_rows;} - inline DenseIndex cols() const {return m_cols;} - inline void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; } - inline void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; } - inline const T *data() const { return m_data.array; } - inline T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC Index rows() const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols() const {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } }; // dynamic-size matrix with fixed-size storage and fixed width template class DenseStorage { internal::plain_array m_data; - DenseIndex m_rows; + Index m_rows; public: - inline DenseStorage() : m_rows(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {} - inline DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {} - inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } - inline DenseIndex rows(void) const 
{return m_rows;} - inline DenseIndex cols(void) const {return _Cols;} - inline void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; } - inline void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; } - inline const T *data() const { return m_data.array; } - inline T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_rows = other.m_rows; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } }; // dynamic-size matrix with fixed-size storage and fixed height template class DenseStorage { internal::plain_array m_data; - DenseIndex m_cols; + Index m_cols; public: - inline DenseStorage() : m_cols(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {} - inline DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {} - inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } - inline DenseIndex rows(void) const {return _Rows;} - 
inline DenseIndex cols(void) const {return m_cols;} - inline void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; } - inline void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; } - inline const T *data() const { return m_data.array; } - inline T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_cols = other.m_cols; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + void conservativeResize(Index, Index, Index cols) { m_cols = cols; } + void resize(Index, Index, Index cols) { m_cols = cols; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } }; // purely dynamic matrix. 
template class DenseStorage { T *m_data; - DenseIndex m_rows; - DenseIndex m_cols; + Index m_rows; + Index m_cols; public: - inline DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0), m_cols(0) {} - inline DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) - : m_data(internal::conditional_aligned_new_auto(size)), m_rows(nbRows), m_cols(nbCols) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } - inline ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); } - inline void swap(DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) + : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows), m_cols(cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*other.m_cols)) + , m_rows(other.m_rows) + , m_cols(other.m_cols) + { + internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) + : m_data(std::move(other.m_data)) + , m_rows(std::move(other.m_rows)) + , m_cols(std::move(other.m_cols)) + { + other.m_data = nullptr; + other.m_rows = 0; + other.m_cols = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) + { + using std::swap; + swap(m_data, other.m_data); + swap(m_rows, other.m_rows); + swap(m_cols, other.m_cols); + 
return *this; + } +#endif + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } - inline DenseIndex rows(void) const {return m_rows;} - inline DenseIndex cols(void) const {return m_cols;} - inline void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + void conservativeResize(Index size, Index rows, Index cols) { m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*m_cols); - m_rows = nbRows; - m_cols = nbCols; + m_rows = rows; + m_cols = cols; } - void resize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) + EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols) { if(size != m_rows*m_cols) { @@ -247,33 +405,70 @@ template class DenseStorage class DenseStorage { T *m_data; - DenseIndex m_cols; + Index m_cols; public: - inline DenseStorage() : m_data(0), m_cols(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} - inline DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto(size)), m_cols(nbCols) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } - inline ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); } - inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } - static inline DenseIndex rows(void) {return _Rows;} - inline DenseIndex cols(void) const {return m_cols;} - inline void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols) + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), 
m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_cols(cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0); + EIGEN_UNUSED_VARIABLE(rows); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(_Rows*other.m_cols)) + , m_cols(other.m_cols) + { + internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) + : m_data(std::move(other.m_data)) + , m_cols(std::move(other.m_cols)) + { + other.m_data = nullptr; + other.m_cols = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) + { + using std::swap; + swap(m_data, other.m_data); + swap(m_cols, other.m_cols); + return *this; + } +#endif + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols) { m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, _Rows*m_cols); - m_cols = nbCols; + m_cols = cols; } - EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex nbCols) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols) { if(size != _Rows*m_cols) { @@ -284,32 +479,69 @@ template class DenseStorage class DenseStorage { T *m_data; - DenseIndex m_rows; + Index m_rows; public: - inline DenseStorage() : 
m_data(0), m_rows(0) {} - inline DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} - inline DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto(size)), m_rows(nbRows) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } - inline ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); } - inline void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } - inline DenseIndex rows(void) const {return m_rows;} - static inline DenseIndex cols(void) {return _Cols;} - inline void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex) + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols); + EIGEN_UNUSED_VARIABLE(cols); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*_Cols)) + , m_rows(other.m_rows) + { + internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) + : m_data(std::move(other.m_data)) + , m_rows(std::move(other.m_rows)) + { + other.m_data = nullptr; + other.m_rows = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) + { + using std::swap; + swap(m_data, other.m_data); + swap(m_rows, other.m_rows); + return *this; + } +#endif + EIGEN_DEVICE_FUNC 
~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + void conservativeResize(Index size, Index rows, Index) { m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*_Cols); - m_rows = nbRows; + m_rows = rows; } - EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex nbRows, DenseIndex) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index) { if(size != m_rows*_Cols) { @@ -320,10 +552,10 @@ template class DenseStorage struct traits > : traits { - typedef typename nested::type MatrixTypeNested; + typedef typename ref_selector::type MatrixTypeNested; typedef typename remove_reference::type _MatrixTypeNested; typedef typename MatrixType::StorageKind StorageKind; enum { @@ -52,8 +52,7 @@ struct traits > MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), MaxColsAtCompileTime = 1, MaskLvalueBit = is_lvalue::value ? LvalueBit : 0, - Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, - CoeffReadCost = _MatrixTypeNested::CoeffReadCost, + Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions MatrixTypeOuterStride = outer_stride_at_compile_time::ret, InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? 
Dynamic : MatrixTypeOuterStride+1, OuterStrideAtCompileTime = 0 @@ -70,20 +69,28 @@ template class Diagonal typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) - inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + EIGEN_DEVICE_FUNC + explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) + EIGEN_DEVICE_FUNC inline Index rows() const - { return m_index.value()<0 ? (std::min)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min)(m_matrix.rows(),m_matrix.cols()-m_index.value()); } + { + return m_index.value()<0 ? numext::mini(m_matrix.cols(),m_matrix.rows()+m_index.value()) + : numext::mini(m_matrix.rows(),m_matrix.cols()-m_index.value()); + } + EIGEN_DEVICE_FUNC inline Index cols() const { return 1; } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.outerStride() + 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return 0; @@ -95,48 +102,58 @@ template class Diagonal const Scalar >::type ScalarWithConstIfNotLvalue; + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index) { EIGEN_STATIC_ASSERT_LVALUE(MatrixType) return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index) const { return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index) const { return m_matrix.coeff(row+rowOffset(), row+colOffset()); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index idx) { 
EIGEN_STATIC_ASSERT_LVALUE(MatrixType) return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index idx) const { return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index idx) const { return m_matrix.coeff(idx+rowOffset(), idx+colOffset()); } - const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC + inline const typename internal::remove_all::type& nestedExpression() const { return m_matrix; } - int index() const + EIGEN_DEVICE_FUNC + inline Index index() const { return m_index.value(); } @@ -147,10 +164,13 @@ template class Diagonal private: // some compilers may fail to optimize std::max etc in case of compile-time constants... + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; } - // triger a compile time error is someone try to call packet + // trigger a compile-time error if someone try to call packet template typename MatrixType::PacketReturnType packet(Index) const; template typename MatrixType::PacketReturnType packet(Index,Index) const; }; @@ -167,7 +187,7 @@ template inline typename MatrixBase::DiagonalReturnType MatrixBase::diagonal() { - return derived(); + return DiagonalReturnType(derived()); } /** This is the const version of diagonal(). 
*/ @@ -190,18 +210,18 @@ MatrixBase::diagonal() const * * \sa MatrixBase::diagonal(), class Diagonal */ template -inline typename MatrixBase::template DiagonalIndexReturnType::Type +inline typename MatrixBase::DiagonalDynamicIndexReturnType MatrixBase::diagonal(Index index) { - return typename DiagonalIndexReturnType::Type(derived(), index); + return DiagonalDynamicIndexReturnType(derived(), index); } /** This is the const version of diagonal(Index). */ template -inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +inline typename MatrixBase::ConstDiagonalDynamicIndexReturnType MatrixBase::diagonal(Index index) const { - return typename ConstDiagonalIndexReturnType::Type(derived(), index); + return ConstDiagonalDynamicIndexReturnType(derived(), index); } /** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this @@ -216,20 +236,20 @@ MatrixBase::diagonal(Index index) const * * \sa MatrixBase::diagonal(), class Diagonal */ template -template -inline typename MatrixBase::template DiagonalIndexReturnType::Type +template +inline typename MatrixBase::template DiagonalIndexReturnType::Type MatrixBase::diagonal() { - return derived(); + return typename DiagonalIndexReturnType::Type(derived()); } /** This is the const version of diagonal(). 
*/ template -template -inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +template +inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type MatrixBase::diagonal() const { - return derived(); + return typename ConstDiagonalIndexReturnType::Type(derived()); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/DiagonalMatrix.h b/nuparu/include/Eigen/src/Core/DiagonalMatrix.h index e6c220f4..5a9e3abd 100644 --- a/nuparu/include/Eigen/src/Core/DiagonalMatrix.h +++ b/nuparu/include/Eigen/src/Core/DiagonalMatrix.h @@ -22,7 +22,7 @@ class DiagonalBase : public EigenBase typedef typename DiagonalVectorType::Scalar Scalar; typedef typename DiagonalVectorType::RealScalar RealScalar; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, @@ -30,79 +30,62 @@ class DiagonalBase : public EigenBase MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, IsVectorAtCompileTime = 0, - Flags = 0 + Flags = NoPreferredStorageOrderBit }; typedef Matrix DenseMatrixType; typedef DenseMatrixType DenseType; typedef DiagonalMatrix PlainObject; + EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast(this); } + EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } - template - void evalTo(MatrixBase &other) const; - template - void addTo(MatrixBase &other) const - { other.diagonal() += diagonal(); } - template - void subTo(MatrixBase &other) const - { other.diagonal() -= diagonal(); } - + + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } + EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return 
derived().diagonal(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return diagonal().size(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return diagonal().size(); } - /** \returns the diagonal matrix product of \c *this by the matrix \a matrix. - */ template - const DiagonalProduct + EIGEN_DEVICE_FUNC + const Product operator*(const MatrixBase &matrix) const { - return DiagonalProduct(matrix.derived(), derived()); + return Product(derived(),matrix.derived()); } - inline const DiagonalWrapper, const DiagonalVectorType> > + typedef DiagonalWrapper, const DiagonalVectorType> > InverseReturnType; + EIGEN_DEVICE_FUNC + inline const InverseReturnType inverse() const { - return diagonal().cwiseInverse(); + return InverseReturnType(diagonal().cwiseInverse()); } - inline const DiagonalWrapper, const DiagonalVectorType> > + typedef DiagonalWrapper, const DiagonalVectorType> > ScalarMultipleReturnType; + EIGEN_DEVICE_FUNC + inline const ScalarMultipleReturnType operator*(const Scalar& scalar) const { - return diagonal() * scalar; + return ScalarMultipleReturnType(diagonal() * scalar); } - friend inline const DiagonalWrapper, const DiagonalVectorType> > + EIGEN_DEVICE_FUNC + friend inline const ScalarMultipleReturnType operator*(const Scalar& scalar, const DiagonalBase& other) { - return other.diagonal() * scalar; - } - - #ifdef EIGEN2_SUPPORT - template - bool isApprox(const DiagonalBase& other, typename NumTraits::Real precision = NumTraits::dummy_precision()) const - { - return diagonal().isApprox(other.diagonal(), precision); + return ScalarMultipleReturnType(other.diagonal() * scalar); } - template - bool isApprox(const MatrixBase& other, typename NumTraits::Real precision = NumTraits::dummy_precision()) const - { - return toDenseMatrix().isApprox(other, precision); - } - #endif }; -template -template -void DiagonalBase::evalTo(MatrixBase &other) const -{ - other.setZero(); - other.diagonal() = diagonal(); -} #endif /** \class DiagonalMatrix @@ -124,10 +107,9 
@@ struct traits > : traits > { typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType; - typedef Dense StorageKind; - typedef DenseIndex Index; + typedef DiagonalShape StorageKind; enum { - Flags = LvalueBit + Flags = LvalueBit | NoPreferredStorageOrderBit }; }; } @@ -141,7 +123,7 @@ class DiagonalMatrix typedef const DiagonalMatrix& Nested; typedef _Scalar Scalar; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; #endif protected: @@ -151,24 +133,31 @@ class DiagonalMatrix public: /** const version of diagonal(). */ + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return m_diagonal; } /** \returns a reference to the stored vector of diagonal coefficients. */ + EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return m_diagonal; } /** Default constructor without initialization */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix() {} /** Constructs a diagonal matrix with given dimension */ - inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} + EIGEN_DEVICE_FUNC + explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} /** 2D constructor. */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {} /** 3D constructor. */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {} /** Copy constructor. */ template + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const DiagonalBase& other) : m_diagonal(other.diagonal()) {} #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -178,11 +167,13 @@ class DiagonalMatrix /** generic constructor from expression of the diagonal coefficients */ template + EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(const MatrixBase& other) : m_diagonal(other) {} /** Copy operator. 
*/ template + EIGEN_DEVICE_FUNC DiagonalMatrix& operator=(const DiagonalBase& other) { m_diagonal = other.diagonal(); @@ -193,6 +184,7 @@ class DiagonalMatrix /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. */ + EIGEN_DEVICE_FUNC DiagonalMatrix& operator=(const DiagonalMatrix& other) { m_diagonal = other.diagonal(); @@ -201,14 +193,19 @@ class DiagonalMatrix #endif /** Resizes to given size. */ + EIGEN_DEVICE_FUNC inline void resize(Index size) { m_diagonal.resize(size); } /** Sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC inline void setZero() { m_diagonal.setZero(); } /** Resizes and sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC inline void setZero(Index size) { m_diagonal.setZero(size); } /** Sets this matrix to be the identity matrix of the current size. */ + EIGEN_DEVICE_FUNC inline void setIdentity() { m_diagonal.setOnes(); } /** Sets this matrix to be the identity matrix of the given size. 
*/ + EIGEN_DEVICE_FUNC inline void setIdentity(Index size) { m_diagonal.setOnes(size); } }; @@ -232,14 +229,15 @@ struct traits > { typedef _DiagonalVectorType DiagonalVectorType; typedef typename DiagonalVectorType::Scalar Scalar; - typedef typename DiagonalVectorType::Index Index; - typedef typename DiagonalVectorType::StorageKind StorageKind; + typedef typename DiagonalVectorType::StorageIndex StorageIndex; + typedef DiagonalShape StorageKind; + typedef typename traits::XprKind XprKind; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - Flags = traits::Flags & LvalueBit + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + Flags = (traits::Flags & LvalueBit) | NoPreferredStorageOrderBit }; }; } @@ -255,9 +253,11 @@ class DiagonalWrapper #endif /** Constructor from expression of diagonal coefficients to wrap. */ - inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} + EIGEN_DEVICE_FUNC + explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} /** \returns a const reference to the wrapped expression of diagonal coefficients. 
*/ + EIGEN_DEVICE_FUNC const DiagonalVectorType& diagonal() const { return m_diagonal; } protected: @@ -277,7 +277,7 @@ template inline const DiagonalWrapper MatrixBase::asDiagonal() const { - return derived(); + return DiagonalWrapper(derived()); } /** \returns true if *this is approximately equal to a diagonal matrix, @@ -308,6 +308,33 @@ bool MatrixBase::isDiagonal(const RealScalar& prec) const return true; } +namespace internal { + +template<> struct storage_kind_to_shape { typedef DiagonalShape Shape; }; + +struct Diagonal2Dense {}; + +template<> struct AssignmentKind { typedef Diagonal2Dense Kind; }; + +// Diagonal matrix to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment +{ + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + dst.setZero(); + dst.diagonal() = src.diagonal(); + } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + { dst.diagonal() += src.diagonal(); } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + { dst.diagonal() -= src.diagonal(); } +}; + +} // namespace internal + } // end namespace Eigen #endif // EIGEN_DIAGONALMATRIX_H diff --git a/nuparu/include/Eigen/src/Core/DiagonalProduct.h b/nuparu/include/Eigen/src/Core/DiagonalProduct.h index c03a0c2e..d372b938 100644 --- a/nuparu/include/Eigen/src/Core/DiagonalProduct.h +++ b/nuparu/include/Eigen/src/Core/DiagonalProduct.h @@ -13,116 +13,14 @@ namespace Eigen { -namespace internal { -template -struct traits > - : traits -{ - typedef typename scalar_product_traits::ReturnType Scalar; - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - - _StorageOrder = MatrixType::Flags & RowMajorBit ? 
RowMajor : ColMajor, - _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) - ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), - _SameTypes = is_same::value, - // FIXME currently we need same types, but in the future the next rule should be the one - //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), - _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), - _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0, - - Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit,//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit), - CoeffReadCost = NumTraits::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost - }; -}; -} - -template -class DiagonalProduct : internal::no_assignment_operator, - public MatrixBase > -{ - public: - - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct) - - inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal) - : m_matrix(matrix), m_diagonal(diagonal) - { - eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols())); - } - - EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); } - - EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const - { - return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const - { - enum { - StorageOrder = int(MatrixType::Flags) & RowMajorBit ? 
RowMajor : ColMajor - }; - return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const - { - enum { - StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor - }; - const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col; - return packet_impl(row,col,indexInDiagonalVector,typename internal::conditional< - ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft) - ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type()); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const - { - enum { - StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor - }; - return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); - } - - protected: - template - EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const - { - return internal::pmul(m_matrix.template packet(row, col), - internal::pset1(m_diagonal.diagonal().coeff(id))); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const - { - enum { - InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, - DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) - }; - return internal::pmul(m_matrix.template packet(row, col), - m_diagonal.diagonal().template packet(id)); - } - - typename MatrixType::Nested m_matrix; - typename DiagonalType::Nested m_diagonal; -}; - /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal. 
*/ template template -inline const DiagonalProduct +inline const Product MatrixBase::operator*(const DiagonalBase &a_diagonal) const { - return DiagonalProduct(derived(), a_diagonal.derived()); + return Product(derived(),a_diagonal.derived()); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/Dot.h b/nuparu/include/Eigen/src/Core/Dot.h index 9d7651f1..003450f1 100644 --- a/nuparu/include/Eigen/src/Core/Dot.h +++ b/nuparu/include/Eigen/src/Core/Dot.h @@ -29,6 +29,7 @@ template::Scalar,typename traits::Scalar>::ReturnType ResScalar; + EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { return a.template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); @@ -39,6 +40,7 @@ template struct dot_nocheck { typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { return a.transpose().template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); @@ -59,6 +61,7 @@ struct dot_nocheck */ template template +EIGEN_DEVICE_FUNC typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { @@ -73,34 +76,6 @@ MatrixBase::dot(const MatrixBase& other) const return internal::dot_nocheck::run(*this, other); } -#ifdef EIGEN2_SUPPORT -/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable - * (conjugating the second variable). Of course this only makes a difference in the complex case. - * - * This method is only available in EIGEN2_SUPPORT mode. 
- * - * \only_for_vectors - * - * \sa dot() - */ -template -template -typename internal::traits::Scalar -MatrixBase::eigen2_dot(const MatrixBase& other) const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - eigen_assert(size() == other.size()); - - return internal::dot_nocheck::run(other,*this); -} -#endif - - //---------- implementation of L2 norm and related functions ---------- /** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm. @@ -124,7 +99,7 @@ EIGEN_STRONG_INLINE typename NumTraits::Scala template inline typename NumTraits::Scalar>::Real MatrixBase::norm() const { - using std::sqrt; + EIGEN_USING_STD_MATH(sqrt) return sqrt(squaredNorm()); } @@ -138,8 +113,7 @@ template inline const typename MatrixBase::PlainObject MatrixBase::normalized() const { - typedef typename internal::nested::type Nested; - typedef typename internal::remove_reference::type _Nested; + typedef typename internal::nested_eval::type _Nested; _Nested n(derived()); return n / n.norm(); } @@ -164,9 +138,10 @@ template struct lpNorm_selector { typedef typename NumTraits::Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const MatrixBase& m) { - using std::pow; + EIGEN_USING_STD_MATH(pow) return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p); } }; @@ -174,6 +149,7 @@ struct lpNorm_selector template struct lpNorm_selector { + EIGEN_DEVICE_FUNC static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) { return m.cwiseAbs().sum(); @@ -183,6 +159,7 @@ struct lpNorm_selector template struct lpNorm_selector { + EIGEN_DEVICE_FUNC static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) { return m.norm(); @@ -192,6 
+169,7 @@ struct lpNorm_selector template struct lpNorm_selector { + EIGEN_DEVICE_FUNC static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) { return m.cwiseAbs().maxCoeff(); @@ -200,9 +178,11 @@ struct lpNorm_selector } // end namespace internal -/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values - * of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ - * norm, that is the maximum of the absolute values of the coefficients of *this. +/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values + * of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ + * norm, that is the maximum of the absolute values of the coefficients of \c *this. + * + * \note For matrices, this function does not compute the operator-norm. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink. 
* * \sa norm() */ @@ -227,8 +207,8 @@ template bool MatrixBase::isOrthogonal (const MatrixBase& other, const RealScalar& prec) const { - typename internal::nested::type nested(derived()); - typename internal::nested::type otherNested(other.derived()); + typename internal::nested_eval::type nested(derived()); + typename internal::nested_eval::type otherNested(other.derived()); return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm(); } @@ -246,13 +226,13 @@ bool MatrixBase::isOrthogonal template bool MatrixBase::isUnitary(const RealScalar& prec) const { - typename Derived::Nested nested(derived()); + typename internal::nested_eval::type self(derived()); for(Index i = 0; i < cols(); ++i) { - if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast(1), prec)) + if(!internal::isApprox(self.col(i).squaredNorm(), static_cast(1), prec)) return false; for(Index j = 0; j < i; ++j) - if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast(1), prec)) return false; } return true; diff --git a/nuparu/include/Eigen/src/Core/EigenBase.h b/nuparu/include/Eigen/src/Core/EigenBase.h index 2b8dd1b7..79dabda3 100644 --- a/nuparu/include/Eigen/src/Core/EigenBase.h +++ b/nuparu/include/Eigen/src/Core/EigenBase.h @@ -13,7 +13,9 @@ namespace Eigen { -/** Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). +/** \class EigenBase + * + * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). * * In other words, an EigenBase object is an object that can be copied into a MatrixBase. 
* @@ -26,34 +28,52 @@ namespace Eigen { template struct EigenBase { // typedef typename internal::plain_matrix_type::type PlainObject; - + + /** \brief The interface type of indices + * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. + * \deprecated Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead. + * \sa StorageIndex, \ref TopicPreprocessorDirectives. + */ + typedef Eigen::Index Index; + + // FIXME is it needed? typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; /** \returns a reference to the derived object */ + EIGEN_DEVICE_FUNC Derived& derived() { return *static_cast(this); } /** \returns a const reference to the derived object */ + EIGEN_DEVICE_FUNC const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC inline Derived& const_cast_derived() const { return *static_cast(const_cast(this)); } + EIGEN_DEVICE_FUNC inline const Derived& const_derived() const { return *static_cast(this); } /** \returns the number of rows. \sa cols(), RowsAtCompileTime */ + EIGEN_DEVICE_FUNC inline Index rows() const { return derived().rows(); } /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/ + EIGEN_DEVICE_FUNC inline Index cols() const { return derived().cols(); } /** \returns the number of coefficients, which is rows()*cols(). * \sa rows(), cols(), SizeAtCompileTime. 
*/ + EIGEN_DEVICE_FUNC inline Index size() const { return rows() * cols(); } /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */ - template inline void evalTo(Dest& dst) const + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& dst) const { derived().evalTo(dst); } /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */ - template inline void addTo(Dest& dst) const + template + EIGEN_DEVICE_FUNC + inline void addTo(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. @@ -63,7 +83,9 @@ template struct EigenBase } /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */ - template inline void subTo(Dest& dst) const + template + EIGEN_DEVICE_FUNC + inline void subTo(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. @@ -73,7 +95,8 @@ template struct EigenBase } /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */ - template inline void applyThisOnTheRight(Dest& dst) const + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. @@ -81,7 +104,8 @@ template struct EigenBase } /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */ - template inline void applyThisOnTheLeft(Dest& dst) const + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. 
@@ -106,7 +130,7 @@ template template Derived& DenseBase::operator=(const EigenBase &other) { - other.derived().evalTo(derived()); + call_assignment(derived(), other.derived()); return derived(); } @@ -114,7 +138,7 @@ template template Derived& DenseBase::operator+=(const EigenBase &other) { - other.derived().addTo(derived()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -122,40 +146,10 @@ template template Derived& DenseBase::operator-=(const EigenBase &other) { - other.derived().subTo(derived()); - return derived(); -} - -/** replaces \c *this by \c *this * \a other. - * - * \returns a reference to \c *this - */ -template -template -inline Derived& -MatrixBase::operator*=(const EigenBase &other) -{ - other.derived().applyThisOnTheRight(derived()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } -/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=(). - */ -template -template -inline void MatrixBase::applyOnTheRight(const EigenBase &other) -{ - other.derived().applyThisOnTheRight(derived()); -} - -/** replaces \c *this by \c *this * \a other. */ -template -template -inline void MatrixBase::applyOnTheLeft(const EigenBase &other) -{ - other.derived().applyThisOnTheLeft(derived()); -} - } // end namespace Eigen #endif // EIGEN_EIGENBASE_H diff --git a/nuparu/include/Eigen/src/Core/Flagged.h b/nuparu/include/Eigen/src/Core/Flagged.h deleted file mode 100644 index 1f2955fc..00000000 --- a/nuparu/include/Eigen/src/Core/Flagged.h +++ /dev/null @@ -1,140 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_FLAGGED_H -#define EIGEN_FLAGGED_H - -namespace Eigen { - -/** \class Flagged - * \ingroup Core_Module - * - * \brief Expression with modified flags - * - * \param ExpressionType the type of the object of which we are modifying the flags - * \param Added the flags added to the expression - * \param Removed the flags removed from the expression (has priority over Added). - * - * This class represents an expression whose flags have been modified. - * It is the return type of MatrixBase::flagged() - * and most of the time this is the only way it is used. - * - * \sa MatrixBase::flagged() - */ - -namespace internal { -template -struct traits > : traits -{ - enum { Flags = (ExpressionType::Flags | Added) & ~Removed }; -}; -} - -template class Flagged - : public MatrixBase > -{ - public: - - typedef MatrixBase Base; - - EIGEN_DENSE_PUBLIC_INTERFACE(Flagged) - typedef typename internal::conditional::ret, - ExpressionType, const ExpressionType&>::type ExpressionTypeNested; - typedef typename ExpressionType::InnerIterator InnerIterator; - - inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - inline Index outerStride() const { return m_matrix.outerStride(); } - inline Index innerStride() const { return m_matrix.innerStride(); } - - inline CoeffReturnType coeff(Index row, Index col) const - { - return m_matrix.coeff(row, col); - } - - inline CoeffReturnType coeff(Index index) const - { - return m_matrix.coeff(index); - } - - inline const Scalar& coeffRef(Index row, Index col) const - { - return m_matrix.const_cast_derived().coeffRef(row, col); - } - - inline const Scalar& coeffRef(Index index) const - { - return m_matrix.const_cast_derived().coeffRef(index); - } - - inline Scalar& coeffRef(Index row, Index col) - { - return m_matrix.const_cast_derived().coeffRef(row, col); - } - - inline Scalar& coeffRef(Index index) - { - return 
m_matrix.const_cast_derived().coeffRef(index); - } - - template - inline const PacketScalar packet(Index row, Index col) const - { - return m_matrix.template packet(row, col); - } - - template - inline void writePacket(Index row, Index col, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket(row, col, x); - } - - template - inline const PacketScalar packet(Index index) const - { - return m_matrix.template packet(index); - } - - template - inline void writePacket(Index index, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket(index, x); - } - - const ExpressionType& _expression() const { return m_matrix; } - - template - typename ExpressionType::PlainObject solveTriangular(const MatrixBase& other) const; - - template - void solveTriangularInPlace(const MatrixBase& other) const; - - protected: - ExpressionTypeNested m_matrix; -}; - -/** \returns an expression of *this with added and removed flags - * - * This is mostly for internal use. 
- * - * \sa class Flagged - */ -template -template -inline const Flagged -DenseBase::flagged() const -{ - return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_FLAGGED_H diff --git a/nuparu/include/Eigen/src/Core/ForceAlignedAccess.h b/nuparu/include/Eigen/src/Core/ForceAlignedAccess.h index 807c7a29..7b08b45e 100644 --- a/nuparu/include/Eigen/src/Core/ForceAlignedAccess.h +++ b/nuparu/include/Eigen/src/Core/ForceAlignedAccess.h @@ -39,29 +39,29 @@ template class ForceAlignedAccess typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) - inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} - inline Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + 
EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -90,7 +90,7 @@ template class ForceAlignedAccess m_expression.const_cast_derived().template writePacket(index, x); } - operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType& m_expression; @@ -127,7 +127,7 @@ template inline typename internal::add_const_on_value_type,Derived&>::type>::type MatrixBase::forceAlignedAccessIf() const { - return derived(); + return derived(); // FIXME This should not work but apparently is never used } /** \returns an expression of *this with forced aligned access if \a Enable is true. @@ -138,7 +138,7 @@ template inline typename internal::conditional,Derived&>::type MatrixBase::forceAlignedAccessIf() { - return derived(); + return derived(); // FIXME This should not work but apparently is never used } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/Functors.h b/nuparu/include/Eigen/src/Core/Functors.h deleted file mode 100644 index 04fb2173..00000000 --- a/nuparu/include/Eigen/src/Core/Functors.h +++ /dev/null @@ -1,985 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_FUNCTORS_H -#define EIGEN_FUNCTORS_H - -namespace Eigen { - -namespace internal { - -// associative functors: - -/** \internal - * \brief Template functor to compute the sum of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, MatrixBase::sum() - */ -template struct scalar_sum_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::padd(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd - }; -}; - -/** \internal - * \brief Template functor to compute the product of two scalars - * - * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() - */ -template struct scalar_product_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasMul && packet_traits::HasMul - }; - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmul(a,b); } - template - EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const - { return internal::predux_mul(a); } -}; -template -struct functor_traits > { - enum { - Cost = (NumTraits::MulCost + NumTraits::MulCost)/2, // rough estimate! 
- PacketAccess = scalar_product_op::Vectorizable - }; -}; - -/** \internal - * \brief Template functor to compute the conjugate product of two scalars - * - * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y) - */ -template struct scalar_conj_product_op { - - enum { - Conj = NumTraits::IsComplex - }; - - typedef typename scalar_product_traits::ReturnType result_type; - - EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const - { return conj_helper().pmul(a,b); } - - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return conj_helper().pmul(a,b); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::MulCost, - PacketAccess = internal::is_same::value && packet_traits::HasMul - }; -}; - -/** \internal - * \brief Template functor to compute the min of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() - */ -template struct scalar_min_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmin(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux_min(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMin - }; -}; - -/** \internal - * \brief Template functor to compute the max of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() - */ -template struct scalar_max_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, 
const Scalar& b) const { using std::max; return (max)(a, b); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmax(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux_max(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMax - }; -}; - -/** \internal - * \brief Template functor to compute the hypot of two scalars - * - * \sa MatrixBase::stableNorm(), class Redux - */ -template struct scalar_hypot_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) -// typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const - { - using std::max; - using std::min; - using std::sqrt; - Scalar p = (max)(_x, _y); - Scalar q = (min)(_x, _y); - Scalar qp = q/p; - return p * sqrt(Scalar(1) + qp*qp); - } -}; -template -struct functor_traits > { - enum { Cost = 5 * NumTraits::MulCost, PacketAccess=0 }; -}; - -/** \internal - * \brief Template functor to compute the pow of two scalars - */ -template struct scalar_binary_pow_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) - inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } -}; -template -struct functor_traits > { - enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; -}; - -// other binary functors: - -/** \internal - * \brief Template functor to compute the difference of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::operator- - */ -template struct scalar_difference_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::psub(a,b); } -}; -template -struct functor_traits > { - enum { - 
Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasSub - }; -}; - -/** \internal - * \brief Template functor to compute the quotient of two scalars - * - * \sa class CwiseBinaryOp, Cwise::operator/() - */ -template struct scalar_quotient_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv - }; - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pdiv(a,b); } -}; -template -struct functor_traits > { - enum { - Cost = (NumTraits::MulCost + NumTraits::MulCost), // rough estimate! - PacketAccess = scalar_quotient_op::Vectorizable - }; -}; - - - -/** \internal - * \brief Template functor to compute the and of two booleans - * - * \sa class CwiseBinaryOp, ArrayBase::operator&& - */ -struct scalar_boolean_and_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op) - EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; } -}; -template<> struct functor_traits { - enum { - Cost = NumTraits::AddCost, - PacketAccess = false - }; -}; - -/** \internal - * \brief Template functor to compute the or of two booleans - * - * \sa class CwiseBinaryOp, ArrayBase::operator|| - */ -struct scalar_boolean_or_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op) - EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; } -}; -template<> struct functor_traits { - enum { - Cost = NumTraits::AddCost, - PacketAccess = false - }; -}; - -// unary functors: - -/** \internal - * \brief Template functor to compute the opposite of a scalar - * - * \sa class CwiseUnaryOp, MatrixBase::operator- - */ -template struct scalar_opposite_op { - 
EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pnegate(a); } -}; -template -struct functor_traits > -{ enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasNegate }; -}; - -/** \internal - * \brief Template functor to compute the absolute value of a scalar - * - * \sa class CwiseUnaryOp, Cwise::abs - */ -template struct scalar_abs_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pabs(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAbs - }; -}; - -/** \internal - * \brief Template functor to compute the squared absolute value of a scalar - * - * \sa class CwiseUnaryOp, Cwise::abs2 - */ -template struct scalar_abs2_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a,a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasAbs2 }; }; - -/** \internal - * \brief Template functor to compute the conjugate of a complex value - * - * \sa class CwiseUnaryOp, MatrixBase::conjugate() - */ -template struct scalar_conjugate_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) 
const { return internal::pconj(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = NumTraits::IsComplex ? NumTraits::AddCost : 0, - PacketAccess = packet_traits::HasConj - }; -}; - -/** \internal - * \brief Template functor to cast a scalar to another type - * - * \sa class CwiseUnaryOp, MatrixBase::cast() - */ -template -struct scalar_cast_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) - typedef NewType result_type; - EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast(a); } -}; -template -struct functor_traits > -{ enum { Cost = is_same::value ? 0 : NumTraits::AddCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the real part of a complex - * - * \sa class CwiseUnaryOp, MatrixBase::real() - */ -template -struct scalar_real_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the imaginary part of a complex - * - * \sa class CwiseUnaryOp, MatrixBase::imag() - */ -template -struct scalar_imag_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the real part of a complex as a reference - * - * \sa class CwiseUnaryOp, MatrixBase::real() - */ -template -struct scalar_real_ref_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast(&a)); } -}; -template -struct functor_traits > -{ enum 
{ Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the imaginary part of a complex as a reference - * - * \sa class CwiseUnaryOp, MatrixBase::imag() - */ -template -struct scalar_imag_ref_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast(&a)); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * - * \brief Template functor to compute the exponential of a scalar - * - * \sa class CwiseUnaryOp, Cwise::exp() - */ -template struct scalar_exp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op) - inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasExp }; }; - -/** \internal - * - * \brief Template functor to compute the logarithm of a scalar - * - * \sa class CwiseUnaryOp, Cwise::log() - */ -template struct scalar_log_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) - inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::plog(a); } -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasLog }; }; - -/** \internal - * \brief Template functor to multiply a scalar by a fixed other one - * - * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/ - */ -/* NOTE why doing the pset1() in packetOp *is* an optimization ? - * indeed it seems better to declare m_other as a Packet and do the pset1() once - * in the constructor. 
However, in practice: - * - GCC does not like m_other as a Packet and generate a load every time it needs it - * - on the other hand GCC is able to moves the pset1() outside the loop :) - * - simpler code ;) - * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y) - */ -template -struct scalar_multiple_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { } - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -template -struct scalar_multiple2_op { - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { } - EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to divide a scalar by a fixed other one - * - * This functor is used to implement the quotient of a matrix by - * a scalar where the scalar type is not necessarily a floating point type. 
- * - * \sa class CwiseUnaryOp, MatrixBase::operator/ - */ -template -struct scalar_quotient1_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pdiv(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; - -// nullary functors - -template -struct scalar_constant_op { - typedef typename packet_traits::type Packet; - EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { } - template - EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; } - template - EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1(m_other); } - const Scalar m_other; -}; -template -struct functor_traits > -// FIXME replace this packet test by a safe one -{ enum { Cost = 1, PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; - -template struct scalar_identity_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) - template - EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? 
Scalar(1) : Scalar(0); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false, IsRepeatable = true }; }; - -template struct linspaced_op_impl; - -// linear access for packet ops: -// 1) initialization -// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0]) -// 2) each step (where size is 1 for coeff access or PacketSize for packet access) -// base += [size*step, ..., size*step] -// -// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp) -// in order to avoid the padd() in operator() ? -template -struct linspaced_op_impl -{ - typedef typename packet_traits::type Packet; - - linspaced_op_impl(const Scalar& low, const Scalar& step) : - m_low(low), m_step(step), - m_packetStep(pset1(packet_traits::size*step)), - m_base(padd(pset1(low), pmul(pset1(step),plset(-packet_traits::size)))) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const - { - m_base = padd(m_base, pset1(m_step)); - return m_low+Scalar(i)*m_step; - } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } - - const Scalar m_low; - const Scalar m_step; - const Packet m_packetStep; - mutable Packet m_base; -}; - -// random access for packet ops: -// 1) each step -// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) -template -struct linspaced_op_impl -{ - typedef typename packet_traits::type Packet; - - linspaced_op_impl(const Scalar& low, const Scalar& step) : - m_low(low), m_step(step), - m_lowPacket(pset1(m_low)), m_stepPacket(pset1(m_step)), m_interPacket(plset(0)) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const - { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1(i),m_interPacket))); } - - const Scalar m_low; - const Scalar m_step; - const Packet m_lowPacket; 
- const Packet m_stepPacket; - const Packet m_interPacket; -}; - -// ----- Linspace functor ---------------------------------------------------------------- - -// Forward declaration (we default to random access which does not really give -// us a speed gain when using packet access but it allows to use the functor in -// nested expressions). -template struct linspaced_op; -template struct functor_traits< linspaced_op > -{ enum { Cost = 1, PacketAccess = packet_traits::HasSetLinear, IsRepeatable = true }; }; -template struct linspaced_op -{ - typedef typename packet_traits::type Packet; - linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } - - // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since - // there row==0 and col is used for the actual iteration. - template - EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const - { - eigen_assert(col==0 || row==0); - return impl(col + row); - } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); } - - // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since - // there row==0 and col is used for the actual iteration. - template - EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const - { - eigen_assert(col==0 || row==0); - return impl.packetOp(col + row); - } - - // This proxy object handles the actual required temporaries, the different - // implementations (random vs. sequential access) as well as the - // correct piping to size 2/4 packet operations. - const linspaced_op_impl impl; -}; - -// all functors allow linear access, except scalar_identity_op. 
So we fix here a quick meta -// to indicate whether a functor allows linear access, just always answering 'yes' except for -// scalar_identity_op. -// FIXME move this to functor_traits adding a functor_default -template struct functor_has_linear_access { enum { ret = 1 }; }; -template struct functor_has_linear_access > { enum { ret = 0 }; }; - -// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication -// where the mixing of different types is handled by scalar_product_traits -// In particular, real * complex is allowed. -// FIXME move this to functor_traits adding a functor_default -template struct functor_is_product_like { enum { ret = 0 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; - - -/** \internal - * \brief Template functor to add a scalar to a fixed other one - * \sa class CwiseUnaryOp, Array::operator+ - */ -/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */ -template -struct scalar_add_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { } - inline scalar_add_op(const Scalar& other) : m_other(other) { } - inline Scalar operator() (const Scalar& a) const { return a + m_other; } - inline const Packet packetOp(const Packet& a) const - { return internal::padd(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** \internal - * \brief Template functor to compute the square root of a scalar - * \sa class CwiseUnaryOp, Cwise::sqrt() - */ -template struct scalar_sqrt_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op) - inline const 
Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } -}; -template -struct functor_traits > -{ enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasSqrt - }; -}; - -/** \internal - * \brief Template functor to compute the cosine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::cos() - */ -template struct scalar_cos_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op) - inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pcos(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasCos - }; -}; - -/** \internal - * \brief Template functor to compute the sine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::sin() - */ -template struct scalar_sin_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op) - inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::psin(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasSin - }; -}; - - -/** \internal - * \brief Template functor to compute the tan of a scalar - * \sa class CwiseUnaryOp, ArrayBase::tan() - */ -template struct scalar_tan_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op) - inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::ptan(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasTan - }; -}; - -/** \internal 
- * \brief Template functor to compute the arc cosine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::acos() - */ -template struct scalar_acos_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op) - inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pacos(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasACos - }; -}; - -/** \internal - * \brief Template functor to compute the arc sine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::asin() - */ -template struct scalar_asin_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op) - inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pasin(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasASin - }; -}; - -/** \internal - * \brief Template functor to raise a scalar to a power - * \sa class CwiseUnaryOp, Cwise::pow - */ -template -struct scalar_pow_op { - // FIXME default copy constructors seems bugged with std::complex<> - inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } - inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} - inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); } - const Scalar m_exponent; -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to compute the quotient between a scalar and array entries. 
- * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_mult_op { - scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} - inline Scalar operator() (const Scalar& a) const { return m_other / a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(m_other),a); } - Scalar m_other; -}; - -/** \internal - * \brief Template functor to compute the inverse of a scalar - * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op) - inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(Scalar(1)),a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; - -/** \internal - * \brief Template functor to compute the square of a scalar - * \sa class CwiseUnaryOp, Cwise::square() - */ -template -struct scalar_square_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op) - inline Scalar operator() (const Scalar& a) const { return a*a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pmul(a,a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -/** \internal - * \brief Template functor to compute the cube of a scalar - * \sa class CwiseUnaryOp, Cwise::cube() - */ -template -struct scalar_cube_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op) - inline Scalar operator() (const Scalar& a) const { return a*a*a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pmul(a,pmul(a,a)); } -}; -template -struct functor_traits > -{ enum { Cost = 2*NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -// default functor traits for STL functors: - -template -struct functor_traits > -{ enum { Cost = 
NumTraits::MulCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; - -#ifdef EIGEN_STDEXT_SUPPORT - -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > > -{ enum { Cost = 0, PacketAccess = false }; }; - 
-template -struct functor_traits > -{ enum { Cost = functor_traits::Cost + functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost + functor_traits::Cost + functor_traits::Cost, PacketAccess = false }; }; - -#endif // EIGEN_STDEXT_SUPPORT - -// allow to add new functors and specializations of functor_traits from outside Eigen. -// this macro is really needed because functor_traits must be specialized after it is declared but before it is used... -#ifdef EIGEN_FUNCTORS_PLUGIN -#include EIGEN_FUNCTORS_PLUGIN -#endif - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_FUNCTORS_H diff --git a/nuparu/include/Eigen/src/Core/Fuzzy.h b/nuparu/include/Eigen/src/Core/Fuzzy.h index fe63bd29..3e403a09 100644 --- a/nuparu/include/Eigen/src/Core/Fuzzy.h +++ b/nuparu/include/Eigen/src/Core/Fuzzy.h @@ -19,18 +19,19 @@ namespace internal template::IsInteger> struct isApprox_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) { - using std::min; - typename internal::nested::type nested(x); - typename internal::nested::type otherNested(y); - return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); + typename internal::nested_eval::type nested(x); + typename internal::nested_eval::type otherNested(y); + return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); } }; template struct isApprox_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&) { return x.matrix() == y.matrix(); @@ -40,6 +41,7 @@ struct isApprox_selector template::IsInteger> struct isMuchSmallerThan_object_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) 
{ return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum(); @@ -49,6 +51,7 @@ struct isMuchSmallerThan_object_selector template struct isMuchSmallerThan_object_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&) { return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); @@ -58,6 +61,7 @@ struct isMuchSmallerThan_object_selector template::IsInteger> struct isMuchSmallerThan_scalar_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec) { return x.cwiseAbs2().sum() <= numext::abs2(prec * y); @@ -67,6 +71,7 @@ struct isMuchSmallerThan_scalar_selector template struct isMuchSmallerThan_scalar_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&) { return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); diff --git a/nuparu/include/Eigen/src/Core/GeneralProduct.h b/nuparu/include/Eigen/src/Core/GeneralProduct.h index 2a59d946..fe8204ac 100644 --- a/nuparu/include/Eigen/src/Core/GeneralProduct.h +++ b/nuparu/include/Eigen/src/Core/GeneralProduct.h @@ -11,29 +11,7 @@ #ifndef EIGEN_GENERAL_PRODUCT_H #define EIGEN_GENERAL_PRODUCT_H -namespace Eigen { - -/** \class GeneralProduct - * \ingroup Core_Module - * - * \brief Expression of the product of two general matrices or vectors - * - * \param LhsNested the type used to store the left-hand side - * \param RhsNested the type used to store the right-hand side - * \param ProductMode the type of the product - * - * This class represents an expression of the product of two general matrices. - * We call a general matrix, a dense matrix with full storage. For instance, - * This excludes triangular, selfadjoint, and sparse matrices. - * It is the return type of the operator* between general matrices. 
Its template - * arguments are determined automatically by ProductReturnType. Therefore, - * GeneralProduct should never be used direclty. To determine the result type of a - * function which involves a matrix product, use ProductReturnType::Type. - * - * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase&) - */ -template::value> -class GeneralProduct; +namespace Eigen { enum { Large = 2, @@ -59,15 +37,14 @@ template struct product_type typedef typename remove_all::type _Lhs; typedef typename remove_all::type _Rhs; enum { - MaxRows = _Lhs::MaxRowsAtCompileTime, - Rows = _Lhs::RowsAtCompileTime, - MaxCols = _Rhs::MaxColsAtCompileTime, - Cols = _Rhs::ColsAtCompileTime, - MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime, - _Rhs::MaxRowsAtCompileTime), - Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, - _Rhs::RowsAtCompileTime), - LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + MaxRows = traits<_Lhs>::MaxRowsAtCompileTime, + Rows = traits<_Lhs>::RowsAtCompileTime, + MaxCols = traits<_Rhs>::MaxColsAtCompileTime, + Cols = traits<_Rhs>::ColsAtCompileTime, + MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime, + traits<_Rhs>::MaxRowsAtCompileTime), + Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime, + traits<_Rhs>::RowsAtCompileTime) }; // the splitting into different lines of code here, introducing the _select enums and the typedef below, @@ -82,7 +59,8 @@ template struct product_type public: enum { - value = selector::ret + value = selector::ret, + ret = selector::ret }; #ifdef EIGEN_DEBUG_PRODUCT static void debug() @@ -98,6 +76,31 @@ template struct product_type #endif }; +// template struct product_tag +// { +// private: +// +// typedef typename remove_all::type _Lhs; +// typedef typename remove_all::type _Rhs; +// enum { +// Rows = _Lhs::RowsAtCompileTime, +// Cols = _Rhs::ColsAtCompileTime, +// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, 
_Rhs::RowsAtCompileTime) +// }; +// +// enum { +// rows_select = Rows==1 ? int(Rows) : int(Large), +// cols_select = Cols==1 ? int(Cols) : int(Large), +// depth_select = Depth==1 ? int(Depth) : int(Large) +// }; +// typedef product_type_selector selector; +// +// public: +// enum { +// ret = selector::ret +// }; +// +// }; /* The following allows to select the kind of product at compile time * based on the three dimensions of the product. @@ -128,54 +131,6 @@ template<> struct product_type_selector { enum } // end namespace internal -/** \class ProductReturnType - * \ingroup Core_Module - * - * \brief Helper class to get the correct and optimized returned type of operator* - * - * \param Lhs the type of the left-hand side - * \param Rhs the type of the right-hand side - * \param ProductMode the type of the product (determined automatically by internal::product_mode) - * - * This class defines the typename Type representing the optimized product expression - * between two matrix expressions. In practice, using ProductReturnType::Type - * is the recommended way to define the result type of a function returning an expression - * which involve a matrix product. The class Product should never be - * used directly. - * - * \sa class Product, MatrixBase::operator*(const MatrixBase&) - */ -template -struct ProductReturnType -{ - // TODO use the nested type to reduce instanciations ???? 
-// typedef typename internal::nested::type LhsNested; -// typedef typename internal::nested::type RhsNested; - - typedef GeneralProduct Type; -}; - -template -struct ProductReturnType -{ - typedef typename internal::nested::type >::type LhsNested; - typedef typename internal::nested::type >::type RhsNested; - typedef CoeffBasedProduct Type; -}; - -template -struct ProductReturnType -{ - typedef typename internal::nested::type >::type LhsNested; - typedef typename internal::nested::type >::type RhsNested; - typedef CoeffBasedProduct Type; -}; - -// this is a workaround for sun CC -template -struct LazyProductReturnType : public ProductReturnType -{}; - /*********************************************************************** * Implementation of Inner Vector Vector Product ***********************************************************************/ @@ -187,119 +142,10 @@ struct LazyProductReturnType : public ProductReturnType with: operator=(Scalar x); -namespace internal { - -template -struct traits > - : traits::ReturnType,1,1> > -{}; - -} - -template -class GeneralProduct - : internal::no_assignment_operator, - public Matrix::ReturnType,1,1> -{ - typedef Matrix::ReturnType,1,1> Base; - public: - GeneralProduct(const Lhs& lhs, const Rhs& rhs) - { - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); - } - - /** Convertion to scalar */ - operator const typename Base::Scalar() const { - return Base::coeff(0,0); - } -}; - /*********************************************************************** * Implementation of Outer Vector Vector Product ***********************************************************************/ -namespace internal { - -// Column major -template -EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&) -{ - 
typedef typename Dest::Index Index; - // FIXME make sure lhs is sequentially stored - // FIXME not very good if rhs is real and lhs complex while alpha is real too - const Index cols = dest.cols(); - for (Index j=0; j -EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) { - typedef typename Dest::Index Index; - // FIXME make sure rhs is sequentially stored - // FIXME not very good if lhs is real and rhs complex while alpha is real too - const Index rows = dest.rows(); - for (Index i=0; i -struct traits > - : traits, Lhs, Rhs> > -{}; - -} - -template -class GeneralProduct - : public ProductBase, Lhs, Rhs> -{ - template struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; - - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - { - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - } - - struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; - struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; - struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; - struct adds { - Scalar m_scale; - adds(const Scalar& s) : m_scale(s) {} - template void operator()(const Dst& dst, const Src& src) const { - dst.const_cast_derived() += m_scale * src; - } - }; - - template - inline void evalTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, set(), IsRowMajor()); - } - - template - inline void addTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, add(), IsRowMajor()); - } - - template - inline void subTo(Dest& dest) const { - 
internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor()); - } - - template void scaleAndAddTo(Dest& dest, const Scalar& alpha) const - { - internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor()); - } -}; - /*********************************************************************** * Implementation of General Matrix Vector Product ***********************************************************************/ @@ -313,60 +159,13 @@ class GeneralProduct */ namespace internal { -template -struct traits > - : traits, Lhs, Rhs> > -{}; - template -struct gemv_selector; +struct gemv_dense_selector; } // end namespace internal -template -class GeneralProduct - : public ProductBase, Lhs, Rhs> -{ - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - typedef typename Lhs::Scalar LhsScalar; - typedef typename Rhs::Scalar RhsScalar; - - GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs) - { -// EIGEN_STATIC_ASSERT((internal::is_same::value), -// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - } - - enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; - typedef typename internal::conditional::type MatrixType; - - template void scaleAndAddTo(Dest& dst, const Scalar& alpha) const - { - eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols()); - internal::gemv_selector::HasUsableDirectAccess)>::run(*this, dst, alpha); - } -}; - namespace internal { -// The vector is on the left => transposition -template -struct gemv_selector -{ - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) - { - Transpose destT(dest); - enum { OtherStorageOrder = StorageOrder == RowMajor ? 
ColMajor : RowMajor }; - gemv_selector - ::run(GeneralProduct,Transpose, GemvProduct> - (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha); - } -}; - template struct gemv_static_vector_if; template @@ -384,7 +183,7 @@ struct gemv_static_vector_if template struct gemv_static_vector_if { - #if EIGEN_ALIGN_STATICALLY + #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 internal::plain_array m_data; EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } #else @@ -397,33 +196,48 @@ struct gemv_static_vector_if internal::plain_array m_data; EIGEN_STRONG_INLINE Scalar* data() { return ForceAlignment - ? reinterpret_cast((reinterpret_cast(m_data.array) & ~(size_t(15))) + 16) + ? reinterpret_cast((reinterpret_cast(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) : m_data.array; } #endif }; -template<> struct gemv_selector +// The vector is on the left => transposition +template +struct gemv_dense_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + Transpose destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? 
ColMajor : RowMajor }; + gemv_dense_selector + ::run(rhs.transpose(), lhs.transpose(), destT, alpha); + } +}; + +template<> struct gemv_dense_selector { - template - static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::Index Index; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::RealScalar RealScalar; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef Map, Aligned> MappedDest; - ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); - ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); + ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); + ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 @@ -435,18 +249,18 @@ template<> struct gemv_selector gemv_static_vector_if 
static_dest; - bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); - bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; - + const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + RhsScalar compatibleAlpha = get_factor::run(actualAlpha); ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), evalToDest ? dest.data() : static_dest.data()); - + if(!evalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = dest.size(); + Index size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif if(!alphaIsCompatible) @@ -458,11 +272,13 @@ template<> struct gemv_selector MappedDest(actualDestPtr, dest.size()) = dest; } + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; general_matrix_vector_product - ::run( + ::run( actualLhs.rows(), actualLhs.cols(), - actualLhs.data(), actualLhs.outerStride(), - actualRhs.data(), actualRhs.innerStride(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), actualDestPtr, 1, compatibleAlpha); @@ -476,34 +292,34 @@ template<> struct gemv_selector } }; -template<> struct gemv_selector +template<> struct gemv_dense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::Index Index; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::_ActualRhsType _ActualRhsType; - typedef typename 
ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; - - typename add_const::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename add_const::type actualRhs = RhsBlasTraits::extract(prod.rhs()); - - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all::type ActualRhsTypeCleaned; + + typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 // on, the other hand it is good for the cache to pack the vector anyways... - DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1 + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 }; - gemv_static_vector_if static_rhs; + gemv_static_vector_if static_rhs; ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), DirectlyUseRhs ? 
const_cast(actualRhs.data()) : static_rhs.data()); @@ -511,45 +327,46 @@ template<> struct gemv_selector if(!DirectlyUseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = actualRhs.size(); + Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map(actualRhsPtr, actualRhs.size()) = actualRhs; + Map(actualRhsPtr, actualRhs.size()) = actualRhs; } + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; general_matrix_vector_product - ::run( + ::run( actualLhs.rows(), actualLhs.cols(), - actualLhs.data(), actualLhs.outerStride(), - actualRhsPtr, 1, + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhsPtr, 1), dest.data(), dest.innerStride(), actualAlpha); } }; -template<> struct gemv_selector +template<> struct gemv_dense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename Dest::Index Index; - // TODO makes sure dest is sequentially stored in memory, otherwise use a temp - const Index size = prod.rhs().rows(); + // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp + typename nested_eval::type actual_rhs(rhs); + const Index size = rhs.rows(); for(Index k=0; k struct gemv_selector +template<> struct gemv_dense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename Dest::Index Index; - // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp - const Index rows = prod.rows(); + typename nested_eval::type actual_rhs(rhs); + const Index rows = dest.rows(); for(Index i=0; i struct gemv_selector * * \sa 
lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() */ +#ifndef __CUDACC__ + template template -inline const typename ProductReturnType::Type +inline const Product MatrixBase::operator*(const MatrixBase &other) const { // A note regarding the function declaration: In MSVC, this function will sometimes @@ -592,9 +411,12 @@ MatrixBase::operator*(const MatrixBase &other) const #ifdef EIGEN_DEBUG_PRODUCT internal::product_type::debug(); #endif - return typename ProductReturnType::Type(derived(), other.derived()); + + return Product(derived(), other.derived()); } +#endif // __CUDACC__ + /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. * * The returned product will behave like any other expressions: the coefficients of the product will be @@ -608,7 +430,7 @@ MatrixBase::operator*(const MatrixBase &other) const */ template template -const typename LazyProductReturnType::Type +const Product MatrixBase::lazyProduct(const MatrixBase &other) const { enum { @@ -627,7 +449,7 @@ MatrixBase::lazyProduct(const MatrixBase &other) const INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) - return typename LazyProductReturnType::Type(derived(), other.derived()); + return Product(derived(), other.derived()); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/GenericPacketMath.h b/nuparu/include/Eigen/src/Core/GenericPacketMath.h index 5f783ebe..8ad51bad 100644 --- a/nuparu/include/Eigen/src/Core/GenericPacketMath.h +++ b/nuparu/include/Eigen/src/Core/GenericPacketMath.h @@ -42,21 +42,27 @@ namespace internal { struct default_packet_traits { enum { + HasHalfPacket = 0, + HasAdd = 1, HasSub = 1, HasMul = 1, HasNegate = 1, HasAbs = 1, + HasArg = 0, HasAbs2 = 1, HasMin = 1, HasMax = 1, HasConj = 1, HasSetLinear = 1, + HasBlend = 0, HasDiv = 0, HasSqrt = 0, + HasRsqrt = 0, HasExp = 0, HasLog 
= 0, + HasLog10 = 0, HasPow = 0, HasSin = 0, @@ -64,17 +70,31 @@ struct default_packet_traits HasTan = 0, HasASin = 0, HasACos = 0, - HasATan = 0 + HasATan = 0, + HasSinh = 0, + HasCosh = 0, + HasTanh = 0, + HasLGamma = 0, + HasErf = 0, + HasErfc = 0, + + HasRound = 0, + HasFloor = 0, + HasCeil = 0, + + HasSign = 0 }; }; template struct packet_traits : default_packet_traits { typedef T type; + typedef T half; enum { Vectorizable = 0, size = 1, - AlignedOnScalar = 0 + AlignedOnScalar = 0, + HasHalfPacket = 0 }; enum { HasAdd = 0, @@ -90,135 +110,250 @@ template struct packet_traits : default_packet_traits }; }; +template struct packet_traits : packet_traits { }; + +template struct type_casting_traits { + enum { + VectorizedCast = 0, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + + +/** \internal \returns static_cast(a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a) { + return static_cast(a); +} +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/) { + return static_cast(a); +} + + /** \internal \returns a + b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) { return a+b; } /** \internal \returns a - b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) { return a-b; } /** \internal \returns -a (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) { return -a; } /** \internal \returns conj(a) (coeff-wise) */ -template inline Packet + +template EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) { return numext::conj(a); } /** \internal \returns a * b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) { return a*b; } /** \internal \returns a / b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC 
inline Packet pdiv(const Packet& a, const Packet& b) { return a/b; } /** \internal \returns the min of \a a and \a b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, - const Packet& b) { using std::min; return (min)(a, b); } + const Packet& b) { return numext::mini(a, b); } /** \internal \returns the max of \a a and \a b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, - const Packet& b) { using std::max; return (max)(a, b); } + const Packet& b) { return numext::maxi(a, b); } /** \internal \returns the absolute value of \a a */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) { using std::abs; return abs(a); } +/** \internal \returns the phase angle of \a a */ +template EIGEN_DEVICE_FUNC inline Packet +parg(const Packet& a) { using numext::arg; return arg(a); } + /** \internal \returns the bitwise and of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) { return a & b; } /** \internal \returns the bitwise or of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) { return a | b; } /** \internal \returns the bitwise xor of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) { return a ^ b; } /** \internal \returns the bitwise andnot of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) { return a & (!b); } /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits::type* from) { return *from; } /** \internal \returns a packet version of \a *from, (un-aligned load) */ -template inline Packet +template 
EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits::type* from) { return *from; } +/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ +template EIGEN_DEVICE_FUNC inline Packet +pset1(const typename unpacket_traits::type& a) { return a; } + +/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */ +template EIGEN_DEVICE_FUNC inline Packet +pload1(const typename unpacket_traits::type *a) { return pset1(*a); } + /** \internal \returns a packet with elements of \a *from duplicated. - * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and - * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]} + * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and + * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]} * Currently, this function is only used for scalar * complex products. - */ -template inline Packet + */ +template EIGEN_DEVICE_FUNC inline Packet ploaddup(const typename unpacket_traits::type* from) { return *from; } -/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ -template inline Packet -pset1(const typename unpacket_traits::type& a) { return a; } +/** \internal \returns a packet with elements of \a *from quadrupled. + * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and + * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]} + * Currently, this function is only used in matrix products. 
+ * For packet-size smaller or equal to 4, this function is equivalent to pload1 + */ +template EIGEN_DEVICE_FUNC inline Packet +ploadquad(const typename unpacket_traits::type* from) +{ return pload1(from); } + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * a2 = pload1(a+2); + * a3 = pload1(a+3); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast2 + */ +template EIGEN_DEVICE_FUNC +inline void pbroadcast4(const typename unpacket_traits::type *a, + Packet& a0, Packet& a1, Packet& a2, Packet& a3) +{ + a0 = pload1(a+0); + a1 = pload1(a+1); + a2 = pload1(a+2); + a3 = pload1(a+3); +} + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast4 + */ +template EIGEN_DEVICE_FUNC +inline void pbroadcast2(const typename unpacket_traits::type *a, + Packet& a0, Packet& a1) +{ + a0 = pload1(a+0); + a1 = pload1(a+1); +} /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). 
*/ -template inline typename packet_traits::type -plset(const Scalar& a) { return a; } +template inline Packet +plset(const typename unpacket_traits::type& a) { return a; } /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ -template inline void pstore(Scalar* to, const Packet& from) +template EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) { (*to) = from; } /** \internal copy the packet \a from to \a *to, (un-aligned store) */ -template inline void pstoreu(Scalar* to, const Packet& from) -{ (*to) = from; } +template EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) +{ (*to) = from; } + + template EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) + { return ploadu(from); } + + template EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) + { pstore(to, from); } /** \internal tries to do cache prefetching of \a addr */ template inline void prefetch(const Scalar* addr) { -#if !defined(_MSC_VER) -__builtin_prefetch(addr); +#ifdef __CUDA_ARCH__ +#if defined(__LP64__) + // 64-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr)); +#else + // 32-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr)); +#endif +#elif !EIGEN_COMP_MSVC + __builtin_prefetch(addr); #endif } /** \internal \returns the first element of a packet */ -template inline typename unpacket_traits::type pfirst(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type pfirst(const Packet& a) { return a; } /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet preduxp(const Packet* vecs) { return vecs[0]; } /** \internal \returns the sum of the elements of \a a*/ -template inline typename unpacket_traits::type predux(const Packet& a) +template 
EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux(const Packet& a) +{ return a; } + +/** \internal \returns the sum of the elements of \a a by block of 4 elements. + * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7} + * For packet-size smaller or equal to 4, this boils down to a noop. + */ +template EIGEN_DEVICE_FUNC inline +typename conditional<(unpacket_traits::size%8)==0,typename unpacket_traits::half,Packet>::type +predux4(const Packet& a) { return a; } /** \internal \returns the product of the elements of \a a*/ -template inline typename unpacket_traits::type predux_mul(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_mul(const Packet& a) { return a; } /** \internal \returns the min of the elements of \a a*/ -template inline typename unpacket_traits::type predux_min(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min(const Packet& a) { return a; } /** \internal \returns the max of the elements of \a a*/ -template inline typename unpacket_traits::type predux_max(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max(const Packet& a) { return a; } /** \internal \returns the reversed elements of \a a*/ -template inline Packet preverse(const Packet& a) +template EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) { return a; } +template +struct protate_impl +{ + // Empty so attempts to use this unimplemented path will fail to compile. + // Only specializations of this template should be used. +}; + +/** \internal \returns a packet with the coefficients rotated to the right in little-endian convention, + * by the given offset, e.g. for offset == 1: + * (packet[3], packet[2], packet[1], packet[0]) becomes (packet[0], packet[3], packet[2], packet[1]) + */ +template EIGEN_DEVICE_FUNC inline Packet protate(const Packet& a) +{ + return offset ? 
protate_impl::run(a) : a; +} /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ -template inline Packet pcplxflip(const Packet& a) +template EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) { // FIXME: uncomment the following in case we drop the internal imag and real functions. // using std::imag; @@ -250,6 +385,22 @@ Packet pasin(const Packet& a) { using std::asin; return asin(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) { using std::acos; return acos(a); } +/** \internal \returns the arc tangent of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet patan(const Packet& a) { using std::atan; return atan(a); } + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet psinh(const Packet& a) { using std::sinh; return sinh(a); } + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); } + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } + /** \internal \returns the exp of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { using std::exp; return exp(a); } @@ -258,10 +409,44 @@ Packet pexp(const Packet& a) { using std::exp; return exp(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { using std::log; return log(a); } +/** \internal \returns the log10 of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog10(const Packet& a) { using std::log10; return log10(a); } + /** \internal \returns the square-root of 
\a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } +/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet prsqrt(const Packet& a) { + return pdiv(pset1(1), psqrt(a)); +} + +/** \internal \returns the rounded value of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pround(const Packet& a) { using numext::round; return round(a); } + +/** \internal \returns the floor of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } + +/** \internal \returns the ceil of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } + +/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } + +/** \internal \returns the erf(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perf(const Packet& a) { using numext::erf; return erf(a); } + +/** \internal \returns the erfc(\a a) (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ @@ -275,34 +460,45 @@ inline void pstore1(typename unpacket_traits::type* to, const typename u } /** \internal \returns a * b + c (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmadd(const 
Packet& a, const Packet& b, const Packet& c) { return padd(pmul(a, b),c); } /** \internal \returns a packet version of \a *from. - * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */ -template -inline Packet ploadt(const typename unpacket_traits::type* from) + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits::type* from) { - if(LoadMode == Aligned) + if(Alignment >= unpacket_traits::alignment) return pload(from); else return ploadu(from); } /** \internal copy the packet \a from to \a *to. - * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */ -template -inline void pstoret(Scalar* to, const Packet& from) + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) { - if(LoadMode == Aligned) + if(Alignment >= unpacket_traits::alignment) pstore(to, from); else pstoreu(to, from); } +/** \internal \returns a packet version of \a *from. + * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the + * hardware if available to speedup the loading of data that won't be modified + * by the current computation. + */ +template +inline Packet ploadt_ro(const typename unpacket_traits::type* from) +{ + return ploadt(from); +} + /** \internal default implementation of palign() allowing partial specialization */ template struct palign_impl @@ -336,15 +532,46 @@ inline void palign(PacketType& first, const PacketType& second) * Fast complex products (GCC generates a function call which is very slow) ***************************************************************************/ +// Eigen+CUDA does not support complexes. 
+#ifndef __CUDACC__ + template<> inline std::complex pmul(const std::complex& a, const std::complex& b) { return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } template<> inline std::complex pmul(const std::complex& a, const std::complex& b) { return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } +#endif + + +/*************************************************************************** + * PacketBlock, that is a collection of N packets where the number of words + * in the packet is a multiple of N. +***************************************************************************/ +template ::size> struct PacketBlock { + Packet packet[N]; +}; + +template EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock& /*kernel*/) { + // Nothing to do in the scalar case, i.e. a 1x1 matrix. +} + +/*************************************************************************** + * Selector, i.e. vector of N boolean values used to select (i.e. blend) + * words from 2 packets. +***************************************************************************/ +template struct Selector { + bool select[N]; +}; + +template EIGEN_DEVICE_FUNC inline Packet +pblend(const Selector::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) { + return ifPacket.select[0] ? 
thenPacket : elsePacket; +} + } // end namespace internal } // end namespace Eigen #endif // EIGEN_GENERIC_PACKET_MATH_H - diff --git a/nuparu/include/Eigen/src/Core/GlobalFunctions.h b/nuparu/include/Eigen/src/Core/GlobalFunctions.h index 2acf9772..62fec700 100644 --- a/nuparu/include/Eigen/src/Core/GlobalFunctions.h +++ b/nuparu/include/Eigen/src/Core/GlobalFunctions.h @@ -14,8 +14,8 @@ #define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \ template \ inline const Eigen::CwiseUnaryOp, const Derived> \ - NAME(const Eigen::ArrayBase& x) { \ - return x.derived(); \ + (NAME)(const Eigen::ArrayBase& x) { \ + return Eigen::CwiseUnaryOp, const Derived>(x.derived()); \ } #define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \ @@ -30,25 +30,44 @@ { \ static inline typename NAME##_retval >::type run(const Eigen::ArrayBase& x) \ { \ - return x.derived(); \ + return typename NAME##_retval >::type(x.derived()); \ } \ }; - namespace Eigen { EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op) 
+ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op) template inline const Eigen::CwiseUnaryOp, const Derived> @@ -56,16 +75,46 @@ namespace Eigen return x.derived().pow(exponent); } - template - inline const Eigen::CwiseBinaryOp, const Derived, const Derived> - pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) + /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power. + * + * Example: \include Cwise_array_power_array.cpp + * Output: \verbinclude Cwise_array_power_array.out + * + * \sa ArrayBase::pow() + */ + template + inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> + pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) { - return Eigen::CwiseBinaryOp, const Derived, const Derived>( + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( x.derived(), exponents.derived() ); } + /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power between a scalar and an array of exponents. 
+ * Beaware that the scalar type of the input scalar \a x and the exponents \a exponents must be the same. + * + * Example: \include Cwise_scalar_power_array.cpp + * Output: \verbinclude Cwise_scalar_power_array.out + * + * \sa ArrayBase::pow() + */ + template + inline const Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived> + pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents) + { + typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x); + return Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived>( + constant_x, + exponents.derived() + ); + } + /** * \brief Component-wise division of a scalar by array elements. **/ diff --git a/nuparu/include/Eigen/src/Core/IO.h b/nuparu/include/Eigen/src/Core/IO.h index c8d5f637..9ae37bb5 100644 --- a/nuparu/include/Eigen/src/Core/IO.h +++ b/nuparu/include/Eigen/src/Core/IO.h @@ -49,7 +49,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& */ struct IOFormat { - /** Default contructor, see class IOFormat for the meaning of the parameters */ + /** Default constructor, see class IOFormat for the meaning of the parameters */ IOFormat(int _precision = StreamPrecision, int _flags = 0, const std::string& _coeffSeparator = " ", const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="", @@ -57,6 +57,10 @@ struct IOFormat : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator), rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags) { + // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline + // don't add rowSpacer if columns are not to be aligned + if((flags & DontAlignCols)) + return; int i = int(matSuffix.length())-1; while (i>=0 && matSuffix[i]!='\n') { @@ -160,7 +164,6 @@ std::ostream & print_matrix(std::ostream & s, const 
Derived& _m, const IOFormat& typename Derived::Nested m = _m; typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; Index width = 0; @@ -185,21 +188,22 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& explicit_precision = fmt.precision; } + std::streamsize old_precision = 0; + if(explicit_precision) old_precision = s.precision(explicit_precision); + bool align_cols = !(fmt.flags & DontAlignCols); if(align_cols) { // compute the largest width - for(Index j = 1; j < m.cols(); ++j) + for(Index j = 0; j < m.cols(); ++j) for(Index i = 0; i < m.rows(); ++i) { std::stringstream sstr; - if(explicit_precision) sstr.precision(explicit_precision); + sstr.copyfmt(s); sstr << m.coeff(i,j); width = std::max(width, Index(sstr.str().length())); } } - std::streamsize old_precision = 0; - if(explicit_precision) old_precision = s.precision(explicit_precision); s << fmt.matPrefix; for(Index i = 0; i < m.rows(); ++i) { diff --git a/nuparu/include/Eigen/src/Core/Inverse.h b/nuparu/include/Eigen/src/Core/Inverse.h new file mode 100644 index 00000000..f3ec8499 --- /dev/null +++ b/nuparu/include/Eigen/src/Core/Inverse.h @@ -0,0 +1,117 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_INVERSE_H +#define EIGEN_INVERSE_H + +namespace Eigen { + +template class InverseImpl; + +namespace internal { + +template +struct traits > + : traits +{ + typedef typename XprType::PlainObject PlainObject; + typedef traits BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit + }; +}; + +} // end namespace internal + +/** \class Inverse + * + * \brief Expression of the inverse of another expression + * + * \tparam XprType the type of the expression we are taking the inverse + * + * This class represents an abstract expression of A.inverse() + * and most of the time this is the only way it is used. + * + */ +template +class Inverse : public InverseImpl::StorageKind> +{ +public: + typedef typename XprType::StorageIndex StorageIndex; + typedef typename XprType::PlainObject PlainObject; + typedef typename internal::ref_selector::type XprTypeNested; + typedef typename internal::remove_all::type XprTypeNestedCleaned; + typedef typename internal::ref_selector::type Nested; + typedef typename internal::remove_all::type NestedExpression; + + explicit Inverse(const XprType &xpr) + : m_xpr(xpr) + {} + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + + EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; } + +protected: + XprTypeNested m_xpr; +}; + +// Generic API dispatcher +template +class InverseImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; + typedef typename XprType::Scalar Scalar; +private: + + Scalar coeff(Index row, Index col) const; + Scalar coeff(Index i) const; +}; + +namespace internal { + +/** \internal + * \brief Default evaluator for Inverse expression. + * + * This default evaluator for Inverse expression simply evaluate the inverse into a temporary + * by a call to internal::call_assignment_no_alias. 
+ * Therefore, inverse implementers only have to specialize Assignment, ...> for + * there own nested expression. + * + * \sa class Inverse + */ +template +struct unary_evaluator > + : public evaluator::PlainObject> +{ + typedef Inverse InverseType; + typedef typename InverseType::PlainObject PlainObject; + typedef evaluator Base; + + enum { Flags = Base::Flags | EvalBeforeNestingBit }; + + unary_evaluator(const InverseType& inv_xpr) + : m_result(inv_xpr.rows(), inv_xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + internal::call_assignment_no_alias(m_result, inv_xpr); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_INVERSE_H diff --git a/nuparu/include/Eigen/src/Core/Map.h b/nuparu/include/Eigen/src/Core/Map.h index f804c89d..3a8375da 100644 --- a/nuparu/include/Eigen/src/Core/Map.h +++ b/nuparu/include/Eigen/src/Core/Map.h @@ -19,7 +19,7 @@ namespace Eigen { * \brief A matrix or vector expression mapping an existing array of data. * * \tparam PlainObjectType the equivalent matrix type of the mapped data - * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned. + * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. * The default is \c #Unaligned. * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout * of an ordinary, contiguous array. This can be overridden by specifying strides. @@ -70,8 +70,6 @@ struct traits > : public traits { typedef traits TraitsBase; - typedef typename PlainObjectType::Index Index; - typedef typename PlainObjectType::Scalar Scalar; enum { InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 ? int(PlainObjectType::InnerStrideAtCompileTime) @@ -79,22 +77,9 @@ struct traits > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 ? 
int(PlainObjectType::OuterStrideAtCompileTime) : int(StrideType::OuterStrideAtCompileTime), - HasNoInnerStride = InnerStrideAtCompileTime == 1, - HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, - HasNoStride = HasNoInnerStride && HasNoOuterStride, - IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), - IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, - KeepsPacketAccess = bool(HasNoInnerStride) - && ( bool(IsDynamicSize) - || HasNoOuterStride - || ( OuterStrideAtCompileTime!=Dynamic - && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ), + Alignment = int(MapOptions)&int(AlignedMask), Flags0 = TraitsBase::Flags & (~NestByRefBit), - Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), - Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) - ? int(Flags1) : int(Flags1 & ~LinearAccessBit), - Flags3 = is_lvalue::value ? int(Flags2) : (int(Flags2) & ~LvalueBit), - Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit) + Flags = is_lvalue::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) }; private: enum { Options }; // Expressions don't have Options @@ -110,19 +95,17 @@ template class Ma EIGEN_DENSE_PUBLIC_INTERFACE(Map) typedef typename Base::PointerType PointerType; -#if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API - typedef const Scalar* PointerArgType; - inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast(ptr); } -#else typedef PointerType PointerArgType; + EIGEN_DEVICE_FUNC inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; } -#endif + EIGEN_DEVICE_FUNC inline Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() @@ -134,10 +117,11 @@ template class Ma /** Constructor in the fixed-size case. 
* * \param dataPtr pointer to the array to map - * \param a_stride optional Stride object, passing the strides. + * \param stride optional Stride object, passing the strides. */ - inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType()) - : Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride) + EIGEN_DEVICE_FUNC + explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr)), m_stride(stride) { PlainObjectType::Base::_check_template_params(); } @@ -145,11 +129,12 @@ template class Ma /** Constructor in the dynamic-size vector case. * * \param dataPtr pointer to the array to map - * \param a_size the size of the vector expression - * \param a_stride optional Stride object, passing the strides. + * \param size the size of the vector expression + * \param stride optional Stride object, passing the strides. */ - inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType()) - : Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride) + EIGEN_DEVICE_FUNC + inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride) { PlainObjectType::Base::_check_template_params(); } @@ -157,12 +142,13 @@ template class Ma /** Constructor in the dynamic-size matrix case. * * \param dataPtr pointer to the array to map - * \param nbRows the number of rows of the matrix expression - * \param nbCols the number of columns of the matrix expression - * \param a_stride optional Stride object, passing the strides. + * \param rows the number of rows of the matrix expression + * \param cols the number of columns of the matrix expression + * \param stride optional Stride object, passing the strides. 
*/ - inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType()) - : Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride) + EIGEN_DEVICE_FUNC + inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride) { PlainObjectType::Base::_check_template_params(); } @@ -173,19 +159,6 @@ template class Ma StrideType m_stride; }; -template -inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> - ::Array(const Scalar *data) -{ - this->_set_noalias(Eigen::Map(data)); -} - -template -inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> - ::Matrix(const Scalar *data) -{ - this->_set_noalias(Eigen::Map(data)); -} } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/MapBase.h b/nuparu/include/Eigen/src/Core/MapBase.h index 6876de58..75a80daa 100644 --- a/nuparu/include/Eigen/src/Core/MapBase.h +++ b/nuparu/include/Eigen/src/Core/MapBase.h @@ -12,7 +12,7 @@ #define EIGEN_MAPBASE_H #define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \ - EIGEN_STATIC_ASSERT((int(internal::traits::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ + EIGEN_STATIC_ASSERT((int(internal::evaluator::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT) namespace Eigen { @@ -37,7 +37,6 @@ template class MapBase }; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; @@ -76,8 +75,8 @@ template class MapBase typedef typename Base::CoeffReturnType CoeffReturnType; - inline Index rows() const { return m_rows.value(); } - inline Index cols() const { return m_cols.value(); } + 
EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); } /** Returns a pointer to the first coefficient of the matrix or vector. * @@ -85,24 +84,28 @@ template class MapBase * * \sa innerStride(), outerStride() */ - inline const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index rowId, Index colId) const { return m_data[colId * colStride() + rowId * rowStride()]; } + EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index index) const { EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) return m_data[index * innerStride()]; } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return this->m_data[colId * colStride() + rowId * rowStride()]; } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) @@ -123,12 +126,14 @@ template class MapBase return internal::ploadt(m_data + index * innerStride()); } - inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) + EIGEN_DEVICE_FUNC + explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) checkSanity(); } + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : m_data(dataPtr), m_rows(RowsAtCompileTime == Dynamic ? 
vecSize : Index(RowsAtCompileTime)), @@ -140,24 +145,28 @@ template class MapBase checkSanity(); } - inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) - : m_data(dataPtr), m_rows(nbRows), m_cols(nbCols) + EIGEN_DEVICE_FUNC + inline MapBase(PointerType dataPtr, Index rows, Index cols) + : m_data(dataPtr), m_rows(rows), m_cols(cols) { eigen_assert( (dataPtr == 0) - || ( nbRows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows) - && nbCols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols))); + || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) + && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); checkSanity(); } + #ifdef EIGEN_MAPBASE_PLUGIN + #include EIGEN_MAPBASE_PLUGIN + #endif + protected: + EIGEN_DEVICE_FUNC void checkSanity() const { - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits::Flags&PacketAccessBit, - internal::inner_stride_at_compile_time::ret==1), - PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); - eigen_assert(EIGEN_IMPLIES(internal::traits::Flags&AlignedBit, (size_t(m_data) % 16) == 0) - && "data is not aligned"); +#if EIGEN_MAX_ALIGN_BYTES>0 + eigen_assert(((size_t(m_data) % EIGEN_PLAIN_ENUM_MAX(1,internal::traits::Alignment)) == 0) && "data is not aligned"); +#endif } PointerType m_data; @@ -168,13 +177,14 @@ template class MapBase template class MapBase : public MapBase { + typedef MapBase ReadOnlyMapBase; public: typedef MapBase Base; typedef typename Base::Scalar Scalar; typedef typename Base::PacketScalar PacketScalar; - typedef typename Base::Index Index; + typedef typename Base::StorageIndex StorageIndex; typedef typename Base::PointerType PointerType; using Base::derived; @@ -195,14 +205,18 @@ template class MapBase const Scalar >::type ScalarWithConstIfNotLvalue; + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return this->m_data; } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return 
this->m_data; } // no const-cast here so non-const-correct code will give a compile error + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) { return this->m_data[col * colStride() + row * rowStride()]; } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index index) { EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) @@ -224,19 +238,24 @@ template class MapBase (this->m_data + index * innerStride(), val); } - explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} - inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} - inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {} + EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {} + EIGEN_DEVICE_FUNC Derived& operator=(const MapBase& other) { - Base::Base::operator=(other); + ReadOnlyMapBase::Base::operator=(other); return derived(); } - using Base::Base::operator=; + // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base, + // see bugs 821 and 920. 
+ using ReadOnlyMapBase::Base::operator=; }; +#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS + } // end namespace Eigen #endif // EIGEN_MAPBASE_H diff --git a/nuparu/include/Eigen/src/Core/MathFunctions.h b/nuparu/include/Eigen/src/Core/MathFunctions.h index 2bfc5ebd..48cf565f 100644 --- a/nuparu/include/Eigen/src/Core/MathFunctions.h +++ b/nuparu/include/Eigen/src/Core/MathFunctions.h @@ -10,8 +10,20 @@ #ifndef EIGEN_MATHFUNCTIONS_H #define EIGEN_MATHFUNCTIONS_H +// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html +#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406 + namespace Eigen { +// On WINCE, std::abs is defined for int only, so let's defined our own overloads: +// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too. +#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500 +long abs(long x) { return (labs(x)); } +double abs(double x) { return (fabs(x)); } +float abs(float x) { return (fabsf(x)); } +long double abs(long double x) { return (fabsl(x)); } +#endif + namespace internal { /** \internal \struct global_math_functions_filtering_base @@ -62,6 +74,7 @@ template::IsComplex> struct real_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x; @@ -72,6 +85,7 @@ template struct real_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { using std::real; @@ -87,7 +101,6 @@ struct real_retval typedef typename NumTraits::Real type; }; - /**************************************************************************** * Implementation of imag * ****************************************************************************/ @@ -96,6 +109,7 @@ template::IsComplex> struct imag_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar&) 
{ return RealScalar(0); @@ -106,6 +120,7 @@ template struct imag_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { using std::imag; @@ -129,10 +144,12 @@ template struct real_ref_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) { return reinterpret_cast(&x)[0]; } + EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) { return reinterpret_cast(&x)[0]; @@ -153,10 +170,12 @@ template struct imag_ref_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) { return reinterpret_cast(&x)[1]; } + EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) { return reinterpret_cast(&x)[1]; @@ -166,10 +185,12 @@ struct imag_ref_default_impl template struct imag_ref_default_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(Scalar&) { return Scalar(0); } + EIGEN_DEVICE_FUNC static inline const Scalar run(const Scalar&) { return Scalar(0); @@ -192,6 +213,7 @@ struct imag_ref_retval template::IsComplex> struct conj_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { return x; @@ -201,6 +223,7 @@ struct conj_impl template struct conj_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { using std::conj; @@ -218,25 +241,39 @@ struct conj_retval * Implementation of abs2 * ****************************************************************************/ -template -struct abs2_impl +template +struct abs2_impl_default { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x*x; } }; -template -struct abs2_impl > +template +struct abs2_impl_default // IsComplex { - static inline RealScalar run(const std::complex& x) + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) { return real(x)*real(x) + 
imag(x)*imag(x); } }; +template +struct abs2_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return abs2_impl_default::IsComplex>::run(x); + } +}; + template struct abs2_retval { @@ -251,9 +288,10 @@ template struct norm1_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { - using std::abs; + EIGEN_USING_STD_MATH(abs); return abs(real(x)) + abs(imag(x)); } }; @@ -261,9 +299,10 @@ struct norm1_default_impl template struct norm1_default_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { - using std::abs; + EIGEN_USING_STD_MATH(abs); return abs(x); } }; @@ -287,16 +326,22 @@ struct hypot_impl typedef typename NumTraits::Real RealScalar; static inline RealScalar run(const Scalar& x, const Scalar& y) { - using std::max; - using std::min; - using std::abs; - using std::sqrt; + EIGEN_USING_STD_MATH(abs); + EIGEN_USING_STD_MATH(sqrt); RealScalar _x = abs(x); RealScalar _y = abs(y); - RealScalar p = (max)(_x, _y); - if(p==RealScalar(0)) return 0; - RealScalar q = (min)(_x, _y); - RealScalar qp = q/p; + Scalar p, qp; + if(_x>_y) + { + p = _x; + qp = _y / p; + } + else + { + p = _y; + qp = _x / p; + } + if(p==RealScalar(0)) return RealScalar(0); return p * sqrt(RealScalar(1) + qp*qp); } }; @@ -314,6 +359,7 @@ struct hypot_retval template struct cast_impl { + EIGEN_DEVICE_FUNC static inline NewType run(const OldType& x) { return static_cast(x); @@ -323,48 +369,121 @@ struct cast_impl // here, for once, we're plainly returning NewType: we don't want cast to do weird things. 
template +EIGEN_DEVICE_FUNC inline NewType cast(const OldType& x) { return cast_impl::run(x); } /**************************************************************************** -* Implementation of atanh2 * +* Implementation of round * ****************************************************************************/ -template -struct atanh2_default_impl -{ - typedef Scalar retval; - typedef typename NumTraits::Real RealScalar; - static inline Scalar run(const Scalar& x, const Scalar& y) +#if EIGEN_HAS_CXX11_MATH + template + struct round_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + using std::round; + return round(x); + } + }; +#else + template + struct round_impl { - using std::abs; - using std::log; - using std::sqrt; - Scalar z = x / y; - if (y == Scalar(0) || abs(z) > sqrt(NumTraits::epsilon())) - return RealScalar(0.5) * log((y + x) / (y - x)); - else - return z + z*z*z / RealScalar(3); - } + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + EIGEN_USING_STD_MATH(floor); + EIGEN_USING_STD_MATH(ceil); + return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5)); + } + }; +#endif + +template +struct round_retval +{ + typedef Scalar type; }; +/**************************************************************************** +* Implementation of arg * +****************************************************************************/ + +#if EIGEN_HAS_CXX11_MATH + template + struct arg_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(arg); + return arg(x); + } + }; +#else + template::IsComplex> + struct arg_default_impl + { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return (x < Scalar(0)) ? 
Scalar(EIGEN_PI) : Scalar(0); } + }; + + template + struct arg_default_impl + { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(arg); + return arg(x); + } + }; + + template struct arg_impl : arg_default_impl {}; +#endif + template -struct atanh2_default_impl +struct arg_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of log1p * +****************************************************************************/ +template::IsComplex > +struct log1p_impl { - static inline Scalar run(const Scalar&, const Scalar&) + static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - return Scalar(0); + typedef typename NumTraits::Real RealScalar; + EIGEN_USING_STD_MATH(log); + Scalar x1p = RealScalar(1) + x; + return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); } }; +#if EIGEN_HAS_CXX11_MATH template -struct atanh2_impl : atanh2_default_impl::IsInteger> {}; +struct log1p_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + using std::log1p; + return log1p(x); + } +}; +#endif template -struct atanh2_retval +struct log1p_retval { typedef Scalar type; }; @@ -379,7 +498,7 @@ struct pow_default_impl typedef Scalar retval; static inline Scalar run(const Scalar& x, const Scalar& y) { - using std::pow; + EIGEN_USING_STD_MATH(pow); return pow(x, y); } }; @@ -447,48 +566,48 @@ struct random_default_impl }; enum { - floor_log2_terminate, - floor_log2_move_up, - floor_log2_move_down, - floor_log2_bogus + meta_floor_log2_terminate, + meta_floor_log2_move_up, + meta_floor_log2_move_down, + meta_floor_log2_bogus }; -template struct floor_log2_selector +template struct meta_floor_log2_selector { enum { middle = (lower + upper) / 2, - value = (upper <= lower + 1) ? 
int(floor_log2_terminate) - : (n < (1 << middle)) ? int(floor_log2_move_down) - : (n==0) ? int(floor_log2_bogus) - : int(floor_log2_move_up) + value = (upper <= lower + 1) ? int(meta_floor_log2_terminate) + : (n < (1 << middle)) ? int(meta_floor_log2_move_down) + : (n==0) ? int(meta_floor_log2_bogus) + : int(meta_floor_log2_move_up) }; }; template::value> -struct floor_log2 {}; + int selector = meta_floor_log2_selector::value> +struct meta_floor_log2 {}; template -struct floor_log2 +struct meta_floor_log2 { - enum { value = floor_log2::middle>::value }; + enum { value = meta_floor_log2::middle>::value }; }; template -struct floor_log2 +struct meta_floor_log2 { - enum { value = floor_log2::middle, upper>::value }; + enum { value = meta_floor_log2::middle, upper>::value }; }; template -struct floor_log2 +struct meta_floor_log2 { enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower }; }; template -struct floor_log2 +struct meta_floor_log2 { // no value, error at compile time }; @@ -496,11 +615,22 @@ struct floor_log2 template struct random_default_impl { - typedef typename NumTraits::NonInteger NonInteger; - static inline Scalar run(const Scalar& x, const Scalar& y) - { - return x + Scalar((NonInteger(y)-x+1) * std::rand() / (RAND_MAX + NonInteger(1))); + { + typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; + if(y range); + + return Scalar(ScalarX(x) + offset); } static inline Scalar run() @@ -508,7 +638,7 @@ struct random_default_impl #ifdef EIGEN_MAKING_DOCS return run(Scalar(NumTraits::IsSigned ? -10 : 0), Scalar(10)); #else - enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value, + enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value, scalar_bits = sizeof(Scalar) * CHAR_BIT, shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)), offset = NumTraits::IsSigned ? 
(1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0 @@ -545,97 +675,322 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); } +// Implementatin of is* functions + +// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. +#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) +#define EIGEN_USE_STD_FPCLASSIFY 1 +#else +#define EIGEN_USE_STD_FPCLASSIFY 0 +#endif + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isnan_impl(const T&) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isinf_impl(const T&) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isfinite_impl(const T&) { return true; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isfinite_impl(const T& x) +{ + #if EIGEN_USE_STD_FPCLASSIFY + using std::isfinite; + return isfinite EIGEN_NOT_A_MACRO (x); + #else + return x::highest() && x>NumTraits::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isinf_impl(const T& x) +{ + #if EIGEN_USE_STD_FPCLASSIFY + using std::isinf; + return isinf EIGEN_NOT_A_MACRO (x); + #else + return x>NumTraits::highest() || x::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isnan_impl(const T& x) +{ + #if EIGEN_USE_STD_FPCLASSIFY + using std::isnan; + return isnan EIGEN_NOT_A_MACRO (x); + #else + return x != x; + #endif +} + +#if (!EIGEN_USE_STD_FPCLASSIFY) + +#if EIGEN_COMP_MSVC + +template EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) +{ + return _fpclass(x)==_FPCLASS_NINF || 
_fpclass(x)==_FPCLASS_PINF; +} + +//MSVC defines a _isnan builtin function, but for double only +EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); } + +EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); } + +#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) + +#if EIGEN_GNUC_AT_LEAST(5,0) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only"))) +#else + // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol), + // while the second prevent too aggressive optimizations in fast-math mode: + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only"))) +#endif + +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); } + +#undef EIGEN_TMP_NOOPT_ATTRIB + +#endif + +#endif + +// The following overload are defined at the end of this file +template bool isfinite_impl(const std::complex& x); +template bool isnan_impl(const 
std::complex& x); +template bool isinf_impl(const std::complex& x); + } // end namespace internal /**************************************************************************** -* Generic math function * +* Generic math functions * ****************************************************************************/ namespace numext { +#ifndef __CUDA_ARCH__ +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(min); + return min EIGEN_NOT_A_MACRO (x,y); +} + +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(max); + return max EIGEN_NOT_A_MACRO (x,y); +} +#else +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) +{ + return y < x ? y : x; +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) +{ + return fmin(x, y); +} +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) +{ + return x < y ? 
y : x; +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) +{ + return fmax(x, y); +} +#endif + + template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x) { return internal::real_ref_impl::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) { return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x) { return internal::imag_ref_impl::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x) { return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y) { return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y); } template -inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y) 
+EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) { - return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y); + return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) { return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); } -// std::isfinite is non standard, so let's define our own version, -// even though it is not very efficient. -template bool (isfinite)(const T& x) +template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } +template EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } +template EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +T (floor)(const T& x) +{ + EIGEN_USING_STD_MATH(floor); + return floor(x); +} + +template +EIGEN_DEVICE_FUNC +T (ceil)(const T& x) { - return x::highest() && x>NumTraits::lowest(); + EIGEN_USING_STD_MATH(ceil); + return ceil(x); +} + +// Log base 2 for 32 bits positive integers. +// Conveniently returns 0 for x==0. 
+inline int log2(int x) +{ + eigen_assert(x>=0); + unsigned int v(x); + static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return table[(v * 0x07C4ACDDU) >> 27]; } } // end namespace numext namespace internal { +template +bool isfinite_impl(const std::complex& x) +{ + return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); +} + +template +bool isnan_impl(const std::complex& x) +{ + return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); +} + +template +bool isinf_impl(const std::complex& x) +{ + return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); +} + /**************************************************************************** * Implementation of fuzzy comparisons * ****************************************************************************/ @@ -649,18 +1004,19 @@ template struct scalar_fuzzy_default_impl { typedef typename NumTraits::Real RealScalar; - template + template EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) { - using std::abs; + EIGEN_USING_STD_MATH(abs); return abs(x) <= abs(y) * prec; } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { - using std::min; - using std::abs; - return abs(x - y) <= (min)(abs(x), abs(y)) * prec; + EIGEN_USING_STD_MATH(abs); + return abs(x - y) <= numext::mini(abs(x), abs(y)) * prec; } + EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) { return x <= y || isApprox(x, y, prec); @@ -671,15 +1027,17 @@ template struct scalar_fuzzy_default_impl { typedef typename NumTraits::Real RealScalar; - template + template EIGEN_DEVICE_FUNC static inline bool 
isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&) { return x == Scalar(0); } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&) { return x == y; } + EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&) { return x <= y; @@ -697,29 +1055,28 @@ struct scalar_fuzzy_default_impl } static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { - using std::min; - return numext::abs2(x - y) <= (min)(numext::abs2(x), numext::abs2(y)) * prec * prec; + return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec; } }; template struct scalar_fuzzy_impl : scalar_fuzzy_default_impl::IsComplex, NumTraits::IsInteger> {}; -template +template EIGEN_DEVICE_FUNC inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, typename NumTraits::Real precision = NumTraits::dummy_precision()) { return scalar_fuzzy_impl::template isMuchSmallerThan(x, y, precision); } -template +template EIGEN_DEVICE_FUNC inline bool isApprox(const Scalar& x, const Scalar& y, typename NumTraits::Real precision = NumTraits::dummy_precision()) { return scalar_fuzzy_impl::isApprox(x, y, precision); } -template +template EIGEN_DEVICE_FUNC inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, typename NumTraits::Real precision = NumTraits::dummy_precision()) { @@ -742,17 +1099,19 @@ template<> struct scalar_fuzzy_impl { typedef bool RealScalar; - template + template EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) { return !x; } + EIGEN_DEVICE_FUNC static inline bool isApprox(bool x, bool y, bool) { return x == y; } + EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&) { return (!x) || y; diff --git a/nuparu/include/Eigen/src/Core/Matrix.h b/nuparu/include/Eigen/src/Core/Matrix.h index 0ba5d90c..ce1b70d2 100644 --- 
a/nuparu/include/Eigen/src/Core/Matrix.h +++ b/nuparu/include/Eigen/src/Core/Matrix.h @@ -24,13 +24,13 @@ namespace Eigen { * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note"). * * The first three template parameters are required: - * \tparam _Scalar \anchor matrix_tparam_scalar Numeric type, e.g. float, double, int or std::complex. - * User defined sclar types are supported as well (see \ref user_defined_scalars "here"). + * \tparam _Scalar Numeric type, e.g. float, double, int or std::complex. + * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). * \tparam _Rows Number of rows, or \b Dynamic * \tparam _Cols Number of columns, or \b Dynamic * * The remaining template parameters are optional -- in most cases you don't have to worry about them. - * \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either + * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either * \b #AutoAlign or \b #DontAlign. * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required * for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size. @@ -97,6 +97,40 @@ namespace Eigen { * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic. * * + * ABI and storage layout + * + * The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3. + * + * + * + * + * + * + *
Matrix typeEquivalent C structure
\code Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index rows, cols; + * }; + * \endcode
\code + * Matrix + * Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index size; + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[Rows*Cols]; // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0 + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[MaxRows*MaxCols]; // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0 + * Eigen::Index rows, cols; + * }; + * \endcode
+ * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two + * smaller to EIGEN_MAX_STATIC_ALIGN_BYTES. + * * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, * \ref TopicStorageOrders */ @@ -105,9 +139,23 @@ namespace internal { template struct traits > { +private: + enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret }; + typedef typename find_best_packet<_Scalar,size>::type PacketScalar; + enum { + row_major_bit = _Options&RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic, + max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols, + default_alignment = compute_default_alignment<_Scalar,max_size>::value, + actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, + required_alignment = unpacket_traits::alignment, + packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0 + }; + +public: typedef _Scalar Scalar; typedef Dense StorageKind; - typedef DenseIndex Index; + typedef Eigen::Index StorageIndex; typedef MatrixXpr XprKind; enum { RowsAtCompileTime = _Rows, @@ -115,10 +163,13 @@ struct traits > MaxRowsAtCompileTime = _MaxRows, MaxColsAtCompileTime = _MaxCols, Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, - CoeffReadCost = NumTraits::ReadCost, Options = _Options, InnerStrideAtCompileTime = 1, - OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime + OuterStrideAtCompileTime = (Options&RowMajor) ? 
ColsAtCompileTime : RowsAtCompileTime, + + // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase + EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit, + Alignment = actual_alignment }; }; } @@ -151,6 +202,7 @@ class Matrix * * \callgraph */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other) { return Base::_set(other); @@ -167,7 +219,8 @@ class Matrix * remain row-vectors and vectors remain vectors. */ template - EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase& other) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase& other) { return Base::_set(other); } @@ -179,12 +232,14 @@ class Matrix * \copydetails DenseBase::operator=(const EigenBase &other) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase &other) { return Base::operator=(other); } template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue& func) { return Base::operator=(func); @@ -200,6 +255,7 @@ class Matrix * * \sa resize(Index,Index) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix() : Base() { Base::_check_template_params(); @@ -207,45 +263,87 @@ class Matrix } // FIXME is it still needed - Matrix(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC + explicit Matrix(internal::constructor_without_unaligned_array_assert) : Base(internal::constructor_without_unaligned_array_assert()) { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } - /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors - * - * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, - * it is redundant to pass the dimension here, so it makes more sense to use the default - * constructor Matrix() instead. - */ - EIGEN_STRONG_INLINE explicit Matrix(Index dim) - : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 
1 : dim) +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + Matrix(Matrix&& other) + : Base(std::move(other)) { Base::_check_template_params(); - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix) - eigen_assert(dim >= 0); - eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim); - EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) + Base::_set_noalias(other); } + EIGEN_DEVICE_FUNC + Matrix& operator=(Matrix&& other) + { + other.swap(*this); + return *this; + } +#endif #ifndef EIGEN_PARSED_BY_DOXYGEN + + // This constructor is for both 1x1 matrices and dynamic vectors + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Matrix(const T& x) + { + Base::_check_template_params(); + Base::template _init1(x); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) { Base::_check_template_params(); Base::template _init2(x, y); } #else + /** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC + explicit Matrix(const Scalar *data); + + /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * This is useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance, + * calling Matrix(1) will call the initialization constructor: Matrix(const Scalar&). + * For fixed-size \c 1x1 matrices it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). 
+ */ + EIGEN_STRONG_INLINE explicit Matrix(Index dim); + /** \brief Constructs an initialized 1x1 matrix with the given coefficient */ + Matrix(const Scalar& x); /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns. * * This is useful for dynamic-size matrices. For fixed-size matrices, * it is redundant to pass these parameters, so one should use the default constructor - * Matrix() instead. */ + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance, + * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y). + * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). + */ + EIGEN_DEVICE_FUNC Matrix(Index rows, Index cols); + /** \brief Constructs an initialized 2D vector with given coefficients */ Matrix(const Scalar& x, const Scalar& y); #endif /** \brief Constructs an initialized 3D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z) { Base::_check_template_params(); @@ -255,6 +353,7 @@ class Matrix m_storage.data()[2] = z; } /** \brief Constructs an initialized 4D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w) { Base::_check_template_params(); @@ -265,76 +364,33 @@ class Matrix m_storage.data()[3] = w; } - explicit Matrix(const Scalar *data); - /** \brief Constructor copying the value of the expression \a other */ - template - EIGEN_STRONG_INLINE Matrix(const MatrixBase& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - // This test resides here, to bring the error messages closer to the user. 
Normally, these checks - // are performed deeply within the library, thus causing long and scary error traces. - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - Base::_check_template_params(); - Base::_set_noalias(other); - } /** \brief Copy constructor */ - EIGEN_STRONG_INLINE Matrix(const Matrix& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } - /** \brief Copy constructor with in-place evaluation */ - template - EIGEN_STRONG_INLINE Matrix(const ReturnByValue& other) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - other.evalTo(*this); - } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other) + { } /** \brief Copy constructor for generic expressions. * \sa MatrixBase::operator=(const EigenBase&) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const EigenBase &other) - : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - // FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to - // go for pure _set() implementations, right? - *this = other; - } - - /** \internal - * \brief Override MatrixBase::swap() since for dynamic-sized matrices - * of same type it is enough to swap the data pointers. 
- */ - template - void swap(MatrixBase const & other) - { this->_swap(other.derived()); } + : Base(other.derived()) + { } - inline Index innerStride() const { return 1; } - inline Index outerStride() const { return this->innerSize(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } /////////// Geometry module /////////// template + EIGEN_DEVICE_FUNC explicit Matrix(const RotationBase& r); template + EIGEN_DEVICE_FUNC Matrix& operator=(const RotationBase& r); - #ifdef EIGEN2_SUPPORT - template - explicit Matrix(const eigen2_RotationBase& r); - template - Matrix& operator=(const eigen2_RotationBase& r); - #endif - // allow to extend Matrix outside Eigen #ifdef EIGEN_MATRIX_PLUGIN #include EIGEN_MATRIX_PLUGIN diff --git a/nuparu/include/Eigen/src/Core/MatrixBase.h b/nuparu/include/Eigen/src/Core/MatrixBase.h index 9193b6ab..9d612c85 100644 --- a/nuparu/include/Eigen/src/Core/MatrixBase.h +++ b/nuparu/include/Eigen/src/Core/MatrixBase.h @@ -52,7 +52,7 @@ template class MatrixBase #ifndef EIGEN_PARSED_BY_DOXYGEN typedef MatrixBase StorageBaseType; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; @@ -66,8 +66,7 @@ template class MatrixBase using Base::MaxSizeAtCompileTime; using Base::IsVectorAtCompileTime; using Base::Flags; - using Base::CoeffReadCost; - + using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -81,6 +80,8 @@ template class MatrixBase using Base::operator-=; using Base::operator*=; using Base::operator/=; + using Base::operator*; + using Base::operator/; typedef typename Base::CoeffReturnType CoeffReturnType; typedef typename Base::ConstTransposeReturnType 
ConstTransposeReturnType; @@ -98,25 +99,14 @@ template class MatrixBase /** \returns the size of the main diagonal, which is min(rows(),cols()). * \sa rows(), cols(), SizeAtCompileTime. */ + EIGEN_DEVICE_FUNC inline Index diagonalSize() const { return (std::min)(rows(),cols()); } - /** \brief The plain matrix type corresponding to this expression. - * - * This is not necessarily exactly the return type of eval(). In the case of plain matrices, - * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed - * that the return type of eval() is either PlainObject or const PlainObject&. - */ - typedef Matrix::Scalar, - internal::traits::RowsAtCompileTime, - internal::traits::ColsAtCompileTime, - AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), - internal::traits::MaxRowsAtCompileTime, - internal::traits::MaxColsAtCompileTime - > PlainObject; + typedef typename Base::PlainObject PlainObject; #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; /** \internal the return type of MatrixBase::adjoint() */ typedef typename internal::conditional::IsComplex, CwiseUnaryOp, ConstTransposeReturnType>, @@ -125,7 +115,7 @@ template class MatrixBase /** \internal Return type of eigenvalues() */ typedef Matrix, internal::traits::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType; /** \internal the return type of identity */ - typedef CwiseNullaryOp,Derived> IdentityReturnType; + typedef CwiseNullaryOp,PlainObject> IdentityReturnType; /** \internal the return type of unit vectors */ typedef Block, SquareMatrixType>, internal::traits::RowsAtCompileTime, @@ -145,39 +135,48 @@ template class MatrixBase /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ + 
EIGEN_DEVICE_FUNC Derived& operator=(const MatrixBase& other); // We cannot inherit here via Base::operator= since it is causing // trouble with MSVC. template + EIGEN_DEVICE_FUNC Derived& operator=(const DenseBase& other); template + EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase& other); template + EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue& other); -#ifndef EIGEN_PARSED_BY_DOXYGEN - template - Derived& lazyAssign(const ProductBase& other); - - template - Derived& lazyAssign(const MatrixPowerProduct& other); -#endif // not EIGEN_PARSED_BY_DOXYGEN - template + EIGEN_DEVICE_FUNC Derived& operator+=(const MatrixBase& other); template + EIGEN_DEVICE_FUNC Derived& operator-=(const MatrixBase& other); +#ifdef __CUDACC__ template - const typename ProductReturnType::Type + EIGEN_DEVICE_FUNC + const Product + operator*(const MatrixBase &other) const + { return this->lazyProduct(other); } +#else + + template + const Product operator*(const MatrixBase &other) const; +#endif + template - const typename LazyProductReturnType::Type + EIGEN_DEVICE_FUNC + const Product lazyProduct(const MatrixBase &other) const; template @@ -190,88 +189,91 @@ template class MatrixBase void applyOnTheRight(const EigenBase& other); template - const DiagonalProduct + EIGEN_DEVICE_FUNC + const Product operator*(const DiagonalBase &diagonal) const; template + EIGEN_DEVICE_FUNC typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType dot(const MatrixBase& other) const; - #ifdef EIGEN2_SUPPORT - template - Scalar eigen2_dot(const MatrixBase& other) const; - #endif - - RealScalar squaredNorm() const; - RealScalar norm() const; + EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; + EIGEN_DEVICE_FUNC RealScalar norm() const; RealScalar stableNorm() const; RealScalar blueNorm() const; RealScalar hypotNorm() const; - const PlainObject normalized() const; - void normalize(); + EIGEN_DEVICE_FUNC const PlainObject normalized() const; + EIGEN_DEVICE_FUNC 
void normalize(); - const AdjointReturnType adjoint() const; - void adjointInPlace(); + EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const; + EIGEN_DEVICE_FUNC void adjointInPlace(); typedef Diagonal DiagonalReturnType; + EIGEN_DEVICE_FUNC DiagonalReturnType diagonal(); - typedef typename internal::add_const >::type ConstDiagonalReturnType; + + typedef typename internal::add_const >::type ConstDiagonalReturnType; + EIGEN_DEVICE_FUNC ConstDiagonalReturnType diagonal() const; template struct DiagonalIndexReturnType { typedef Diagonal Type; }; template struct ConstDiagonalIndexReturnType { typedef const Diagonal Type; }; - template typename DiagonalIndexReturnType::Type diagonal(); - template typename ConstDiagonalIndexReturnType::Type diagonal() const; - - // Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations. - // On the other hand they confuse MSVC8... - #if (defined _MSC_VER) && (_MSC_VER >= 1500) // 2008 or later - typename MatrixBase::template DiagonalIndexReturnType::Type diagonal(Index index); - typename MatrixBase::template ConstDiagonalIndexReturnType::Type diagonal(Index index) const; - #else - typename DiagonalIndexReturnType::Type diagonal(Index index); - typename ConstDiagonalIndexReturnType::Type diagonal(Index index) const; - #endif - - #ifdef EIGEN2_SUPPORT - template typename internal::eigen2_part_return_type::type part(); - template const typename internal::eigen2_part_return_type::type part() const; + template + EIGEN_DEVICE_FUNC + typename DiagonalIndexReturnType::Type diagonal(); + + template + EIGEN_DEVICE_FUNC + typename ConstDiagonalIndexReturnType::Type diagonal() const; - // huuuge hack. make Eigen2's matrix.part() work in eigen3. Problem: Diagonal is now a class template instead - // of an integer constant. Solution: overload the part() method template wrt template parameters list. 
- template class U> - const DiagonalWrapper part() const - { return diagonal().asDiagonal(); } - #endif // EIGEN2_SUPPORT + typedef Diagonal DiagonalDynamicIndexReturnType; + typedef typename internal::add_const >::type ConstDiagonalDynamicIndexReturnType; + + EIGEN_DEVICE_FUNC + DiagonalDynamicIndexReturnType diagonal(Index index); + EIGEN_DEVICE_FUNC + ConstDiagonalDynamicIndexReturnType diagonal(Index index) const; template struct TriangularViewReturnType { typedef TriangularView Type; }; template struct ConstTriangularViewReturnType { typedef const TriangularView Type; }; - template typename TriangularViewReturnType::Type triangularView(); - template typename ConstTriangularViewReturnType::Type triangularView() const; + template + EIGEN_DEVICE_FUNC + typename TriangularViewReturnType::Type triangularView(); + template + EIGEN_DEVICE_FUNC + typename ConstTriangularViewReturnType::Type triangularView() const; template struct SelfAdjointViewReturnType { typedef SelfAdjointView Type; }; template struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView Type; }; - template typename SelfAdjointViewReturnType::Type selfadjointView(); - template typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; + template + EIGEN_DEVICE_FUNC + typename SelfAdjointViewReturnType::Type selfadjointView(); + template + EIGEN_DEVICE_FUNC + typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; const SparseView sparseView(const Scalar& m_reference = Scalar(0), const typename NumTraits::Real& m_epsilon = NumTraits::dummy_precision()) const; - static const IdentityReturnType Identity(); - static const IdentityReturnType Identity(Index rows, Index cols); - static const BasisReturnType Unit(Index size, Index i); - static const BasisReturnType Unit(Index i); - static const BasisReturnType UnitX(); - static const BasisReturnType UnitY(); - static const BasisReturnType UnitZ(); - static const BasisReturnType UnitW(); - + EIGEN_DEVICE_FUNC static 
const IdentityReturnType Identity(); + EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitX(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitY(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitW(); + + EIGEN_DEVICE_FUNC const DiagonalWrapper asDiagonal() const; const PermutationWrapper asPermutation() const; + EIGEN_DEVICE_FUNC Derived& setIdentity(); + EIGEN_DEVICE_FUNC Derived& setIdentity(Index rows, Index cols); bool isIdentity(const RealScalar& prec = NumTraits::dummy_precision()) const; @@ -303,59 +305,49 @@ template class MatrixBase NoAlias noalias(); - inline const ForceAlignedAccess forceAlignedAccess() const; - inline ForceAlignedAccess forceAlignedAccess(); - template inline typename internal::add_const_on_value_type,Derived&>::type>::type forceAlignedAccessIf() const; - template inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); + // TODO forceAlignedAccess is temporarily disabled + // Need to find a nicer workaround. 
+ inline const Derived& forceAlignedAccess() const { return derived(); } + inline Derived& forceAlignedAccess() { return derived(); } + template inline const Derived& forceAlignedAccessIf() const { return derived(); } + template inline Derived& forceAlignedAccessIf() { return derived(); } - Scalar trace() const; + EIGEN_DEVICE_FUNC Scalar trace() const; -/////////// Array module /////////// + template EIGEN_DEVICE_FUNC RealScalar lpNorm() const; - template RealScalar lpNorm() const; - - MatrixBase& matrix() { return *this; } - const MatrixBase& matrix() const { return *this; } + EIGEN_DEVICE_FUNC MatrixBase& matrix() { return *this; } + EIGEN_DEVICE_FUNC const MatrixBase& matrix() const { return *this; } /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix * \sa ArrayBase::matrix() */ - ArrayWrapper array() { return derived(); } - const ArrayWrapper array() const { return derived(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper array() { return ArrayWrapper(derived()); } + /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix + * \sa ArrayBase::matrix() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper array() const { return ArrayWrapper(derived()); } /////////// LU module /////////// - const FullPivLU fullPivLu() const; - const PartialPivLU partialPivLu() const; + EIGEN_DEVICE_FUNC + inline const FullPivLU fullPivLu() const; + EIGEN_DEVICE_FUNC + inline const PartialPivLU partialPivLu() const; - #if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS - const LU lu() const; - #endif + EIGEN_DEVICE_FUNC + inline const PartialPivLU lu() const; - #ifdef EIGEN2_SUPPORT - const LU eigen2_lu() const; - #endif - - #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS - const PartialPivLU lu() const; - #endif + EIGEN_DEVICE_FUNC + inline const Inverse inverse() const; - #ifdef EIGEN2_SUPPORT template - void computeInverse(MatrixBase *result) const { - *result = this->inverse(); - } - #endif 
- - const internal::inverse_impl inverse() const; - template - void computeInverseAndDetWithCheck( + inline void computeInverseAndDetWithCheck( ResultType& inverse, typename ResultType::Scalar& determinant, bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() ) const; template - void computeInverseWithCheck( + inline void computeInverseWithCheck( ResultType& inverse, bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() @@ -364,29 +356,24 @@ template class MatrixBase /////////// Cholesky module /////////// - const LLT llt() const; - const LDLT ldlt() const; + inline const LLT llt() const; + inline const LDLT ldlt() const; /////////// QR module /////////// - const HouseholderQR householderQr() const; - const ColPivHouseholderQR colPivHouseholderQr() const; - const FullPivHouseholderQR fullPivHouseholderQr() const; - - #ifdef EIGEN2_SUPPORT - const QR qr() const; - #endif + inline const HouseholderQR householderQr() const; + inline const ColPivHouseholderQR colPivHouseholderQr() const; + inline const FullPivHouseholderQR fullPivHouseholderQr() const; - EigenvaluesReturnType eigenvalues() const; - RealScalar operatorNorm() const; +/////////// Eigenvalues module /////////// -/////////// SVD module /////////// + inline EigenvaluesReturnType eigenvalues() const; + inline RealScalar operatorNorm() const; - JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; +/////////// SVD module /////////// - #ifdef EIGEN2_SUPPORT - SVD svd() const; - #endif + inline JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; + inline BDCSVD bdcSvd(unsigned int computationOptions = 0) const; /////////// Geometry module /////////// @@ -398,20 +385,25 @@ template class MatrixBase }; #endif // EIGEN_PARSED_BY_DOXYGEN template - typename cross_product_return_type::type + EIGEN_DEVICE_FUNC + inline typename cross_product_return_type::type cross(const MatrixBase& other) const; + template - PlainObject 
cross3(const MatrixBase& other) const; - PlainObject unitOrthogonal(void) const; - Matrix eulerAngles(Index a0, Index a1, Index a2) const; + EIGEN_DEVICE_FUNC + inline PlainObject cross3(const MatrixBase& other) const; + + EIGEN_DEVICE_FUNC + inline PlainObject unitOrthogonal(void) const; - #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS - ScalarMultipleReturnType operator*(const UniformScaling& s) const; + inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; + + inline ScalarMultipleReturnType operator*(const UniformScaling& s) const; // put this as separate enum value to work around possible GCC 4.3 bug (?) - enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal }; + enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) + : ColsAtCompileTime==1 ? Vertical : Horizontal }; typedef Homogeneous HomogeneousReturnType; - HomogeneousReturnType homogeneous() const; - #endif + inline HomogeneousReturnType homogeneous() const; enum { SizeMinusOne = SizeAtCompileTime==Dynamic ? 
Dynamic : SizeAtCompileTime-1 @@ -422,7 +414,7 @@ template class MatrixBase typedef CwiseUnaryOp::Scalar>, const ConstStartMinusOne > HNormalizedReturnType; - const HNormalizedReturnType hnormalized() const; + inline const HNormalizedReturnType hnormalized() const; ////////// Householder module /////////// @@ -446,6 +438,15 @@ template class MatrixBase template void applyOnTheRight(Index p, Index q, const JacobiRotation& j); +///////// SparseCore module ///////// + + template + EIGEN_STRONG_INLINE const typename SparseMatrixBase::template CwiseProductDenseReturnType::Type + cwiseProduct(const SparseMatrixBase &other) const + { + return other.cwiseProduct(derived()); + } + ///////// MatrixFunctions module ///////// typedef typename internal::stem_function::type StemFunction; @@ -458,49 +459,15 @@ template class MatrixBase const MatrixSquareRootReturnValue sqrt() const; const MatrixLogarithmReturnValue log() const; const MatrixPowerReturnValue pow(const RealScalar& p) const; - -#ifdef EIGEN2_SUPPORT - template - Derived& operator+=(const Flagged, 0, - EvalBeforeAssigningBit>& other); - - template - Derived& operator-=(const Flagged, 0, - EvalBeforeAssigningBit>& other); - - /** \deprecated because .lazy() is deprecated - * Overloaded for cache friendly product evaluation */ - template - Derived& lazyAssign(const Flagged& other) - { return lazyAssign(other._expression()); } - - template - const Flagged marked() const; - const Flagged lazy() const; - - inline const Cwise cwise() const; - inline Cwise cwise(); - - VectorBlock start(Index size); - const VectorBlock start(Index size) const; - VectorBlock end(Index size); - const VectorBlock end(Index size) const; - template VectorBlock start(); - template const VectorBlock start() const; - template VectorBlock end(); - template const VectorBlock end() const; - - Minor minor(Index row, Index col); - const Minor minor(Index row, Index col) const; -#endif + const MatrixComplexPowerReturnValue pow(const std::complex& p) 
const; protected: - MatrixBase() : Base() {} + EIGEN_DEVICE_FUNC MatrixBase() : Base() {} private: - explicit MatrixBase(int); - MatrixBase(int,int); - template explicit MatrixBase(const MatrixBase&); + EIGEN_DEVICE_FUNC explicit MatrixBase(int); + EIGEN_DEVICE_FUNC MatrixBase(int,int); + template EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase&); protected: // mixing arrays and matrices is not legal template Derived& operator+=(const ArrayBase& ) @@ -510,6 +477,51 @@ template class MatrixBase {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} }; + +/*************************************************************************** +* Implementation of matrix base methods +***************************************************************************/ + +/** replaces \c *this by \c *this * \a other. + * + * \returns a reference to \c *this + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline Derived& +MatrixBase::operator*=(const EigenBase &other) +{ + other.derived().applyThisOnTheRight(derived()); + return derived(); +} + +/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=(). + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline void MatrixBase::applyOnTheRight(const EigenBase &other) +{ + other.derived().applyThisOnTheRight(derived()); +} + +/** replaces \c *this by \a other * \c *this. 
+ * + * Example: \include MatrixBase_applyOnTheLeft.cpp + * Output: \verbinclude MatrixBase_applyOnTheLeft.out + */ +template +template +inline void MatrixBase::applyOnTheLeft(const EigenBase &other) +{ + other.derived().applyThisOnTheLeft(derived()); +} + } // end namespace Eigen #endif // EIGEN_MATRIXBASE_H diff --git a/nuparu/include/Eigen/src/Core/NestByValue.h b/nuparu/include/Eigen/src/Core/NestByValue.h index a893b176..9aeaf8d1 100644 --- a/nuparu/include/Eigen/src/Core/NestByValue.h +++ b/nuparu/include/Eigen/src/Core/NestByValue.h @@ -40,29 +40,29 @@ template class NestByValue typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) - inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} - inline Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return 
m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -91,7 +91,7 @@ template class NestByValue m_expression.const_cast_derived().template writePacket(index, x); } - operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType m_expression; diff --git a/nuparu/include/Eigen/src/Core/NoAlias.h b/nuparu/include/Eigen/src/Core/NoAlias.h index 768bfb18..0ade7525 100644 --- a/nuparu/include/Eigen/src/Core/NoAlias.h +++ b/nuparu/include/Eigen/src/Core/NoAlias.h @@ -30,62 +30,36 @@ namespace Eigen { template class StorageBase> class NoAlias { - typedef typename ExpressionType::Scalar Scalar; public: - NoAlias(ExpressionType& expression) : m_expression(expression) {} - - /** Behaves like MatrixBase::lazyAssign(other) - * \sa MatrixBase::lazyAssign() */ + typedef typename ExpressionType::Scalar Scalar; + + explicit NoAlias(ExpressionType& expression) : m_expression(expression) {} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) - { return internal::assign_selector::run(m_expression,other.derived()); } - - /** \sa MatrixBase::operator+= */ + { + call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); + return m_expression; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) { - typedef SelfCwiseBinaryOp, ExpressionType, OtherDerived> SelfAdder; - SelfAdder tmp(m_expression); - typedef typename internal::nested::type OtherDerivedNested; - typedef typename internal::remove_all::type _OtherDerivedNested; - internal::assign_selector::run(tmp,OtherDerivedNested(other.derived())); + call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); return m_expression; } - - /** \sa 
MatrixBase::operator-= */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) { - typedef SelfCwiseBinaryOp, ExpressionType, OtherDerived> SelfAdder; - SelfAdder tmp(m_expression); - typedef typename internal::nested::type OtherDerivedNested; - typedef typename internal::remove_all::type _OtherDerivedNested; - internal::assign_selector::run(tmp,OtherDerivedNested(other.derived())); + call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); return m_expression; } -#ifndef EIGEN_PARSED_BY_DOXYGEN - template - EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase& other) - { other.derived().addTo(m_expression); return m_expression; } - - template - EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase& other) - { other.derived().subTo(m_expression); return m_expression; } - - template - EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct& other) - { return m_expression.derived() += CoeffBasedProduct(other.lhs(), other.rhs()); } - - template - EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct& other) - { return m_expression.derived() -= CoeffBasedProduct(other.lhs(), other.rhs()); } - - template - ExpressionType& operator=(const ReturnByValue& func) - { return m_expression = func; } -#endif - + EIGEN_DEVICE_FUNC ExpressionType& expression() const { return m_expression; @@ -126,7 +100,7 @@ class NoAlias template NoAlias MatrixBase::noalias() { - return derived(); + return NoAlias(derived()); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/NumTraits.h b/nuparu/include/Eigen/src/Core/NumTraits.h index bac9e50b..1d85dec7 100644 --- a/nuparu/include/Eigen/src/Core/NumTraits.h +++ b/nuparu/include/Eigen/src/Core/NumTraits.h @@ -68,21 +68,40 @@ template struct GenericNumTraits >::type NonInteger; typedef T Nested; - static inline Real epsilon() { return std::numeric_limits::epsilon(); } + EIGEN_DEVICE_FUNC + static 
inline Real epsilon() + { + #if defined(__CUDA_ARCH__) + return internal::device::numeric_limits::epsilon(); + #else + return std::numeric_limits::epsilon(); + #endif + } + EIGEN_DEVICE_FUNC static inline Real dummy_precision() { // make sure to override this for floating-point types return Real(0); } - static inline T highest() { return (std::numeric_limits::max)(); } - static inline T lowest() { return IsInteger ? (std::numeric_limits::min)() : (-(std::numeric_limits::max)()); } - -#ifdef EIGEN2_SUPPORT - enum { - HasFloatingPoint = !IsInteger - }; - typedef NonInteger FloatingPoint; + + + EIGEN_DEVICE_FUNC + static inline T highest() { +#if defined(__CUDA_ARCH__) + return (internal::device::numeric_limits::max)(); +#else + return (std::numeric_limits::max)(); #endif + } + + EIGEN_DEVICE_FUNC + static inline T lowest() { +#if defined(__CUDA_ARCH__) + return IsInteger ? (internal::device::numeric_limits::min)() : (-(internal::device::numeric_limits::max)()); +#else + return IsInteger ? (std::numeric_limits::min)() : (-(std::numeric_limits::max)()); +#endif + } }; template struct NumTraits : GenericNumTraits @@ -91,11 +110,13 @@ template struct NumTraits : GenericNumTraits template<> struct NumTraits : GenericNumTraits { + EIGEN_DEVICE_FUNC static inline float dummy_precision() { return 1e-5f; } }; template<> struct NumTraits : GenericNumTraits { + EIGEN_DEVICE_FUNC static inline double dummy_precision() { return 1e-12; } }; @@ -136,9 +157,9 @@ struct NumTraits > IsInteger = NumTraits::IsInteger, IsSigned = NumTraits::IsSigned, RequireInitialization = 1, - ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, - AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::AddCost, - MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::MulCost + ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? 
HugeCost : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, + AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::AddCost, + MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::MulCost }; static inline RealScalar epsilon() { return NumTraits::epsilon(); } diff --git a/nuparu/include/Eigen/src/Core/PermutationMatrix.h b/nuparu/include/Eigen/src/Core/PermutationMatrix.h index 4fc5dd31..90e1df23 100644 --- a/nuparu/include/Eigen/src/Core/PermutationMatrix.h +++ b/nuparu/include/Eigen/src/Core/PermutationMatrix.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2009 Benoit Jacob -// Copyright (C) 2009-2011 Gael Guennebaud +// Copyright (C) 2009-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -13,8 +13,6 @@ namespace Eigen { -template class PermutedImpl; - /** \class PermutationBase * \ingroup Core_Module * @@ -41,10 +39,6 @@ template -struct permut_matrix_product_retval; -template -struct permut_sparsematrix_product_retval; enum PermPermProduct_t {PermPermProduct}; } // end namespace internal @@ -60,19 +54,20 @@ class PermutationBase : public EigenBase typedef typename Traits::IndicesType IndicesType; enum { Flags = Traits::Flags, - CoeffReadCost = Traits::CoeffReadCost, RowsAtCompileTime = Traits::RowsAtCompileTime, ColsAtCompileTime = Traits::ColsAtCompileTime, MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, MaxColsAtCompileTime = Traits::MaxColsAtCompileTime }; - typedef typename Traits::Scalar Scalar; - typedef typename Traits::Index Index; - typedef Matrix + typedef typename Traits::StorageIndex StorageIndex; + typedef Matrix DenseMatrixType; - typedef PermutationMatrix + typedef PermutationMatrix PlainPermutationType; + typedef PlainPermutationType PlainObject; using Base::derived; + typedef Inverse InverseReturnType; + typedef void 
Scalar; #endif /** Copies the other permutation into *this */ @@ -118,7 +113,7 @@ class PermutationBase : public EigenBase void evalTo(MatrixBase& other) const { other.setZero(); - for (int i=0; i /** Sets *this to be the identity permutation matrix */ void setIdentity() { - for(Index i = 0; i < size(); ++i) + StorageIndex n = StorageIndex(size()); + for(StorageIndex i = 0; i < n; ++i) indices().coeffRef(i) = i; } @@ -163,18 +159,18 @@ class PermutationBase : public EigenBase * * \returns a reference to *this. * - * \warning This is much slower than applyTranspositionOnTheRight(int,int): + * \warning This is much slower than applyTranspositionOnTheRight(Index,Index): * this has linear complexity and requires a lot of branching. * - * \sa applyTranspositionOnTheRight(int,int) + * \sa applyTranspositionOnTheRight(Index,Index) */ Derived& applyTranspositionOnTheLeft(Index i, Index j) { eigen_assert(i>=0 && j>=0 && i * * This is a fast operation, it only consists in swapping two indices. * - * \sa applyTranspositionOnTheLeft(int,int) + * \sa applyTranspositionOnTheLeft(Index,Index) */ Derived& applyTranspositionOnTheRight(Index i, Index j) { @@ -198,14 +194,14 @@ class PermutationBase : public EigenBase * * \note \note_try_to_help_rvo */ - inline Transpose inverse() const - { return derived(); } + inline InverseReturnType inverse() const + { return InverseReturnType(derived()); } /** \returns the tranpose permutation matrix. 
* * \note \note_try_to_help_rvo */ - inline Transpose transpose() const - { return derived(); } + inline InverseReturnType transpose() const + { return InverseReturnType(derived()); } /**** multiplication helpers to hopefully get RVO ****/ @@ -215,13 +211,13 @@ class PermutationBase : public EigenBase template void assignTranspose(const PermutationBase& other) { - for (int i=0; i void assignProduct(const Lhs& lhs, const Rhs& rhs) { eigen_assert(lhs.cols() == rhs.rows()); - for (int i=0; i * \note \note_try_to_help_rvo */ template - inline PlainPermutationType operator*(const Transpose >& other) const + inline PlainPermutationType operator*(const InverseImpl& other) const { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); } /** \returns the product of an inverse permutation with another permutation. @@ -248,8 +244,37 @@ class PermutationBase : public EigenBase * \note \note_try_to_help_rvo */ template friend - inline PlainPermutationType operator*(const Transpose >& other, const PermutationBase& perm) + inline PlainPermutationType operator*(const InverseImpl& other, const PermutationBase& perm) { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); } + + /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation. + * + * This function is O(\c n) procedure allocating a buffer of \c n booleans. 
+ */ + Index determinant() const + { + Index res = 1; + Index n = size(); + Matrix mask(n); + mask.fill(false); + Index r = 0; + while(r < n) + { + // search for the next seed + while(r=n) + break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + mask.coeffRef(k0) = true; + for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k)) + { + mask.coeffRef(k) = true; + res = -res; + } + } + return res; + } protected: @@ -262,7 +287,7 @@ class PermutationBase : public EigenBase * * \param SizeAtCompileTime the number of rows/cols, or Dynamic * \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. - * \param IndexType the interger type of the indices + * \param StorageIndex the integer type of the indices * * This class represents a permutation matrix, internally stored as a vector of integers. * @@ -270,24 +295,29 @@ class PermutationBase : public EigenBase */ namespace internal { -template -struct traits > - : traits > +template +struct traits > + : traits > { - typedef IndexType Index; - typedef Matrix IndicesType; + typedef PermutationStorage StorageKind; + typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; + typedef _StorageIndex StorageIndex; + typedef void Scalar; }; } -template -class PermutationMatrix : public PermutationBase > +template +class PermutationMatrix : public PermutationBase > { typedef PermutationBase Base; typedef internal::traits Traits; public: + typedef const PermutationMatrix& Nested; + #ifndef EIGEN_PARSED_BY_DOXYGEN typedef typename Traits::IndicesType IndicesType; + typedef typename Traits::StorageIndex StorageIndex; #endif inline PermutationMatrix() @@ -295,8 +325,10 @@ class PermutationMatrix : public PermutationBase::highest()); + } /** Copy constructor. 
*/ template @@ -317,7 +349,7 @@ class PermutationMatrix : public PermutationBase - explicit inline PermutationMatrix(const MatrixBase& a_indices) : m_indices(a_indices) + explicit inline PermutationMatrix(const MatrixBase& indices) : m_indices(indices) {} /** Convert the Transpositions \a tr to a permutation matrix */ @@ -364,10 +396,13 @@ class PermutationMatrix : public PermutationBase - PermutationMatrix(const Transpose >& other) - : m_indices(other.nestedPermutation().size()) + PermutationMatrix(const InverseImpl& other) + : m_indices(other.derived().nestedExpression().size()) { - for (int i=0; i::highest()); + StorageIndex end = StorageIndex(m_indices.size()); + for (StorageIndex i=0; i PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs) @@ -384,18 +419,20 @@ class PermutationMatrix : public PermutationBase -struct traits,_PacketAccess> > - : traits > +template +struct traits,_PacketAccess> > + : traits > { - typedef IndexType Index; - typedef Map, _PacketAccess> IndicesType; + typedef PermutationStorage StorageKind; + typedef Map, _PacketAccess> IndicesType; + typedef _StorageIndex StorageIndex; + typedef void Scalar; }; } -template -class Map,_PacketAccess> - : public PermutationBase,_PacketAccess> > +template +class Map,_PacketAccess> + : public PermutationBase,_PacketAccess> > { typedef PermutationBase Base; typedef internal::traits Traits; @@ -403,14 +440,14 @@ class Map, #ifndef EIGEN_PARSED_BY_DOXYGEN typedef typename Traits::IndicesType IndicesType; - typedef typename IndicesType::Scalar Index; + typedef typename IndicesType::Scalar StorageIndex; #endif - inline Map(const Index* indicesPtr) + inline Map(const StorageIndex* indicesPtr) : m_indices(indicesPtr) {} - inline Map(const Index* indicesPtr, Index size) + inline Map(const StorageIndex* indicesPtr, Index size) : m_indices(indicesPtr,size) {} @@ -457,24 +494,21 @@ class Map, * \sa class PermutationBase, class PermutationMatrix */ -struct PermutationStorage {}; - template 
class TranspositionsWrapper; namespace internal { template struct traits > { typedef PermutationStorage StorageKind; - typedef typename _IndicesType::Scalar Scalar; - typedef typename _IndicesType::Scalar Index; + typedef void Scalar; + typedef typename _IndicesType::Scalar StorageIndex; typedef _IndicesType IndicesType; enum { RowsAtCompileTime = _IndicesType::SizeAtCompileTime, ColsAtCompileTime = _IndicesType::SizeAtCompileTime, - MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime, - Flags = 0, - CoeffReadCost = _IndicesType::CoeffReadCost + MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + Flags = 0 }; }; } @@ -490,8 +524,8 @@ class PermutationWrapper : public PermutationBase -inline const internal::permut_matrix_product_retval -operator*(const MatrixBase& matrix, - const PermutationBase &permutation) +template +EIGEN_DEVICE_FUNC +const Product +operator*(const MatrixBase &matrix, + const PermutationBase& permutation) { - return internal::permut_matrix_product_retval - - (permutation.derived(), matrix.derived()); + return Product + (matrix.derived(), permutation.derived()); } /** \returns the matrix with the permutation applied to the rows. 
*/ -template -inline const internal::permut_matrix_product_retval - +template +EIGEN_DEVICE_FUNC +const Product operator*(const PermutationBase &permutation, - const MatrixBase& matrix) + const MatrixBase& matrix) { - return internal::permut_matrix_product_retval - - (permutation.derived(), matrix.derived()); + return Product + (permutation.derived(), matrix.derived()); } -namespace internal { - -template -struct traits > -{ - typedef typename MatrixType::PlainObject ReturnType; -}; -template -struct permut_matrix_product_retval - : public ReturnByValue > +template +class InverseImpl + : public EigenBase > { - typedef typename remove_all::type MatrixTypeNestedCleaned; - typedef typename MatrixType::Index Index; - - permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix) - : m_permutation(perm), m_matrix(matrix) - {} - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - - template inline void evalTo(Dest& dst) const - { - const Index n = Side==OnTheLeft ? rows() : cols(); - - if(is_same::value && extract_data(dst) == extract_data(m_matrix)) - { - // apply the permutation inplace - Matrix mask(m_permutation.size()); - mask.fill(false); - Index r = 0; - while(r < m_permutation.size()) - { - // search for the next seed - while(r=m_permutation.size()) - break; - // we got one, let's follow it until we are back to the seed - Index k0 = r++; - Index kPrev = k0; - mask.coeffRef(k0) = true; - for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k)) - { - Block(dst, k) - .swap(Block - (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); - - mask.coeffRef(k) = true; - kPrev = k; - } - } - } - else - { - for(int i = 0; i < n; ++i) - { - Block - (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i) - - = - - Block - (m_matrix, ((Side==OnTheRight) ^ Transposed) ? 
m_permutation.indices().coeff(i) : i); - } - } - } - - protected: - const PermutationType& m_permutation; - typename MatrixType::Nested m_matrix; -}; - -/* Template partial specialization for transposed/inverse permutations */ - -template -struct traits > > - : traits -{}; - -} // end namespace internal - -template -class Transpose > - : public EigenBase > > -{ - typedef Derived PermutationType; - typedef typename PermutationType::IndicesType IndicesType; typedef typename PermutationType::PlainPermutationType PlainPermutationType; + typedef internal::traits PermTraits; + protected: + InverseImpl() {} public: + typedef Inverse InverseType; + using EigenBase >::derived; #ifndef EIGEN_PARSED_BY_DOXYGEN - typedef internal::traits Traits; - typedef typename Derived::DenseMatrixType DenseMatrixType; + typedef typename PermutationType::DenseMatrixType DenseMatrixType; enum { - Flags = Traits::Flags, - CoeffReadCost = Traits::CoeffReadCost, - RowsAtCompileTime = Traits::RowsAtCompileTime, - ColsAtCompileTime = Traits::ColsAtCompileTime, - MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, - MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + RowsAtCompileTime = PermTraits::RowsAtCompileTime, + ColsAtCompileTime = PermTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime }; - typedef typename Traits::Scalar Scalar; #endif - Transpose(const PermutationType& p) : m_permutation(p) {} - - inline int rows() const { return m_permutation.rows(); } - inline int cols() const { return m_permutation.cols(); } - #ifndef EIGEN_PARSED_BY_DOXYGEN template void evalTo(MatrixBase& other) const { other.setZero(); - for (int i=0; i friend - inline const internal::permut_matrix_product_retval - operator*(const MatrixBase& matrix, const Transpose& trPerm) + const Product + operator*(const MatrixBase& matrix, const InverseType& trPerm) { - return 
internal::permut_matrix_product_retval(trPerm.m_permutation, matrix.derived()); + return Product(matrix.derived(), trPerm.derived()); } /** \returns the matrix with the inverse permutation applied to the rows. */ template - inline const internal::permut_matrix_product_retval + const Product operator*(const MatrixBase& matrix) const { - return internal::permut_matrix_product_retval(m_permutation, matrix.derived()); + return Product(derived(), matrix.derived()); } - - const PermutationType& nestedPermutation() const { return m_permutation; } - - protected: - const PermutationType& m_permutation; }; template @@ -683,6 +625,12 @@ const PermutationWrapper MatrixBase::asPermutation() con return derived(); } +namespace internal { + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_PERMUTATIONMATRIX_H diff --git a/nuparu/include/Eigen/src/Core/PlainObjectBase.h b/nuparu/include/Eigen/src/Core/PlainObjectBase.h index af0a479c..1225e85b 100644 --- a/nuparu/include/Eigen/src/Core/PlainObjectBase.h +++ b/nuparu/include/Eigen/src/Core/PlainObjectBase.h @@ -28,6 +28,7 @@ namespace internal { template struct check_rows_cols_for_overflow { template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE void run(Index, Index) { } @@ -35,6 +36,7 @@ template struct check_rows_cols_for_overflow { template<> struct check_rows_cols_for_overflow { template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols) { // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242 @@ -47,7 +49,10 @@ template<> struct check_rows_cols_for_overflow { } }; -template struct conservative_resize_like_impl; +template +struct conservative_resize_like_impl; template struct matrix_swap_impl; @@ -64,8 +69,9 @@ template struct m #ifdef EIGEN_PARSED_BY_DOXYGEN namespace internal { -// this is a warkaround to doxygen not being able to understand the inheritence logic +// 
this is a workaround to doxygen not being able to understand the inheritance logic // when it is hidden by the dense_xpr_base helper struct. +/** This class is just a workaround for Doxygen and it does not not actually exist. */ template struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase {}; /** This class is just a workaround for Doxygen and it does not not actually exist. */ template @@ -90,8 +96,8 @@ class PlainObjectBase : public internal::dense_xpr_base::type typedef typename internal::dense_xpr_base::type Base; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; typedef Derived DenseType; @@ -110,28 +116,36 @@ class PlainObjectBase : public internal::dense_xpr_base::type typedef Eigen::Map MapType; friend class Eigen::Map; typedef const Eigen::Map ConstMapType; - friend class Eigen::Map; - typedef Eigen::Map AlignedMapType; - friend class Eigen::Map; - typedef const Eigen::Map ConstAlignedMapType; +#if EIGEN_MAX_ALIGN_BYTES>0 + // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice. 
+ friend class Eigen::Map; + friend class Eigen::Map; +#endif + typedef Eigen::Map AlignedMapType; + typedef const Eigen::Map ConstAlignedMapType; template struct StridedMapType { typedef Eigen::Map type; }; template struct StridedConstMapType { typedef Eigen::Map type; }; - template struct StridedAlignedMapType { typedef Eigen::Map type; }; - template struct StridedConstAlignedMapType { typedef Eigen::Map type; }; + template struct StridedAlignedMapType { typedef Eigen::Map type; }; + template struct StridedConstAlignedMapType { typedef Eigen::Map type; }; protected: DenseStorage m_storage; public: - enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits::Flags & AlignedBit) != 0 }; + enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits::Alignment>0) }; EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) + EIGEN_DEVICE_FUNC Base& base() { return *static_cast(this); } + EIGEN_DEVICE_FUNC const Base& base() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const { if(Flags & RowMajorBit) @@ -140,11 +154,13 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const { return m_storage.data()[index]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId) { if(Flags & RowMajorBit) @@ -153,11 +169,13 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_storage.data()[index]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) 
const { if(Flags & RowMajorBit) @@ -166,6 +184,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const { return m_storage.data()[index]; @@ -206,11 +225,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type } /** \returns a const pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE const Scalar *data() const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } /** \returns a pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE Scalar *data() + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } /** Resizes \c *this to a \a rows x \a cols matrix. @@ -229,22 +248,22 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t) */ - EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols) - { - eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime) - && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,nbCols==ColsAtCompileTime) - && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,nbRows<=MaxRowsAtCompileTime) - && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime) - && nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array."); - internal::check_rows_cols_for_overflow::run(nbRows, nbCols); + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void resize(Index rows, Index cols) + { + eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime) + && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && 
MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime) + && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array."); + internal::check_rows_cols_for_overflow::run(rows, cols); #ifdef EIGEN_INITIALIZE_COEFFS - Index size = nbRows*nbCols; + Index size = rows*cols; bool size_changed = size != this->size(); - m_storage.resize(size, nbRows, nbCols); + m_storage.resize(size, rows, cols); if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED #else - internal::check_rows_cols_for_overflow::run(nbRows, nbCols); - m_storage.resize(nbRows*nbCols, nbRows, nbCols); + m_storage.resize(rows*cols, rows, cols); #endif } @@ -259,6 +278,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t) */ + EIGEN_DEVICE_FUNC inline void resize(Index size) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase) @@ -283,9 +303,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index) */ - inline void resize(NoChange_t, Index nbCols) + EIGEN_DEVICE_FUNC + inline void resize(NoChange_t, Index cols) { - resize(rows(), nbCols); + resize(rows(), cols); } /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange @@ -296,9 +317,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index) */ - inline void resize(Index nbRows, NoChange_t) + EIGEN_DEVICE_FUNC + inline void resize(Index rows, NoChange_t) { - resize(nbRows, cols()); + resize(rows, cols()); } /** Resizes \c *this to have the same dimensions as \a other. @@ -309,6 +331,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * remain row-vectors and vectors remain vectors. 
*/ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resizeLike(const EigenBase& _other) { const OtherDerived& other = _other.derived(); @@ -336,9 +359,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * Matrices are resized relative to the top-left element. In case values need to be * appended to the matrix they will be uninitialized. */ - EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols) { - internal::conservative_resize_like_impl::run(*this, nbRows, nbCols); + internal::conservative_resize_like_impl::run(*this, rows, cols); } /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. @@ -348,10 +372,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * In case the matrix is growing, new rows will be uninitialized. */ - EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t) { // Note: see the comment in conservativeResize(Index,Index) - conservativeResize(nbRows, cols()); + conservativeResize(rows, cols()); } /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. @@ -361,10 +386,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * In case the matrix is growing, new columns will be uninitialized. */ - EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols) { // Note: see the comment in conservativeResize(Index,Index) - conservativeResize(rows(), nbCols); + conservativeResize(rows(), cols); } /** Resizes the vector to \a size while retaining old values. @@ -375,6 +401,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * When values are appended, they will be uninitialized. 
*/ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResize(Index size) { internal::conservative_resize_like_impl::run(*this, size); @@ -390,6 +417,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * appended to the matrix they will copied from \c other. */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase& other) { internal::conservative_resize_like_impl::run(*this, other); @@ -398,6 +426,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other) { return _set(other); @@ -405,6 +434,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type /** \sa MatrixBase::lazyAssign() */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase& other) { _resize_to_match(other); @@ -412,12 +442,18 @@ class PlainObjectBase : public internal::dense_xpr_base::type } template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue& func) { resize(func.rows(), func.cols()); return Base::operator=(func); } + // Prevent user from trying to instantiate PlainObjectBase objects + // by making all its constructor protected. See bug 1074. + protected: + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase() : m_storage() { // _check_template_params(); @@ -427,38 +463,85 @@ class PlainObjectBase : public internal::dense_xpr_base::type #ifndef EIGEN_PARSED_BY_DOXYGEN // FIXME is it still needed ? 
/** \internal */ - PlainObjectBase(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC + explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) : m_storage(internal::constructor_without_unaligned_array_assert()) { // _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } #endif - EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols) - : m_storage(a_size, nbRows, nbCols) +#ifdef EIGEN_HAVE_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + PlainObjectBase(PlainObjectBase&& other) + : m_storage( std::move(other.m_storage) ) + { + } + + EIGEN_DEVICE_FUNC + PlainObjectBase& operator=(PlainObjectBase&& other) + { + using std::swap; + swap(m_storage, other.m_storage); + return *this; + } +#endif + + /** Copy constructor */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other) + : Base(), m_storage(other.m_storage) { } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols) + : m_storage(size, rows, cols) { // _check_template_params(); // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } - /** \copydoc MatrixBase::operator=(const EigenBase&) - */ + /** \sa PlainObjectBase::operator=(const EigenBase&) */ template - EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase &other) + : m_storage() { - _resize_to_match(other); - Base::operator=(other.derived()); - return this->derived(); + _check_template_params(); + resizeLike(other); + _set_noalias(other); } - /** \sa MatrixBase::operator=(const EigenBase&) */ + /** \sa PlainObjectBase::operator=(const EigenBase&) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase &other) - : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) + : m_storage() { _check_template_params(); - 
internal::check_rows_cols_for_overflow::run(other.derived().rows(), other.derived().cols()); + resizeLike(other); + *this = other.derived(); + } + /** \brief Copy constructor with in-place evaluation */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue& other) + { + _check_template_params(); + // FIXME this does not automatically transpose vectors if necessary + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); + } + + public: + + /** \copydoc MatrixBase::operator=(const EigenBase&) + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other) + { + _resize_to_match(other); Base::operator=(other.derived()); + return this->derived(); } /** \name Map @@ -535,16 +618,16 @@ class PlainObjectBase : public internal::dense_xpr_base::type //@} using Base::setConstant; - Derived& setConstant(Index size, const Scalar& value); - Derived& setConstant(Index rows, Index cols, const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value); using Base::setZero; - Derived& setZero(Index size); - Derived& setZero(Index rows, Index cols); + EIGEN_DEVICE_FUNC Derived& setZero(Index size); + EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols); using Base::setOnes; - Derived& setOnes(Index size); - Derived& setOnes(Index rows, Index cols); + EIGEN_DEVICE_FUNC Derived& setOnes(Index size); + EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols); using Base::setRandom; Derived& setRandom(Index size); @@ -563,6 +646,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * remain row-vectors and vectors remain vectors. 
*/ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase& other) { #ifdef EIGEN_NO_AUTOMATIC_RESIZING @@ -589,25 +673,23 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \internal */ + // aliasing is dealt once in internall::call_assignment + // so at this stage we have to assume aliasing... and resising has to be done later. template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set(const DenseBase& other) { - _set_selector(other.derived(), typename internal::conditional(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type()); + internal::call_assignment(this->derived(), other.derived()); return this->derived(); } - template - EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); } - - template - EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); } - /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which * is the case when creating a new matrix) so one can enforce lazy evaluation. * * \sa operator=(const MatrixBase&), _set() */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase& other) { // I don't think we need this resize call since the lazyAssign will anyways resize @@ -615,40 +697,166 @@ class PlainObjectBase : public internal::dense_xpr_base::type //_resize_to_match(other); // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // it wouldn't allow to copy a row-vector into a column-vector. 
- return internal::assign_selector::run(this->derived(), other.derived()); + internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); + return this->derived(); } template - EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if::type* = 0) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if::type* = 0) { EIGEN_STATIC_ASSERT(bool(NumTraits::IsInteger) && bool(NumTraits::IsInteger), FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) - resize(nbRows,nbCols); + resize(rows,cols); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if::type* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) m_storage.data()[0] = val0; m_storage.data()[1] = val1; } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==2,T1>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); + } + + // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array, + // then the argument is meant to be the size of the object. + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible::value) + && ((!internal::is_same::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0) + { + // NOTE MSVC 2008 complains if we directly put bool(NumTraits::IsInteger) as the EIGEN_STATIC_ASSERT argument. 
+ const bool is_integer = NumTraits::IsInteger; + EIGEN_STATIC_ASSERT(is_integer, + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) + resize(size); + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitely converted) + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if::value,T>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = val0; + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type match the index type) + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==1 + && internal::is_convertible::value,T*>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = Scalar(val0); + } + + // Initialize a fixed size matrix from a pointer to raw data + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar* data){ + this->_set_noalias(ConstMapType(data)); + } + + // Initialize an arbitrary matrix from a dense expression + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const DenseBase& other){ + this->_set_noalias(other); + } + + // Initialize an arbitrary matrix from a generic Eigen expression + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const EigenBase& other){ + this->derived() = other; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const ReturnByValue& other) + { + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const RotationBase& r) + { + this->derived() = r; + } + + // For fixed -size arrays: + template + 
EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, + typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T>::type* = 0) + { + Base::setConstant(val0); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T*>::type* = 0) + { + Base::setConstant(val0); + } + template friend struct internal::matrix_swap_impl; - /** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the - * data pointers. + public: + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal + * \brief Override DenseBase::swap() since for dynamic-sized matrices + * of same type it is enough to swap the data pointers. 
*/ template - void _swap(DenseBase const & other) + EIGEN_DEVICE_FUNC + void swap(DenseBase & other) { enum { SwapPointers = internal::is_same::value && Base::SizeAtCompileTime==Dynamic }; - internal::matrix_swap_impl::run(this->derived(), other.const_cast_derived()); + internal::matrix_swap_impl::run(this->derived(), other.derived()); } - - public: -#ifndef EIGEN_PARSED_BY_DOXYGEN + + /** \internal + * \brief const version forwarded to DenseBase::swap + */ + template + EIGEN_DEVICE_FUNC + void swap(DenseBase const & other) + { Base::swap(other.derived()); } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void _check_template_params() { EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor) @@ -662,16 +870,16 @@ class PlainObjectBase : public internal::dense_xpr_base::type && (Options & (DontAlign|RowMajor)) == Options), INVALID_MATRIX_TEMPLATE_PARAMETERS) } -#endif -private: - enum { ThisConstantIsPrivateInPlainObjectBase }; + enum { IsPlainObjectBase = 1 }; +#endif }; +namespace internal { + template -struct internal::conservative_resize_like_impl +struct conservative_resize_like_impl { - typedef typename Derived::Index Index; static void run(DenseBase& _this, Index rows, Index cols) { if (_this.rows() == rows && _this.cols() == cols) return; @@ -729,12 +937,14 @@ struct internal::conservative_resize_like_impl } }; -namespace internal { - +// Here, the specialization for vectors inherits from the general matrix case +// to allow calling .conservativeResize(rows,cols) on vectors. template struct conservative_resize_like_impl + : conservative_resize_like_impl { - typedef typename Derived::Index Index; + using conservative_resize_like_impl::run; + static void run(DenseBase& _this, Index size) { const Index new_rows = Derived::RowsAtCompileTime==1 ? 
1 : size; @@ -760,6 +970,7 @@ struct conservative_resize_like_impl template struct matrix_swap_impl { + EIGEN_DEVICE_FUNC static inline void run(MatrixTypeA& a, MatrixTypeB& b) { a.base().swap(b); @@ -769,6 +980,7 @@ struct matrix_swap_impl template struct matrix_swap_impl { + EIGEN_DEVICE_FUNC static inline void run(MatrixTypeA& a, MatrixTypeB& b) { static_cast(a).m_storage.swap(static_cast(b).m_storage); diff --git a/nuparu/include/Eigen/src/Core/Product.h b/nuparu/include/Eigen/src/Core/Product.h new file mode 100644 index 00000000..fdd2fed3 --- /dev/null +++ b/nuparu/include/Eigen/src/Core/Product.h @@ -0,0 +1,222 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PRODUCT_H +#define EIGEN_PRODUCT_H + +namespace Eigen { + +template class ProductImpl; + +/** \class Product + * \ingroup Core_Module + * + * \brief Expression of the product of two arbitrary matrices or vectors + * + * \param Lhs the type of the left-hand side expression + * \param Rhs the type of the right-hand side expression + * + * This class represents an expression of the product of two arbitrary matrices. + * + * The other template parameters are: + * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct + * + */ + + +namespace internal { + +// Determine the scalar of Product. This is normally the same as Lhs::Scalar times +// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor. 
+template::Shape, + typename RhsShape = typename evaluator_traits::Shape > +struct product_result_scalar +{ + typedef typename scalar_product_traits::ReturnType Scalar; +}; + +template +struct product_result_scalar +{ + typedef typename Rhs::Scalar Scalar; +}; + +template + struct product_result_scalar +{ + typedef typename Lhs::Scalar Scalar; +}; + +template +struct product_result_scalar +{ + typedef typename Rhs::Scalar Scalar; +}; + +template + struct product_result_scalar +{ + typedef typename Lhs::Scalar Scalar; +}; + +template +struct traits > +{ + typedef typename remove_all::type LhsCleaned; + typedef typename remove_all::type RhsCleaned; + typedef traits LhsTraits; + typedef traits RhsTraits; + + typedef MatrixXpr XprKind; + + typedef typename product_result_scalar::Scalar Scalar; + typedef typename product_promote_storage_type::ret>::ret StorageKind; + typedef typename promote_index_type::type StorageIndex; + + enum { + RowsAtCompileTime = LhsTraits::RowsAtCompileTime, + ColsAtCompileTime = RhsTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime, + + // FIXME: only needed by GeneralMatrixMatrixTriangular + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime), + + // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator. + Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ( ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit)) + || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? 
RowMajorBit + : NoPreferredStorageOrderBit + }; +}; + +} // end namespace internal + + +template +class Product : public ProductImpl<_Lhs,_Rhs,Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits<_Rhs>::StorageKind, + internal::product_type<_Lhs,_Rhs>::ret>::ret> +{ + public: + + typedef _Lhs Lhs; + typedef _Rhs Rhs; + + typedef typename ProductImpl< + Lhs, Rhs, Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + internal::product_type::ret>::ret>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Product) + + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + + EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) + { + eigen_assert(lhs.cols() == rhs.rows() + && "invalid matrix product" + && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); + } + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } + + EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } + + protected: + + LhsNested m_lhs; + RhsNested m_rhs; +}; + +namespace internal { + +template::ret> +class dense_product_base + : public internal::dense_xpr_base >::type +{}; + +/** Convertion to scalar for inner-products */ +template +class dense_product_base + : public internal::dense_xpr_base >::type +{ + typedef Product ProductXpr; + typedef typename internal::dense_xpr_base::type Base; +public: + using Base::derived; + typedef typename Base::Scalar Scalar; + + operator const Scalar() const + { + return internal::evaluator(derived()).coeff(0,0); + } +}; + +} // namespace 
internal + +// Generic API dispatcher +template +class ProductImpl : public internal::generic_xpr_base, MatrixXpr, StorageKind>::type +{ + public: + typedef typename internal::generic_xpr_base, MatrixXpr, StorageKind>::type Base; +}; + +template +class ProductImpl + : public internal::dense_product_base +{ + typedef Product Derived; + + public: + + typedef typename internal::dense_product_base Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + protected: + enum { + IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && + (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic), + EnableCoeff = IsOneByOne || Option==LazyProduct + }; + + public: + + EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return internal::evaluator(derived()).coeff(row,col); + } + + EIGEN_DEVICE_FUNC Scalar coeff(Index i) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return internal::evaluator(derived()).coeff(i); + } + + +}; + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_H diff --git a/nuparu/include/Eigen/src/Core/ProductBase.h b/nuparu/include/Eigen/src/Core/ProductBase.h deleted file mode 100644 index a494b5f8..00000000 --- a/nuparu/include/Eigen/src/Core/ProductBase.h +++ /dev/null @@ -1,278 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_PRODUCTBASE_H -#define EIGEN_PRODUCTBASE_H - -namespace Eigen { - -/** \class ProductBase - * \ingroup Core_Module - * - */ - -namespace internal { -template -struct traits > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all<_Lhs>::type Lhs; - typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits::ReturnType Scalar; - typedef typename promote_storage_type::StorageKind, - typename traits::StorageKind>::ret StorageKind; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; - enum { - RowsAtCompileTime = traits::RowsAtCompileTime, - ColsAtCompileTime = traits::ColsAtCompileTime, - MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, - MaxColsAtCompileTime = traits::MaxColsAtCompileTime, - Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0) - | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit, - // Note that EvalBeforeNestingBit and NestByRefBit - // are not used in practice because nested is overloaded for products - CoeffReadCost = 0 // FIXME why is it needed ? 
- }; -}; -} - -#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \ - typedef ProductBase Base; \ - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ - typedef typename Base::LhsNested LhsNested; \ - typedef typename Base::_LhsNested _LhsNested; \ - typedef typename Base::LhsBlasTraits LhsBlasTraits; \ - typedef typename Base::ActualLhsType ActualLhsType; \ - typedef typename Base::_ActualLhsType _ActualLhsType; \ - typedef typename Base::RhsNested RhsNested; \ - typedef typename Base::_RhsNested _RhsNested; \ - typedef typename Base::RhsBlasTraits RhsBlasTraits; \ - typedef typename Base::ActualRhsType ActualRhsType; \ - typedef typename Base::_ActualRhsType _ActualRhsType; \ - using Base::m_lhs; \ - using Base::m_rhs; - -template -class ProductBase : public MatrixBase -{ - public: - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase) - - typedef typename Lhs::Nested LhsNested; - typedef typename internal::remove_all::type _LhsNested; - typedef internal::blas_traits<_LhsNested> LhsBlasTraits; - typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; - typedef typename internal::remove_all::type _ActualLhsType; - typedef typename internal::traits::Scalar LhsScalar; - - typedef typename Rhs::Nested RhsNested; - typedef typename internal::remove_all::type _RhsNested; - typedef internal::blas_traits<_RhsNested> RhsBlasTraits; - typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef typename internal::remove_all::type _ActualRhsType; - typedef typename internal::traits::Scalar RhsScalar; - - // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once - typedef CoeffBasedProduct FullyLazyCoeffBaseProductType; - - public: - - typedef typename Base::PlainObject PlainObject; - - ProductBase(const Lhs& a_lhs, const Rhs& a_rhs) - : m_lhs(a_lhs), m_rhs(a_rhs) - { - eigen_assert(a_lhs.cols() == a_rhs.rows() - && "invalid matrix product" - && "if you wanted a coeff-wise or a dot product 
use the respective explicit functions"); - } - - inline Index rows() const { return m_lhs.rows(); } - inline Index cols() const { return m_rhs.cols(); } - - template - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); } - - template - inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); } - - template - inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); } - - template - inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); } - - const _LhsNested& lhs() const { return m_lhs; } - const _RhsNested& rhs() const { return m_rhs; } - - // Implicit conversion to the nested type (trigger the evaluation of the product) - operator const PlainObject& () const - { - m_result.resize(m_lhs.rows(), m_rhs.cols()); - derived().evalTo(m_result); - return m_result; - } - - const Diagonal diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - template - const Diagonal diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - const Diagonal diagonal(Index index) const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); } - - // restrict coeff accessors to 1x1 expressions. 
No need to care about mutators here since this isnt a Lvalue expression - typename Base::CoeffReturnType coeff(Index row, Index col) const - { -#ifdef EIGEN2_SUPPORT - return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum(); -#else - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix result = *this; - return result.coeff(row,col); -#endif - } - - typename Base::CoeffReturnType coeff(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix result = *this; - return result.coeff(i); - } - - const Scalar& coeffRef(Index row, Index col) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(row,col); - } - - const Scalar& coeffRef(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(i); - } - - protected: - - LhsNested m_lhs; - RhsNested m_rhs; - - mutable PlainObject m_result; -}; - -// here we need to overload the nested rule for products -// such that the nested type is a const reference to a plain matrix -namespace internal { -template -struct nested, N, PlainObject> -{ - typedef PlainObject const& type; -}; -} - -template -class ScaledProduct; - -// Note that these two operator* functions are not defined as member -// functions of ProductBase, because, otherwise we would have to -// define all overloads defined in MatrixBase. Furthermore, Using -// "using Base::operator*" would not work with MSVC. 
-// -// Also note that here we accept any compatible scalar types -template -const ScaledProduct -operator*(const ProductBase& prod, const typename Derived::Scalar& x) -{ return ScaledProduct(prod.derived(), x); } - -template -typename internal::enable_if::value, - const ScaledProduct >::type -operator*(const ProductBase& prod, const typename Derived::RealScalar& x) -{ return ScaledProduct(prod.derived(), x); } - - -template -const ScaledProduct -operator*(const typename Derived::Scalar& x,const ProductBase& prod) -{ return ScaledProduct(prod.derived(), x); } - -template -typename internal::enable_if::value, - const ScaledProduct >::type -operator*(const typename Derived::RealScalar& x,const ProductBase& prod) -{ return ScaledProduct(prod.derived(), x); } - -namespace internal { -template -struct traits > - : traits, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> > -{ - typedef typename traits::StorageKind StorageKind; -}; -} - -template -class ScaledProduct - : public ProductBase, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> -{ - public: - typedef ProductBase, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> Base; - typedef typename Base::Scalar Scalar; - typedef typename Base::PlainObject PlainObject; -// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct) - - ScaledProduct(const NestedProduct& prod, const Scalar& x) - : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {} - - template - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); } - - template - inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); } - - template - inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); } - - template - inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); } - - const Scalar& alpha() const { return m_alpha; } - - protected: - const NestedProduct& m_prod; - 
Scalar m_alpha; -}; - -/** \internal - * Overloaded to perform an efficient C = (A*B).lazy() */ -template -template -Derived& MatrixBase::lazyAssign(const ProductBase& other) -{ - other.derived().evalTo(derived()); - return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_PRODUCTBASE_H diff --git a/nuparu/include/Eigen/src/Core/ProductEvaluators.h b/nuparu/include/Eigen/src/Core/ProductEvaluators.h new file mode 100755 index 00000000..794038a2 --- /dev/null +++ b/nuparu/include/Eigen/src/Core/ProductEvaluators.h @@ -0,0 +1,1061 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2011 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_PRODUCTEVALUATORS_H +#define EIGEN_PRODUCTEVALUATORS_H + +namespace Eigen { + +namespace internal { + +/** \internal + * Evaluator of a product expression. + * Since products require special treatments to handle all possible cases, + * we simply deffer the evaluation logic to a product_evaluator class + * which offers more partial specialization possibilities. 
+ * + * \sa class product_evaluator + */ +template +struct evaluator > + : public product_evaluator > +{ + typedef Product XprType; + typedef product_evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +// Catch scalar * ( A * B ) and transform it to (A*scalar) * B +// TODO we should apply that rule only if that's really helpful +template +struct evaluator_traits, const Product > > + : evaluator_traits_base, const Product > > +{ + enum { AssumeAliasing = 1 }; +}; +template +struct evaluator, const Product > > + : public evaluator,const Lhs>, Rhs, DefaultProduct> > +{ + typedef CwiseUnaryOp, const Product > XprType; + typedef evaluator,const Lhs>, Rhs, DefaultProduct> > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs()) + {} +}; + + +template +struct evaluator, DiagIndex> > + : public evaluator, DiagIndex> > +{ + typedef Diagonal, DiagIndex> XprType; + typedef evaluator, DiagIndex> > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(Diagonal, DiagIndex>( + Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), + xpr.index() )) + {} +}; + + +// Helper class to perform a matrix product with the destination at hand. +// Depending on the sizes of the factors, there are different evaluation strategies +// as controlled by internal::product_type. 
+template< typename Lhs, typename Rhs, + typename LhsShape = typename evaluator_traits::Shape, + typename RhsShape = typename evaluator_traits::Shape, + int ProductType = internal::product_type::value> +struct generic_product_impl; + +template +struct evaluator_traits > + : evaluator_traits_base > +{ + enum { AssumeAliasing = 1 }; +}; + +template +struct evaluator_traits > + : evaluator_traits_base > +{ + enum { AssumeAliasing = 0 }; +}; + +// This is the default evaluator implementation for products: +// It creates a temporary and call generic_product_impl +template +struct product_evaluator, ProductTag, LhsShape, RhsShape> + : public evaluator::PlainObject> +{ + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + +// FIXME shall we handle nested_eval here?, +// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.) 
+// typedef typename internal::nested_eval::type LhsNested; +// typedef typename internal::nested_eval::type RhsNested; +// typedef typename internal::remove_all::type LhsNestedCleaned; +// typedef typename internal::remove_all::type RhsNestedCleaned; +// +// const LhsNested lhs(xpr.lhs()); +// const RhsNested rhs(xpr.rhs()); +// +// generic_product_impl::evalTo(m_result, lhs, rhs); + + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + +protected: + PlainObject m_result; +}; + +// Dense = Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + // FIXME shall we handle nested_eval here? + generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense += Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::add_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + { + // FIXME shall we handle nested_eval here? + generic_product_impl::addTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense -= Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::sub_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> +{ + typedef Product SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + { + // FIXME shall we handle nested_eval here? 
+ generic_product_impl::subTo(dst, src.lhs(), src.rhs()); + } +}; + + +// Dense ?= scalar * Product +// TODO we should apply that rule if that's really helpful +// for instance, this is not good for inner products +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis> +struct Assignment, + const Product >, AssignFunc, Dense2Dense, Scalar> +{ + typedef CwiseUnaryOp, + const Product > SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) + { + call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func); + } +}; + +//---------------------------------------- +// Catch "Dense ?= xpr + Product<>" expression to save one temporary +// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct + +template +struct assignment_from_xpr_plus_product +{ + typedef CwiseBinaryOp, const OtherXpr, const ProductType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const Func1& func) + { + call_assignment_no_alias(dst, src.lhs(), func); + call_assignment_no_alias(dst, src.rhs(), Func2()); + } +}; + +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, Scalar, internal::assign_op, internal::add_assign_op > +{}; +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::add_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, Scalar, internal::add_assign_op, internal::add_assign_op > +{}; +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> +struct Assignment, const OtherXpr, + const Product >, internal::sub_assign_op, Dense2Dense> + : 
assignment_from_xpr_plus_product, Scalar, internal::sub_assign_op, internal::sub_assign_op > +{}; +//---------------------------------------- + +template +struct generic_product_impl +{ + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } +}; + + +/*********************************************************************** +* Implementation of outer dense * dense vector product +***********************************************************************/ + +// Column major result +template +EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) +{ + evaluator rhsEval(rhs); + typename nested_eval::type actual_lhs(lhs); + // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored + // FIXME not very good if rhs is real and lhs complex while alpha is real too + const Index cols = dst.cols(); + for (Index j=0; j +EIGEN_DONT_INLINE void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) +{ + evaluator lhsEval(lhs); + typename nested_eval::type actual_rhs(rhs); + // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored + // FIXME not very good if lhs is real and rhs complex while alpha is real too + const Index rows = dst.rows(); + for (Index i=0; i +struct generic_product_impl +{ + template struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; + typedef typename Product::Scalar Scalar; + + // TODO 
it would be nice to be able to exploit our *_assign_op functors for that purpose + struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; + struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; + struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; + struct adds { + Scalar m_scale; + explicit adds(const Scalar& s) : m_scale(s) {} + template void operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() += m_scale * src; + } + }; + + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); + } + + template + static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); + } + + template + static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); + } + +}; + + +// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo +template +struct generic_product_impl_base +{ + typedef typename Product::Scalar Scalar; + + template + static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } + + template + static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } + + template + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } + + template + static void scaleAndAddTo(Dst& dst, const Lhs& lhs, 
const Rhs& rhs, const Scalar& alpha) + { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } + +}; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; + typedef typename internal::conditional::type MatrixType; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::gemv_dense_selector::HasUsableDirectAccess) + >::run(lhs, rhs, dst, alpha); + } +}; + +template +struct generic_product_impl +{ + typedef typename Product::Scalar Scalar; + + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // Same as: dst.noalias() = lhs.lazyProduct(rhs); + // but easier on the compiler side + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst.noalias() += lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); + } + + template + static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst.noalias() -= lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); + } + +// template +// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) +// { dst.noalias() += alpha * lhs.lazyProduct(rhs); } +}; + +// This specialization enforces the use of a coefficient-based evaluation strategy +template +struct generic_product_impl + : generic_product_impl {}; + +// Case 2: Evaluate coeff by coeff +// +// This is mostly taken from CoeffBasedProduct.h +// The main difference is that we add an extra argument to the etor_product_*_impl::run() function +// for the inner dimension of the product, because evaluator object do not know their size. 
+ +template +struct etor_product_coeff_impl; + +template +struct etor_product_packet_impl; + +template +struct product_evaluator, ProductTag, DenseShape, DenseShape> + : evaluator_base > +{ + typedef Product XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : m_lhs(xpr.lhs()), + m_rhs(xpr.rhs()), + m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! + m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, + // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. + m_innerDim(xpr.lhs().cols()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::AddCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + // Everything below here is taken from CoeffBasedProduct.h + + typedef typename internal::nested_eval::type LhsNested; + typedef typename internal::nested_eval::type RhsNested; + + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + + typedef evaluator LhsEtorType; + typedef evaluator RhsEtorType; + + enum { + RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), + MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, + + PacketSize = packet_traits::size, + + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, + RhsCoeffReadCost = RhsEtorType::CoeffReadCost, + CoeffReadCost 
= InnerSize==0 ? NumTraits::ReadCost + : InnerSize == Dynamic ? HugeCost + : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits::AddCost, + + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + + LhsFlags = LhsEtorType::Flags, + RhsFlags = RhsEtorType::Flags, + + LhsAlignment = LhsEtorType::Alignment, + RhsAlignment = RhsEtorType::Alignment, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + SameType = is_same::value, + + CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) + && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ), + + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) + && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ), + + EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : (RhsRowMajor && !CanVectorizeLhs), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) + | (EvalToRowMajor ? RowMajorBit : 0) + // TODO enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) + | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), + + LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), + RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), + + Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) + : CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) + : 0, + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. 
If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + CanVectorizeInner = SameType + && LhsRowMajor + && (!RhsRowMajor) + && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (InnerSize % packet_traits::size == 0) + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const + { + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + * TODO: this seems possible when the result is a vector + */ + EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const + { + const Index row = RowsAtCompileTime == 1 ? 0 : index; + const Index col = RowsAtCompileTime == 1 ? index : 0; + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); + } + + template + const PacketType packet(Index row, Index col) const + { + PacketType res; + typedef etor_product_packet_impl PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + template + const PacketType packet(Index index) const + { + const Index row = RowsAtCompileTime == 1 ? 0 : index; + const Index col = RowsAtCompileTime == 1 ? 
index : 0; + return packet(row,col); + } + +protected: + const LhsNested m_lhs; + const RhsNested m_rhs; + + LhsEtorType m_lhsImpl; + RhsEtorType m_rhsImpl; + + // TODO: Get rid of m_innerDim if known at compile time + Index m_innerDim; +}; + +template +struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape> + : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape> +{ + typedef Product XprType; + typedef Product BaseProduct; + typedef product_evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(BaseProduct(xpr.lhs(),xpr.rhs())) + {} +}; + +/**************************************** +*** Coeff based product, Packet path *** +****************************************/ + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(pset1(lhs.coeff(row, UnrollingIndex-1)), rhs.template packet(UnrollingIndex-1, col), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(lhs.template packet(row, UnrollingIndex-1), pset1(rhs.coeff(UnrollingIndex-1, col)), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(pset1(lhs.coeff(row, 0)),rhs.template packet(0, col)); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(lhs.template packet(row, 0), 
pset1(rhs.coeff(0, col))); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(0); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(0); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + res = pset1(0); + for(Index i = 0; i < innerDim; ++i) + res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + res = pset1(0); + for(Index i = 0; i < innerDim; ++i) + res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); + } +}; + + +/*************************************************************************** +* Triangular products +***************************************************************************/ +template +struct triangular_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl + ::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + 
+/*************************************************************************** +* SelfAdjoint products +***************************************************************************/ +template +struct selfadjoint_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + +/*************************************************************************** +* Diagonal products +***************************************************************************/ + +template +struct diagonal_product_evaluator_base + : evaluator_base +{ + typedef typename scalar_product_traits::ReturnType Scalar; +public: + enum { + CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, + + MatrixFlags = evaluator::Flags, + DiagFlags = evaluator::Flags, + _StorageOrder = MatrixFlags & RowMajorBit ? 
RowMajor : ColMajor, + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), + _SameTypes = is_same::value, + // FIXME currently we need same types, but in the future the next rule should be the one + //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), + _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), + Alignment = evaluator::Alignment + }; + + diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) + : m_diagImpl(diag), m_matImpl(mat) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const + { + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + } + +protected: + template + EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const + { + return internal::pmul(m_matImpl.template packet(row, col), + internal::pset1(m_diagImpl.coeff(id))); + } + + template + EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const + { + enum { + InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, + DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! 
+ }; + return internal::pmul(m_matImpl.template packet(row, col), + m_diagImpl.template packet(id)); + } + + evaluator m_diagImpl; + evaluator m_matImpl; +}; + +// diagonal * dense +template +struct product_evaluator, ProductTag, DiagonalShape, DenseShape> + : diagonal_product_evaluator_base, OnTheLeft> +{ + typedef diagonal_product_evaluator_base, OnTheLeft> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { + StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor + }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.rhs(), xpr.lhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const + { + // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. + // See also similar calls below. + return this->template packet_impl(row,col, row, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +// dense * diagonal +template +struct product_evaluator, ProductTag, DenseShape, DiagonalShape> + : diagonal_product_evaluator_base, OnTheRight> +{ + typedef diagonal_product_evaluator_base, OnTheRight> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? 
RowMajor : ColMajor }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.lhs(), xpr.rhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const + { + return this->template packet_impl(row,col, col, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +/*************************************************************************** +* Products with permutation matrices +***************************************************************************/ + +/** \internal + * \class permutation_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. + * This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h + */ +template +struct permutation_matrix_product; + +template +struct permutation_matrix_product +{ + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + template + static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) + { + MatrixType mat(xpr); + const Index n = Side==OnTheLeft ? mat.rows() : mat.cols(); + // FIXME we need an is_same for expression that is not sensitive to constness. For instance + // is_same_xpr, Block >::value should be true. 
+ //if(is_same::value && extract_data(dst) == extract_data(mat)) + if(is_same_dense(dst, mat)) + { + // apply the permutation inplace + Matrix mask(perm.size()); + mask.fill(false); + Index r = 0; + while(r < perm.size()) + { + // search for the next seed + while(r=perm.size()) + break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + Index kPrev = k0; + mask.coeffRef(k0) = true; + for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k)) + { + Block(dst, k) + .swap(Block + (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); + + mask.coeffRef(k) = true; + kPrev = k; + } + } + } + else + { + for(Index i = 0; i < n; ++i) + { + Block + (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i) + + = + + Block + (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i); + } + } + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, rhs, lhs); + } +}; + +template +struct generic_product_impl, Rhs, PermutationShape, MatrixShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Inverse& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, PermutationShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Inverse& rhs) + { + permutation_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + + +/*************************************************************************** +* Products with transpositions matrices +***************************************************************************/ + +// FIXME could we unify Transpositions and Permutation into a 
single "shape"?? + +/** \internal + * \class transposition_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. + */ +template +struct transposition_matrix_product +{ + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + template + static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr) + { + MatrixType mat(xpr); + typedef typename TranspositionType::StorageIndex StorageIndex; + const Index size = tr.size(); + StorageIndex j = 0; + + if(!(is_same::value && extract_data(dst) == extract_data(mat))) + dst = mat; + + for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, rhs, lhs); + } +}; + + +template +struct generic_product_impl, Rhs, TranspositionsShape, MatrixShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, TranspositionsShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) + { + transposition_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_EVALUATORS_H diff --git a/nuparu/include/Eigen/src/Core/Random.h b/nuparu/include/Eigen/src/Core/Random.h index 480fea40..02038e9e 100644 --- a/nuparu/include/Eigen/src/Core/Random.h +++ b/nuparu/include/Eigen/src/Core/Random.h @@ -28,12 +28,18 @@ struct functor_traits > /** \returns a random matrix expression * + * Numbers 
are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this MatrixBase type. * + * \not_reentrant + * * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used * instead. + * * * Example: \include MatrixBase_random_int_int.cpp * Output: \verbinclude MatrixBase_random_int_int.out @@ -41,22 +47,28 @@ struct functor_traits > * This expression has the "evaluate before nesting" flag so that it will be evaluated into * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. + * + * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators. * - * \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random() + * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random() */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random(Index rows, Index cols) { return NullaryExpr(rows, cols, internal::scalar_random_op()); } /** \returns a random vector expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. * * The parameter \a size is the size of the returned vector. * Must be compatible with this MatrixBase type. * * \only_for_vectors + * \not_reentrant * * This variant is meant to be used for dynamic-size vector types. 
For fixed-size types, * it is redundant to pass \a size as argument, so Random() should be used @@ -69,10 +81,10 @@ DenseBase::Random(Index rows, Index cols) * a temporary vector whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. * - * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random() + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random() */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random(Index size) { return NullaryExpr(size, internal::scalar_random_op()); @@ -80,6 +92,9 @@ DenseBase::Random(Index size) /** \returns a fixed-size random matrix or vector expression * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you * need to use the variants taking size arguments. * @@ -89,11 +104,13 @@ DenseBase::Random(Index size) * This expression has the "evaluate before nesting" flag so that it will be evaluated into * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. + * + * \not_reentrant * - * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index) + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index) */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random() { return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op()); @@ -101,6 +118,11 @@ DenseBase::Random() /** Sets all coefficients in this expression to random values. 
* + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * * Example: \include MatrixBase_setRandom.cpp * Output: \verbinclude MatrixBase_setRandom.out * @@ -114,12 +136,16 @@ inline Derived& DenseBase::setRandom() /** Resizes to the given \a newSize, and sets all coefficients in this expression to random values. * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * * \only_for_vectors + * \not_reentrant * * Example: \include Matrix_setRandom_int.cpp * Output: \verbinclude Matrix_setRandom_int.out * - * \sa MatrixBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, MatrixBase::Random() + * \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random() */ template EIGEN_STRONG_INLINE Derived& @@ -131,19 +157,24 @@ PlainObjectBase::setRandom(Index newSize) /** Resizes to the given size, and sets all coefficients in this expression to random values. * - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. 
+ * + * \not_reentrant + * + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setRandom_int_int.cpp * Output: \verbinclude Matrix_setRandom_int_int.out * - * \sa MatrixBase::setRandom(), setRandom(Index), class CwiseNullaryOp, MatrixBase::Random() + * \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random() */ template EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setRandom(Index nbRows, Index nbCols) +PlainObjectBase::setRandom(Index rows, Index cols) { - resize(nbRows, nbCols); + resize(rows, cols); return setRandom(); } diff --git a/nuparu/include/Eigen/src/Core/Redux.h b/nuparu/include/Eigen/src/Core/Redux.h index 50548fa9..d170cae2 100644 --- a/nuparu/include/Eigen/src/Core/Redux.h +++ b/nuparu/include/Eigen/src/Core/Redux.h @@ -50,21 +50,34 @@ struct redux_traits public: enum { - Cost = ( Derived::SizeAtCompileTime == Dynamic - || Derived::CoeffReadCost == Dynamic - || (Derived::SizeAtCompileTime!=1 && functor_traits::Cost == Dynamic) - ) ? Dynamic - : Derived::SizeAtCompileTime * Derived::CoeffReadCost - + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, + Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost + : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) }; public: enum { - Unrolling = Cost != Dynamic && Cost <= UnrollingLimit - ? CompleteUnrolling - : NoUnrolling + Unrolling = Cost <= UnrollingLimit ? 
CompleteUnrolling : NoUnrolling }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + EIGEN_DEBUG_VAR(Derived::Flags) + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + EIGEN_DEBUG_VAR(Traversal) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(Unrolling) + std::cerr << std::endl; + } +#endif }; /*************************************************************************** @@ -82,6 +95,7 @@ struct redux_novec_unroller typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { return func(redux_novec_unroller::run(mat,func), @@ -99,6 +113,7 @@ struct redux_novec_unroller typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&) { return mat.coeffByOuterInner(outer, inner); @@ -112,6 +127,7 @@ template struct redux_novec_unroller { typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } }; @@ -143,7 +159,7 @@ struct redux_vec_unroller index = Start * packet_traits::size, outer = index / int(Derived::InnerSizeAtCompileTime), inner = index % int(Derived::InnerSizeAtCompileTime), - alignment = (Derived::Flags & AlignedBit) ? 
Aligned : Unaligned + alignment = Derived::Alignment }; typedef typename Derived::Scalar Scalar; @@ -151,7 +167,7 @@ struct redux_vec_unroller static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&) { - return mat.template packetByOuterInner(outer, inner); + return mat.template packetByOuterInner(outer, inner); } }; @@ -169,8 +185,8 @@ template struct redux_impl { typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; - static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); Scalar res; @@ -194,18 +210,18 @@ struct redux_impl { typedef typename Derived::Scalar Scalar; typedef typename packet_traits::type PacketScalar; - typedef typename Derived::Index Index; - static Scalar run(const Derived& mat, const Func& func) + static Scalar run(const Derived &mat, const Func& func) { const Index size = mat.size(); - eigen_assert(size && "you are using an empty matrix"); + const Index packetSize = packet_traits::size; - const Index alignedStart = internal::first_aligned(mat); + const int packetAlignment = unpacket_traits::alignment; enum { - alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit) - ? Aligned : Unaligned + alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits::AlignedOnScalar)) ? 
int(packetAlignment) : int(Unaligned), + alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment) }; + const Index alignedStart = internal::first_default_aligned(mat.nestedExpression()); const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); const Index alignedEnd2 = alignedStart + alignedSize2; @@ -213,19 +229,19 @@ struct redux_impl Scalar res; if(alignedSize) { - PacketScalar packet_res0 = mat.template packet(alignedStart); + PacketScalar packet_res0 = mat.template packet(alignedStart); if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop { - PacketScalar packet_res1 = mat.template packet(alignedStart+packetSize); + PacketScalar packet_res1 = mat.template packet(alignedStart+packetSize); for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize) { - packet_res0 = func.packetOp(packet_res0, mat.template packet(index)); - packet_res1 = func.packetOp(packet_res1, mat.template packet(index+packetSize)); + packet_res0 = func.packetOp(packet_res0, mat.template packet(index)); + packet_res1 = func.packetOp(packet_res1, mat.template packet(index+packetSize)); } packet_res0 = func.packetOp(packet_res0,packet_res1); if(alignedEnd>alignedEnd2) - packet_res0 = func.packetOp(packet_res0, mat.template packet(alignedEnd2)); + packet_res0 = func.packetOp(packet_res0, mat.template packet(alignedEnd2)); } res = func.predux(packet_res0); @@ -247,14 +263,14 @@ struct redux_impl } }; -template -struct redux_impl +// NOTE: for SliceVectorizedTraversal we simply bypass unrolling +template +struct redux_impl { typedef typename Derived::Scalar Scalar; - typedef typename packet_traits::type PacketScalar; - typedef typename Derived::Index Index; + typedef typename packet_traits::type PacketType; - static Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const 
Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); const Index innerSize = mat.innerSize(); @@ -266,10 +282,10 @@ struct redux_impl Scalar res; if(packetedInnerSize) { - PacketScalar packet_res = mat.template packet(0,0); + PacketType packet_res = mat.template packet(0,0); for(Index j=0; j(j,i)); + packet_res = func.packetOp(packet_res, mat.template packetByOuterInner(j,i)); res = func.predux(packet_res); for(Index j=0; j Size = Derived::SizeAtCompileTime, VectorizedSize = (Size / PacketSize) * PacketSize }; - static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); - Scalar res = func.predux(redux_vec_unroller::run(mat,func)); - if (VectorizedSize != Size) - res = func(res,redux_novec_unroller::run(mat,func)); - return res; + if (VectorizedSize > 0) { + Scalar res = func.predux(redux_vec_unroller::run(mat,func)); + if (VectorizedSize != Size) + res = func(res,redux_novec_unroller::run(mat,func)); + return res; + } + else { + return redux_novec_unroller::run(mat,func); + } } }; +// evaluator adaptor +template +class redux_evaluator +{ +public: + typedef _XprType XprType; + EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + enum { + MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, + // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator + Flags = evaluator::Flags & ~DirectAccessBit, + IsRowMajor = XprType::IsRowMajor, + 
SizeAtCompileTime = XprType::SizeAtCompileTime, + InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime, + CoeffReadCost = evaluator::CoeffReadCost, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index row, Index col) const + { return m_evaluator.coeff(row, col); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index index) const + { return m_evaluator.coeff(index); } + + template + PacketReturnType packet(Index row, Index col) const + { return m_evaluator.template packet(row, col); } + + template + PacketReturnType packet(Index index) const + { return m_evaluator.template packet(index); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeffByOuterInner(Index outer, Index inner) const + { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + + template + PacketReturnType packetByOuterInner(Index outer, Index inner) const + { return m_evaluator.template packet(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + + const XprType & nestedExpression() const { return m_xpr; } + +protected: + internal::evaluator m_evaluator; + const XprType &m_xpr; +}; + } // end namespace internal /*************************************************************************** @@ -316,18 +399,21 @@ struct redux_impl /** \returns the result of a full redux operation on the whole matrix or vector using \a func * * The template parameter \a BinaryOp is the type of the functor \a func which must be - * an associative operator. Both current STL and TR1 functor styles are handled. + * an associative operator. Both current C++98 and C++11 functor styles are handled. 
* * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() */ template template -EIGEN_STRONG_INLINE typename internal::result_of::Scalar)>::type +typename internal::traits::Scalar DenseBase::redux(const Func& func) const { - typedef typename internal::remove_all::type ThisNested; - return internal::redux_impl - ::run(derived(), func); + eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); + + typedef typename internal::redux_evaluator ThisEvaluator; + ThisEvaluator thisEval(derived()); + + return internal::redux_impl::run(thisEval, func); } /** \returns the minimum of all coefficients of \c *this. @@ -337,7 +423,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::minCoeff() const { - return this->redux(Eigen::internal::scalar_min_op()); + return derived().redux(Eigen::internal::scalar_min_op()); } /** \returns the maximum of all coefficients of \c *this. @@ -347,7 +433,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::maxCoeff() const { - return this->redux(Eigen::internal::scalar_max_op()); + return derived().redux(Eigen::internal::scalar_max_op()); } /** \returns the sum of all coefficients of *this @@ -360,7 +446,7 @@ DenseBase::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(0); - return this->redux(Eigen::internal::scalar_sum_op()); + return derived().redux(Eigen::internal::scalar_sum_op()); } /** \returns the mean of all coefficients of *this @@ -371,7 +457,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::mean() const { - return Scalar(this->redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); + return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); } /** \returns the product of all coefficients of *this @@ -387,7 +473,7 @@ DenseBase::prod() const { if(SizeAtCompileTime==0 || 
(SizeAtCompileTime==Dynamic && size()==0)) return Scalar(1); - return this->redux(Eigen::internal::scalar_product_op()); + return derived().redux(Eigen::internal::scalar_product_op()); } /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. diff --git a/nuparu/include/Eigen/src/Core/Ref.h b/nuparu/include/Eigen/src/Core/Ref.h index aba795bd..61de5ed1 100644 --- a/nuparu/include/Eigen/src/Core/Ref.h +++ b/nuparu/include/Eigen/src/Core/Ref.h @@ -12,24 +12,20 @@ namespace Eigen { -template class RefBase; -template,OuterStride<> >::type > class Ref; - /** \class Ref * \ingroup Core_Module * - * \brief A matrix or vector expression mapping an existing expressions + * \brief A matrix or vector expression mapping an existing expression * * \tparam PlainObjectType the equivalent matrix type of the mapped data - * \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned. + * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. * The default is \c #Unaligned. * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), - * but accept a variable outer stride (leading dimension). + * but accepts a variable outer stride (leading dimension). * This can be overridden by specifying strides. * The type passed here must be a specialization of the Stride template, see examples below. * - * This class permits to write non template functions taking Eigen's object as parameters while limiting the number of copies. + * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies. 
* A Ref<> object can represent either a const expression or a l-value: * \code * // in-out argument: @@ -39,10 +35,10 @@ template& x); * \endcode * - * In the in-out case, the input argument must satisfies the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. + * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. * By default, a Ref can reference any dense vector expression of float having a contiguous memory layout. - * Likewise, a Ref can reference any column major dense matrix expression of float whose column's elements are contiguously stored with - * the possibility to have a constant space inbetween each column, i.e.: the inner stride mmust be equal to 1, but the outer-stride (or leading dimension), + * Likewise, a Ref can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with + * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension) * can be greater than the number of rows. * * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function. @@ -52,21 +48,22 @@ template > x); * foo3(A.row()); // OK * \endcode - * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involved more - * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overloads internally calling a + * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more + * expensive address computations even if the input is contiguously stored in memory. 
To overcome this issue, one might propose to overload internally calling a * template function, e.g.: * \code * // in the .h: @@ -94,24 +91,27 @@ struct traits > typedef _PlainObjectType PlainObjectType; typedef _StrideType StrideType; enum { - Options = _Options + Options = _Options, + Flags = traits >::Flags | NestByRefBit, + Alignment = traits >::Alignment }; template struct match { enum { HasDirectAccess = internal::has_direct_access::ret, - StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)), + StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)), InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic) || int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime) || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1), OuterStrideMatch = Derived::IsVectorAtCompileTime || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), - AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits::Flags&AlignedBit)==AlignedBit), - MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch + AlignmentMatch = (int(traits::Alignment)==int(Unaligned)) || (int(evaluator::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment + ScalarTypeMatch = internal::is_same::value, + MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch }; typedef typename internal::conditional::type type; }; - + }; template @@ -130,12 +130,12 @@ template class RefBase typedef MapBase Base; 
EIGEN_DENSE_PUBLIC_INTERFACE(RefBase) - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() : IsVectorAtCompileTime ? this->size() @@ -143,7 +143,7 @@ template class RefBase : this->rows(); } - RefBase() + EIGEN_DEVICE_FUNC RefBase() : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime), // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values: m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime, @@ -157,7 +157,7 @@ template class RefBase typedef Stride StrideBase; template - void construct(Expression& expr) + EIGEN_DEVICE_FUNC void construct(Expression& expr) { if(PlainObjectType::RowsAtCompileTime==1) { @@ -171,8 +171,12 @@ template class RefBase } else ::new (static_cast(this)) Base(expr.data(), expr.rows(), expr.cols()); - ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(), - StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride()); + + if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit))) + ::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1); + else + ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(), + StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride()); } StrideBase m_stride; @@ -182,7 +186,11 @@ template class RefBase template class Ref : public RefBase > { + private: typedef internal::traits Traits; + template + EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase& expr, + typename 
internal::enable_if::MatchAtCompileTime),Derived>::type* = 0); public: typedef RefBase Base; @@ -191,20 +199,23 @@ template class Ref #ifndef EIGEN_PARSED_BY_DOXYGEN template - inline Ref(PlainObjectBase& expr, - typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) { - Base::construct(expr); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + Base::construct(expr.derived()); } template - inline Ref(const DenseBase& expr, - typename internal::enable_if::value&&bool(Traits::template match::MatchAtCompileTime)),Derived>::type* = 0, - int = Derived::ThisConstantIsPrivateInPlainObjectBase) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) #else template inline Ref(DenseBase& expr) #endif { + EIGEN_STATIC_ASSERT(bool(internal::is_lvalue::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); Base::construct(expr.const_cast_derived()); } @@ -223,7 +234,8 @@ template class Ref< EIGEN_DENSE_PUBLIC_INTERFACE(Ref) template - inline Ref(const DenseBase& expr) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::ScalarTypeMatch),Derived>::type* = 0) { // std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," << match_helper::InnerStrideMatch << "\n"; // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; @@ -231,18 +243,27 @@ template class Ref< construct(expr.derived(), typename Traits::template match::type()); } + EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : 
Base(other) { + // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy + } + + template + EIGEN_DEVICE_FUNC inline Ref(const RefBase& other) { + construct(other.derived(), typename Traits::template match::type()); + } + protected: template - void construct(const Expression& expr,internal::true_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type) { Base::construct(expr); } template - void construct(const Expression& expr, internal::false_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { - m_object.lazyAssign(expr); + internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); Base::construct(m_object); } diff --git a/nuparu/include/Eigen/src/Core/Replicate.h b/nuparu/include/Eigen/src/Core/Replicate.h index dde86a83..bec59831 100644 --- a/nuparu/include/Eigen/src/Core/Replicate.h +++ b/nuparu/include/Eigen/src/Core/Replicate.h @@ -35,10 +35,7 @@ struct traits > typedef typename MatrixType::Scalar Scalar; typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; - enum { - Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor - }; - typedef typename nested::type MatrixTypeNested; + typedef typename ref_selector::type MatrixTypeNested; typedef typename remove_reference::type _MatrixTypeNested; enum { RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic @@ -53,8 +50,9 @@ struct traits > IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1 : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0 : (MatrixType::Flags & RowMajorBit) ? 1 : 0, - Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0), - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + + // FIXME enable DirectAccess with negative strides? + Flags = IsRowMajor ? 
RowMajorBit : 0 }; }; } @@ -68,10 +66,12 @@ template class Replicate typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Replicate) + typedef typename internal::remove_all::type NestedExpression; template - inline explicit Replicate(const OriginalMatrixType& a_matrix) - : m_matrix(a_matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) + EIGEN_DEVICE_FUNC + inline explicit Replicate(const OriginalMatrixType& matrix) + : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) { EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) @@ -79,41 +79,20 @@ template class Replicate } template - inline Replicate(const OriginalMatrixType& a_matrix, Index rowFactor, Index colFactor) - : m_matrix(a_matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) + EIGEN_DEVICE_FUNC + inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor) + : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) { EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); } - inline Scalar coeff(Index rowId, Index colId) const - { - // try to avoid using modulo; this is a pure optimization strategy - const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 - : RowFactor==1 ? rowId - : rowId%m_matrix.rows(); - const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 - : ColFactor==1 ? colId - : colId%m_matrix.cols(); - - return m_matrix.coeff(actual_row, actual_col); - } - template - inline PacketScalar packet(Index rowId, Index colId) const - { - const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 - : RowFactor==1 ? 
rowId - : rowId%m_matrix.rows(); - const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 - : ColFactor==1 ? colId - : colId%m_matrix.cols(); - - return m_matrix.template packet(actual_row, actual_col); - } - + EIGEN_DEVICE_FUNC const _MatrixTypeNested& nestedExpression() const { return m_matrix; @@ -135,27 +114,12 @@ template class Replicate */ template template -inline const Replicate +const Replicate DenseBase::replicate() const { return Replicate(derived()); } -/** - * \return an expression of the replication of \c *this - * - * Example: \include MatrixBase_replicate_int_int.cpp - * Output: \verbinclude MatrixBase_replicate_int_int.out - * - * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate - */ -template -inline const Replicate -DenseBase::replicate(Index rowFactor,Index colFactor) const -{ - return Replicate(derived(),rowFactor,colFactor); -} - /** * \return an expression of the replication of each column (or row) of \c *this * diff --git a/nuparu/include/Eigen/src/Core/ReturnByValue.h b/nuparu/include/Eigen/src/Core/ReturnByValue.h index d66c24ba..7feb6e01 100644 --- a/nuparu/include/Eigen/src/Core/ReturnByValue.h +++ b/nuparu/include/Eigen/src/Core/ReturnByValue.h @@ -38,9 +38,10 @@ struct traits > * So internal::nested always gives the plain return matrix type. * * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ?? 
+ * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators */ template -struct nested, n, PlainObject> +struct nested_eval, n, PlainObject> { typedef typename traits::ReturnType type; }; @@ -48,7 +49,7 @@ struct nested, n, PlainObject> } // end namespace internal template class ReturnByValue - : internal::no_assignment_operator, public internal::dense_xpr_base< ReturnByValue >::type + : public internal::dense_xpr_base< ReturnByValue >::type, internal::no_assignment_operator { public: typedef typename internal::traits::ReturnType ReturnType; @@ -57,10 +58,11 @@ template class ReturnByValue EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue) template + EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const { static_cast(this)->evalTo(dst); } - inline Index rows() const { return static_cast(this)->rows(); } - inline Index cols() const { return static_cast(this)->cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast(this)->rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast(this)->cols(); } #ifndef EIGEN_PARSED_BY_DOXYGEN #define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT @@ -72,6 +74,7 @@ template class ReturnByValue const Unusable& coeff(Index,Index) const { return *reinterpret_cast(this); } Unusable& coeffRef(Index) { return *reinterpret_cast(this); } Unusable& coeffRef(Index,Index) { return *reinterpret_cast(this); } +#undef Unusable #endif }; @@ -83,6 +86,33 @@ Derived& DenseBase::operator=(const ReturnByValue& other) return derived(); } +namespace internal { + +// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that +// when a ReturnByValue expression is assigned, the evaluator is not constructed. 
+// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world + +template +struct evaluator > + : public evaluator::ReturnType> +{ + typedef ReturnByValue XprType; + typedef typename internal::traits::ReturnType PlainObject; + typedef evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + xpr.evalTo(m_result); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_RETURNBYVALUE_H diff --git a/nuparu/include/Eigen/src/Core/Reverse.h b/nuparu/include/Eigen/src/Core/Reverse.h index e30ae3d2..d7c380c7 100644 --- a/nuparu/include/Eigen/src/Core/Reverse.h +++ b/nuparu/include/Eigen/src/Core/Reverse.h @@ -37,32 +37,25 @@ struct traits > typedef typename MatrixType::Scalar Scalar; typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; - typedef typename nested::type MatrixTypeNested; + typedef typename ref_selector::type MatrixTypeNested; typedef typename remove_reference::type _MatrixTypeNested; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - - // let's enable LinearAccess only with vectorization because of the product overhead - LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) ) - ? 
LinearAccessBit : 0, - - Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess), - - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit) }; }; -template struct reverse_packet_cond +template struct reverse_packet_cond { - static inline PacketScalar run(const PacketScalar& x) { return preverse(x); } + static inline PacketType run(const PacketType& x) { return preverse(x); } }; -template struct reverse_packet_cond +template struct reverse_packet_cond { - static inline PacketScalar run(const PacketScalar& x) { return x; } + static inline PacketType run(const PacketType& x) { return x; } }; } // end namespace internal @@ -74,12 +67,9 @@ template class Reverse typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) + typedef typename internal::remove_all::type NestedExpression; using Base::IsRowMajor; - // next line is necessary because otherwise const version of operator() - // is hidden by non-const version defined in this file - using Base::operator(); - protected: enum { PacketSize = internal::packet_traits::size, @@ -95,82 +85,19 @@ template class Reverse typedef internal::reverse_packet_cond reverse_packet; public: - inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } + EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return -m_matrix.innerStride(); } - inline Scalar& operator()(Index row, Index col) - { - eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); - return 
coeffRef(row, col); - } - - inline Scalar& coeffRef(Index row, Index col) - { - return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row, - ReverseCol ? m_matrix.cols() - col - 1 : col); - } - - inline CoeffReturnType coeff(Index row, Index col) const - { - return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row, - ReverseCol ? m_matrix.cols() - col - 1 : col); - } - - inline CoeffReturnType coeff(Index index) const - { - return m_matrix.coeff(m_matrix.size() - index - 1); - } - - inline Scalar& coeffRef(Index index) - { - return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1); - } - - inline Scalar& operator()(Index index) - { - eigen_assert(index >= 0 && index < m_matrix.size()); - return coeffRef(index); - } - - template - inline const PacketScalar packet(Index row, Index col) const - { - return reverse_packet::run(m_matrix.template packet( - ReverseRow ? m_matrix.rows() - row - OffsetRow : row, - ReverseCol ? m_matrix.cols() - col - OffsetCol : col)); - } - - template - inline void writePacket(Index row, Index col, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket( - ReverseRow ? m_matrix.rows() - row - OffsetRow : row, - ReverseCol ? 
m_matrix.cols() - col - OffsetCol : col, - reverse_packet::run(x)); - } - - template - inline const PacketScalar packet(Index index) const - { - return internal::preverse(m_matrix.template packet( m_matrix.size() - index - PacketSize )); - } - - template - inline void writePacket(Index index, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket(m_matrix.size() - index - PacketSize, internal::preverse(x)); - } - - const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC const typename internal::remove_all::type& nestedExpression() const { return m_matrix; @@ -190,33 +117,93 @@ template inline typename DenseBase::ReverseReturnType DenseBase::reverse() { - return derived(); + return ReverseReturnType(derived()); } -/** This is the const version of reverse(). */ -template -inline const typename DenseBase::ConstReverseReturnType -DenseBase::reverse() const -{ - return derived(); -} + +//reverse const overload moved DenseBase.h due to a CUDA compiler bug /** This is the "in place" version of reverse: it reverses \c *this. * * In most cases it is probably better to simply use the reversed expression * of a matrix. However, when reversing the matrix data itself is really needed, * then this "in-place" version is probably the right choice because it provides - * the following additional features: + * the following additional benefits: * - less error prone: doing the same operation with .reverse() requires special care: * \code m = m.reverse().eval(); \endcode - * - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap) + * - this API enables reverse operations without the need for a temporary * - it allows future optimizations (cache friendliness, etc.) 
* - * \sa reverse() */ + * \sa VectorwiseOp::reverseInPlace(), reverse() */ template inline void DenseBase::reverseInPlace() { - derived() = derived().reverse().eval(); + if(cols()>rows()) + { + Index half = cols()/2; + leftCols(half).swap(rightCols(half).reverse()); + if((cols()%2)==1) + { + Index half2 = rows()/2; + col(half).head(half2).swap(col(half).tail(half2).reverse()); + } + } + else + { + Index half = rows()/2; + topRows(half).swap(bottomRows(half).reverse()); + if((rows()%2)==1) + { + Index half2 = cols()/2; + row(half).head(half2).swap(row(half).tail(half2).reverse()); + } + } +} + +namespace internal { + +template +struct vectorwise_reverse_inplace_impl; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.rows()/2; + xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse()); + } +}; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.cols()/2; + xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse()); + } +}; + +} // end namespace internal + +/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this. + * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. 
However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional benefits: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API enables reverse operations without the need for a temporary + * + * \sa DenseBase::reverseInPlace(), reverse() */ +template +void VectorwiseOp::reverseInPlace() +{ + internal::vectorwise_reverse_inplace_impl::run(_expression().const_cast_derived()); } } // end namespace Eigen diff --git a/nuparu/include/Eigen/src/Core/Select.h b/nuparu/include/Eigen/src/Core/Select.h index 87993bbb..79eec1b5 100644 --- a/nuparu/include/Eigen/src/Core/Select.h +++ b/nuparu/include/Eigen/src/Core/Select.h @@ -43,23 +43,21 @@ struct traits > ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, - Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits, - CoeffReadCost = traits::type>::CoeffReadCost - + EIGEN_SIZE_MAX(traits::type>::CoeffReadCost, - traits::type>::CoeffReadCost) + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit }; }; } template -class Select : internal::no_assignment_operator, - public internal::dense_xpr_base< Select >::type +class Select : public internal::dense_xpr_base< Select >::type, + internal::no_assignment_operator { public: typedef typename internal::dense_xpr_base