diff --git a/src/support/CMakeLists.txt b/src/support/CMakeLists.txt index b106d179..b17af0b3 100644 --- a/src/support/CMakeLists.txt +++ b/src/support/CMakeLists.txt @@ -23,6 +23,7 @@ set(support_HEADERS # Add source files set(support_SOURCES + svd.cc PARENT_SCOPE ) diff --git a/src/support/lsfits.h b/src/support/lsfits.h index 63f7ba7b..699e1f3a 100644 --- a/src/support/lsfits.h +++ b/src/support/lsfits.h @@ -159,8 +159,8 @@ template } size_t ma = D*(D+3)/2; - std::vector > u(nvals-1, vector(ma)); - std::vector > v(ma, vector(ma)); + std::vector > u(nvals-1, std::vector(ma)); + std::vector > v(ma, std::vector(ma)); std::vector w(ma); for (int i=0; i < nvals-1; i++) { for (int j=0; j < D*(D+3)/2; j++) { @@ -194,7 +194,7 @@ template } // solve the problem for coefficients, in "result". - vector result(ma); + std::vector result(ma); svd_solve(u,w,v,F,result); return result; diff --git a/src/support/svd.cc b/src/support/svd.cc new file mode 100644 index 00000000..c81558be --- /dev/null +++ b/src/support/svd.cc @@ -0,0 +1,355 @@ +/* +This file is part of the Ristra portage project. +Please see the license file at the root of this repository, or at: + https://github.com/laristra/portage/blob/master/LICENSE +*/ + +/* + * svd.cc - Double precision SVD decomposition routine. + * Takes an mxn matrix a and decomposes it into udv, where u,v are + * left and right orthogonal transformation matrices, and d is a + * diagonal matrix of singular values. + * + * This routine was adapted from Numerical Recipes SVD routines in C + * to be called with C++ std::vectors by Michael Rogers. + * + * Input to svd is as follows: + * a = mxn matrix to be decomposed, gets overwritten with u + * w = returns the vector of singular values of a + * v = returns the right orthogonal transformation matrix +*/ + +#include "portage/support/svd.h" + +#include +#include +#include +#include +#include +#define MIN(x,y) ( (x) < (y) ? (x) : (y) ) +#define MAX(x,y) ((x)>(y)?(x):(y)) +#define SIGN(a, b) ((b) >= 0.0 ? fabs(a) : -fabs(a)) + +namespace Portage { + + +static double PYTHAG(double a, double b) +{ + double at = fabs(a), bt = fabs(b), ct, result; + + if (at > bt) { ct = bt / at; result = at * sqrt(1.0 + ct * ct); } + else if (bt > 0.0) { ct = at / bt; result = bt * sqrt(1.0 + ct * ct); } + else result = 0.0; + return(result); +} + +// Perform singular value decomposition + +int svd(std::vector > & a, std::vector & w, std::vector > & v) +{ + int flag, i, its, j, jj, k, l, nm; + int m, n; // dimensions of a (and u) + double c, f, h, s, x, y, z; + double anorm = 0.0, g = 0.0, scale = 0.0; + double *rv1; + + m = a.size(); // number of rows + n = a[0].size(); // number of columns + + if (m < n) + { + fprintf(stderr, "#rows must be >= #cols \n"); + return(1); + } + + rv1 = (double *)malloc((unsigned int) n*sizeof(double)); + +/* Householder reduction to bidiagonal form */ + for (i = 0; i < n; i++) + { + /* left-hand reduction */ + l = i + 1; + rv1[i] = scale * g; + g = s = scale = 0.0; + if (i < m) + { + for (k = i; k < m; k++) + scale += fabs((double)a[k][i]); + if (scale) + { + for (k = i; k < m; k++) + { + a[k][i] = (double)((double)a[k][i]/scale); + s += ((double)a[k][i] * (double)a[k][i]); + } + f = (double)a[i][i]; + g = -SIGN(sqrt(s), f); + h = f * g - s; + a[i][i] = (double)(f - g); + if (i != n - 1) + { + for (j = l; j < n; j++) + { + for (s = 0.0, k = i; k < m; k++) + s += ((double)a[k][i] * (double)a[k][j]); + f = s / h; + for (k = i; k < m; k++) + a[k][j] += (double)(f * (double)a[k][i]); + } + } + for (k = i; k < m; k++) + a[k][i] = (double)((double)a[k][i]*scale); + } + } + w[i] = (double)(scale * g); + + /* right-hand reduction */ + g = s = scale = 0.0; + if (i < m && i != n - 1) + { + for (k = l; k < n; k++) + scale += fabs((double)a[i][k]); + if (scale) + { + for (k = l; k < n; k++) + { + a[i][k] = (double)((double)a[i][k]/scale); + s += ((double)a[i][k] * (double)a[i][k]); + } + f = (double)a[i][l]; + g = -SIGN(sqrt(s), f); + h = f * g - s; + a[i][l] = (double)(f - g); + for (k = l; k < n; k++) + rv1[k] = (double)a[i][k] / h; + if (i != m - 1) + { + for (j = l; j < m; j++) + { + for (s = 0.0, k = l; k < n; k++) + s += ((double)a[j][k] * (double)a[i][k]); + for (k = l; k < n; k++) + a[j][k] += (double)(s * rv1[k]); + } + } + for (k = l; k < n; k++) + a[i][k] = (double)((double)a[i][k]*scale); + } + } + anorm = MAX(anorm, (fabs((double)w[i]) + fabs(rv1[i]))); + } + + /* accumulate the right-hand transformation */ + for (i = n - 1; i >= 0; i--) + { + if (i < n - 1) + { + if (g) + { + for (j = l; j < n; j++) + v[j][i] = (double)(((double)a[i][j] / (double)a[i][l]) / g); + /* double division to avoid underflow */ + for (j = l; j < n; j++) + { + for (s = 0.0, k = l; k < n; k++) + s += ((double)a[i][k] * (double)v[k][j]); + for (k = l; k < n; k++) + v[k][j] += (double)(s * (double)v[k][i]); + } + } + for (j = l; j < n; j++) + v[i][j] = v[j][i] = 0.0; + } + v[i][i] = 1.0; + g = rv1[i]; + l = i; + } + /* accumulate the left-hand transformation */ + for (i = n - 1; i >= 0; i--) + { + l = i + 1; + g = (double)w[i]; + if (i < n - 1) + for (j = l; j < n; j++) + a[i][j] = 0.0; + if (g) + { + g = 1.0 / g; + if (i != n - 1) + { + for (j = l; j < n; j++) + { + for (s = 0.0, k = l; k < m; k++) + s += ((double)a[k][i] * (double)a[k][j]); + f = (s / (double)a[i][i]) * g; + for (k = i; k < m; k++) + a[k][j] += (double)(f * (double)a[k][i]); + } + } + for (j = i; j < m; j++) + a[j][i] = (double)((double)a[j][i]*g); + } + else + { + for (j = i; j < m; j++) + a[j][i] = 0.0; + } + ++a[i][i]; + } + + /* diagonalize the bidiagonal form */ + for (k = n - 1; k >= 0; k--) + { /* loop over singular values */ + for (its = 0; its < 30; its++) + { /* loop over allowed iterations */ + flag = 1; + for (l = k; l >= 0; l--) + { /* test for splitting */ + nm = l - 1; + if (fabs(rv1[l]) + anorm == anorm) + { + flag = 0; + break; + } + if (fabs((double)w[nm]) + anorm == anorm) + break; + } + if (flag) + { + c = 0.0; + s = 1.0; + for (i = l; i <= k; i++) + { + f = s * rv1[i]; + if (fabs(f) + anorm != anorm) + { + g = (double)w[i]; + h = PYTHAG(f, g); + w[i] = (double)h; + h = 1.0 / h; + c = g * h; + s = (- f * h); + for (j = 0; j < m; j++) + { + y = (double)a[j][nm]; + z = (double)a[j][i]; + a[j][nm] = (double)(y * c + z * s); + a[j][i] = (double)(z * c - y * s); + } + } + } + } + z = (double)w[k]; + if (l == k) + { /* convergence */ + if (z < 0.0) + { /* make singular value nonnegative */ + w[k] = (double)(-z); + for (j = 0; j < n; j++) + v[j][k] = (-v[j][k]); + } + break; + } + if (its >= 30) { + free((void*) rv1); + fprintf(stderr, "No convergence after 30,000! iterations \n"); + return(1); + } + + /* shift from bottom 2 x 2 minor */ + x = (double)w[l]; + nm = k - 1; + y = (double)w[nm]; + g = rv1[nm]; + h = rv1[k]; + f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y); + g = PYTHAG(f, 1.0); + f = ((x - z) * (x + z) + h * ((y / (f + SIGN(g, f))) - h)) / x; + + /* next QR transformation */ + c = s = 1.0; + for (j = l; j <= nm; j++) + { + i = j + 1; + g = rv1[i]; + y = (double)w[i]; + h = s * g; + g = c * g; + z = PYTHAG(f, h); + rv1[j] = z; + c = f / z; + s = h / z; + f = x * c + g * s; + g = g * c - x * s; + h = y * s; + y = y * c; + for (jj = 0; jj < n; jj++) + { + x = (double)v[jj][j]; + z = (double)v[jj][i]; + v[jj][j] = (double)(x * c + z * s); + v[jj][i] = (double)(z * c - x * s); + } + z = PYTHAG(f, h); + w[j] = (double)z; + if (z) + { + z = 1.0 / z; + c = f * z; + s = h * z; + } + f = (c * g) + (s * y); + x = (c * y) - (s * g); + for (jj = 0; jj < m; jj++) + { + y = (double)a[jj][j]; + z = (double)a[jj][i]; + a[jj][j] = (double)(y * c + z * s); + a[jj][i] = (double)(z * c - y * s); + } + } + rv1[l] = 0.0; + rv1[k] = f; + w[k] = (double)x; + } + } + free((void*) rv1); + return(0); +} + +// Singular value backsubstitution. + +void svd_solve(const std::vector > & u, const std::vector & w, \ + const std::vector > & v, \ + std::vector & b, std::vector & x) + // Solves A*x=b for vector x. A is specified by the arrays + // u[1...m][1...n], w[1...n],v[1...n[1...n], as returned by + // svd. m and n are the dimensions of a. b[1...m] is the rhs. + // x[1...n] is the solution vector (output). Nothing is destroyed + // so it can be called sequentially with different b vectors. +{ + int jj, j, i; + int m, n; // dimensions of the matrix u + double s; + + m = u.size(); // number of rows + n = u[0].size(); // number of columns + + std::vector tmp(n,0.0); + for (j=0;j<=n-1;j++) { + s=0.0; + if (w[j]) { + for (i=0;i<=m-1;i++) s += u[i][j]*b[i]; + s /= w[j]; + } + tmp[j] = s; + } + for (j=0;j<=n-1;j++) { + s=0.0; + for (jj=0;jj<=n-1;jj++) s += v[j][jj]*tmp[jj]; + x[j]=s; + } +} + +} // namespace Portage + diff --git a/src/support/svd.h b/src/support/svd.h index ccb01286..70877ec1 100644 --- a/src/support/svd.h +++ b/src/support/svd.h @@ -19,331 +19,28 @@ Please see the license file at the root of this repository, or at: * v = returns the right orthogonal transformation matrix */ -#include -#include -#include -#include -#include -#define MIN(x,y) ( (x) < (y) ? (x) : (y) ) -#define MAX(x,y) ((x)>(y)?(x):(y)) -#define SIGN(a, b) ((b) >= 0.0 ? fabs(a) : -fabs(a)) - -using namespace std; +#ifndef SRC_SVD_H_ +#define SRC_SVD_H_ -static double PYTHAG(double a, double b) -{ - double at = fabs(a), bt = fabs(b), ct, result; +#include - if (at > bt) { ct = bt / at; result = at * sqrt(1.0 + ct * ct); } - else if (bt > 0.0) { ct = at / bt; result = bt * sqrt(1.0 + ct * ct); } - else result = 0.0; - return(result); -} +namespace Portage { // Perform singular value decomposition -int svd(vector > & a, vector & w, vector > & v) -{ - int flag, i, its, j, jj, k, l, nm; - int m, n; // dimensions of a (and u) - double c, f, h, s, x, y, z; - double anorm = 0.0, g = 0.0, scale = 0.0; - double *rv1; - - m = a.size(); // number of rows - n = a[0].size(); // number of columns - - if (m < n) - { - fprintf(stderr, "#rows must be >= #cols \n"); - return(1); - } - - rv1 = (double *)malloc((unsigned int) n*sizeof(double)); - -/* Householder reduction to bidiagonal form */ - for (i = 0; i < n; i++) - { - /* left-hand reduction */ - l = i + 1; - rv1[i] = scale * g; - g = s = scale = 0.0; - if (i < m) - { - for (k = i; k < m; k++) - scale += fabs((double)a[k][i]); - if (scale) - { - for (k = i; k < m; k++) - { - a[k][i] = (double)((double)a[k][i]/scale); - s += ((double)a[k][i] * (double)a[k][i]); - } - f = (double)a[i][i]; - g = -SIGN(sqrt(s), f); - h = f * g - s; - a[i][i] = (double)(f - g); - if (i != n - 1) - { - for (j = l; j < n; j++) - { - for (s = 0.0, k = i; k < m; k++) - s += ((double)a[k][i] * (double)a[k][j]); - f = s / h; - for (k = i; k < m; k++) - a[k][j] += (double)(f * (double)a[k][i]); - } - } - for (k = i; k < m; k++) - a[k][i] = (double)((double)a[k][i]*scale); - } - } - w[i] = (double)(scale * g); - - /* right-hand reduction */ - g = s = scale = 0.0; - if (i < m && i != n - 1) - { - for (k = l; k < n; k++) - scale += fabs((double)a[i][k]); - if (scale) - { - for (k = l; k < n; k++) - { - a[i][k] = (double)((double)a[i][k]/scale); - s += ((double)a[i][k] * (double)a[i][k]); - } - f = (double)a[i][l]; - g = -SIGN(sqrt(s), f); - h = f * g - s; - a[i][l] = (double)(f - g); - for (k = l; k < n; k++) - rv1[k] = (double)a[i][k] / h; - if (i != m - 1) - { - for (j = l; j < m; j++) - { - for (s = 0.0, k = l; k < n; k++) - s += ((double)a[j][k] * (double)a[i][k]); - for (k = l; k < n; k++) - a[j][k] += (double)(s * rv1[k]); - } - } - for (k = l; k < n; k++) - a[i][k] = (double)((double)a[i][k]*scale); - } - } - anorm = MAX(anorm, (fabs((double)w[i]) + fabs(rv1[i]))); - } - - /* accumulate the right-hand transformation */ - for (i = n - 1; i >= 0; i--) - { - if (i < n - 1) - { - if (g) - { - for (j = l; j < n; j++) - v[j][i] = (double)(((double)a[i][j] / (double)a[i][l]) / g); - /* double division to avoid underflow */ - for (j = l; j < n; j++) - { - for (s = 0.0, k = l; k < n; k++) - s += ((double)a[i][k] * (double)v[k][j]); - for (k = l; k < n; k++) - v[k][j] += (double)(s * (double)v[k][i]); - } - } - for (j = l; j < n; j++) - v[i][j] = v[j][i] = 0.0; - } - v[i][i] = 1.0; - g = rv1[i]; - l = i; - } - /* accumulate the left-hand transformation */ - for (i = n - 1; i >= 0; i--) - { - l = i + 1; - g = (double)w[i]; - if (i < n - 1) - for (j = l; j < n; j++) - a[i][j] = 0.0; - if (g) - { - g = 1.0 / g; - if (i != n - 1) - { - for (j = l; j < n; j++) - { - for (s = 0.0, k = l; k < m; k++) - s += ((double)a[k][i] * (double)a[k][j]); - f = (s / (double)a[i][i]) * g; - for (k = i; k < m; k++) - a[k][j] += (double)(f * (double)a[k][i]); - } - } - for (j = i; j < m; j++) - a[j][i] = (double)((double)a[j][i]*g); - } - else - { - for (j = i; j < m; j++) - a[j][i] = 0.0; - } - ++a[i][i]; - } - - /* diagonalize the bidiagonal form */ - for (k = n - 1; k >= 0; k--) - { /* loop over singular values */ - for (its = 0; its < 30; its++) - { /* loop over allowed iterations */ - flag = 1; - for (l = k; l >= 0; l--) - { /* test for splitting */ - nm = l - 1; - if (fabs(rv1[l]) + anorm == anorm) - { - flag = 0; - break; - } - if (fabs((double)w[nm]) + anorm == anorm) - break; - } - if (flag) - { - c = 0.0; - s = 1.0; - for (i = l; i <= k; i++) - { - f = s * rv1[i]; - if (fabs(f) + anorm != anorm) - { - g = (double)w[i]; - h = PYTHAG(f, g); - w[i] = (double)h; - h = 1.0 / h; - c = g * h; - s = (- f * h); - for (j = 0; j < m; j++) - { - y = (double)a[j][nm]; - z = (double)a[j][i]; - a[j][nm] = (double)(y * c + z * s); - a[j][i] = (double)(z * c - y * s); - } - } - } - } - z = (double)w[k]; - if (l == k) - { /* convergence */ - if (z < 0.0) - { /* make singular value nonnegative */ - w[k] = (double)(-z); - for (j = 0; j < n; j++) - v[j][k] = (-v[j][k]); - } - break; - } - if (its >= 30) { - free((void*) rv1); - fprintf(stderr, "No convergence after 30,000! iterations \n"); - return(1); - } - - /* shift from bottom 2 x 2 minor */ - x = (double)w[l]; - nm = k - 1; - y = (double)w[nm]; - g = rv1[nm]; - h = rv1[k]; - f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y); - g = PYTHAG(f, 1.0); - f = ((x - z) * (x + z) + h * ((y / (f + SIGN(g, f))) - h)) / x; - - /* next QR transformation */ - c = s = 1.0; - for (j = l; j <= nm; j++) - { - i = j + 1; - g = rv1[i]; - y = (double)w[i]; - h = s * g; - g = c * g; - z = PYTHAG(f, h); - rv1[j] = z; - c = f / z; - s = h / z; - f = x * c + g * s; - g = g * c - x * s; - h = y * s; - y = y * c; - for (jj = 0; jj < n; jj++) - { - x = (double)v[jj][j]; - z = (double)v[jj][i]; - v[jj][j] = (double)(x * c + z * s); - v[jj][i] = (double)(z * c - x * s); - } - z = PYTHAG(f, h); - w[j] = (double)z; - if (z) - { - z = 1.0 / z; - c = f * z; - s = h * z; - } - f = (c * g) + (s * y); - x = (c * y) - (s * g); - for (jj = 0; jj < m; jj++) - { - y = (double)a[jj][j]; - z = (double)a[jj][i]; - a[jj][j] = (double)(y * c + z * s); - a[jj][i] = (double)(z * c - y * s); - } - } - rv1[l] = 0.0; - rv1[k] = f; - w[k] = (double)x; - } - } - free((void*) rv1); - return(0); -} +int svd(std::vector > & a, std::vector & w, std::vector > & v); // Singular value backsubstitution. -void svd_solve(const vector > & u, const vector & w, \ - const vector > & v, \ - vector & b, vector & x) +void svd_solve(const std::vector > & u, const std::vector & w, \ + const std::vector > & v, \ + std::vector & b, std::vector & x); // Solves A*x=b for vector x. A is specified by the arrays // u[1...m][1...n], w[1...n],v[1...n[1...n], as returned by // svd. m and n are the dimensions of a. b[1...m] is the rhs. // x[1...n] is the solution vector (output). Nothing is destroyed // so it can be called sequentially with different b vectors. -{ - int jj, j, i; - int m, n; // dimensions of the matrix u - double s; - m = u.size(); // number of rows - n = u[0].size(); // number of columns +} // namespace Portage - vector tmp(n,0.0); - for (j=0;j<=n-1;j++) { - s=0.0; - if (w[j]) { - for (i=0;i<=m-1;i++) s += u[i][j]*b[i]; - s /= w[j]; - } - tmp[j] = s; - } - for (j=0;j<=n-1;j++) { - s=0.0; - for (jj=0;jj<=n-1;jj++) s += v[j][jj]*tmp[jj]; - x[j]=s; - } -} +#endif diff --git a/src/wonton/CMakeLists.txt b/src/wonton/CMakeLists.txt index 70871806..0cf36f2e 100644 --- a/src/wonton/CMakeLists.txt +++ b/src/wonton/CMakeLists.txt @@ -25,7 +25,7 @@ if (ENABLE_FleCSI) ) endif () -set(wrappers_HEADERS +set(wonton_HEADERS ${jali_wrapper_headers} ${flecsi_wrapper_headers} mesh/AuxMeshTopology.h