Skip to content

Commit

Permalink
[BugFix] Fix high-AM CPU sn-K (#98)
Browse files Browse the repository at this point in the history
* Fix (d|f) and (f|f) sn-K contractions on the CPU

* MIN -> min in f sn-K CPU kernels

* Fix (p|g) ... (g|g) sn-K CPU kernels

* Missing MIN -> std::min

* Added BasisSet::max_l

* Added H2O2 def2-{T,Q}ZVP tests to check sn-K with high AM, disable L > 2 for device tests for now

* Add L=4 to CUDA collocation kernels
  • Loading branch information
wavefunction91 authored Mar 14, 2024
1 parent b0f2d7e commit 843fe01
Show file tree
Hide file tree
Showing 27 changed files with 131,380 additions and 4,335 deletions.
5 changes: 5 additions & 0 deletions include/gauxc/basisset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ struct BasisSet : public std::vector<Shell<F>> {
return _nbf;
}

inline int32_t max_l() const {
return std::max_element(this->cbegin(), this->cend(),
[](const auto& a, const auto& b) { return a.l() < b.l(); })->l();
}

}; // class BasisSet

} // namespace GauXC
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,99 @@ GPGAUEVAL_INLINE __device__ void collocation_cartesian_angular_3_deriv1(

}

template <typename T>
GPGAUEVAL_INLINE __device__ void collocation_cartesian_angular_4(
int32_t npts,
const T bf,
const T x,
const T y,
const T z,
T* __restrict__ eval
) {

eval[npts * 0] = bf*x*x*x*x;
eval[npts * 1] = bf*x*x*x*y;
eval[npts * 2] = bf*x*x*x*z;
eval[npts * 3] = bf*x*x*y*y;
eval[npts * 4] = bf*x*x*y*z;
eval[npts * 5] = bf*x*x*z*z;
eval[npts * 6] = bf*x*y*y*y;
eval[npts * 7] = bf*x*y*y*z;
eval[npts * 8] = bf*x*y*z*z;
eval[npts * 9] = bf*x*z*z*z;
eval[npts * 10] = bf*y*y*y*y;
eval[npts * 11] = bf*y*y*y*z;
eval[npts * 12] = bf*y*y*z*z;
eval[npts * 13] = bf*y*z*z*z;
eval[npts * 14] = bf*z*z*z*z;

}

template <typename T>
GPGAUEVAL_INLINE __device__ void collocation_cartesian_angular_4_deriv1(
const int32_t npts,
const T bf,
const T bf_x,
const T bf_y,
const T bf_z,
const T x,
const T y,
const T z,
T* __restrict__ eval_x,
T* __restrict__ eval_y,
T* __restrict__ eval_z
) {

eval_x[npts * 0] = x*x*x*(4*bf + bf_x*x);
eval_x[npts * 1] = x*x*y*(3*bf + bf_x*x);
eval_x[npts * 2] = x*x*z*(3*bf + bf_x*x);
eval_x[npts * 3] = x*y*y*(2*bf + bf_x*x);
eval_x[npts * 4] = x*y*z*(2*bf + bf_x*x);
eval_x[npts * 5] = x*z*z*(2*bf + bf_x*x);
eval_x[npts * 6] = y*y*y*(bf + bf_x*x);
eval_x[npts * 7] = y*y*z*(bf + bf_x*x);
eval_x[npts * 8] = y*z*z*(bf + bf_x*x);
eval_x[npts * 9] = z*z*z*(bf + bf_x*x);
eval_x[npts * 10] = bf_x*y*y*y*y;
eval_x[npts * 11] = bf_x*y*y*y*z;
eval_x[npts * 12] = bf_x*y*y*z*z;
eval_x[npts * 13] = bf_x*y*z*z*z;
eval_x[npts * 14] = bf_x*z*z*z*z;

eval_y[npts * 0] = bf_y*x*x*x*x;
eval_y[npts * 1] = x*x*x*(bf + bf_y*y);
eval_y[npts * 2] = bf_y*x*x*x*z;
eval_y[npts * 3] = x*x*y*(2*bf + bf_y*y);
eval_y[npts * 4] = x*x*z*(bf + bf_y*y);
eval_y[npts * 5] = bf_y*x*x*z*z;
eval_y[npts * 6] = x*y*y*(3*bf + bf_y*y);
eval_y[npts * 7] = x*y*z*(2*bf + bf_y*y);
eval_y[npts * 8] = x*z*z*(bf + bf_y*y);
eval_y[npts * 9] = bf_y*x*z*z*z;
eval_y[npts * 10] = y*y*y*(4*bf + bf_y*y);
eval_y[npts * 11] = y*y*z*(3*bf + bf_y*y);
eval_y[npts * 12] = y*z*z*(2*bf + bf_y*y);
eval_y[npts * 13] = z*z*z*(bf + bf_y*y);
eval_y[npts * 14] = bf_y*z*z*z*z;

eval_z[npts * 0] = bf_z*x*x*x*x;
eval_z[npts * 1] = bf_z*x*x*x*y;
eval_z[npts * 2] = x*x*x*(bf + bf_z*z);
eval_z[npts * 3] = bf_z*x*x*y*y;
eval_z[npts * 4] = x*x*y*(bf + bf_z*z);
eval_z[npts * 5] = x*x*z*(2*bf + bf_z*z);
eval_z[npts * 6] = bf_z*x*y*y*y;
eval_z[npts * 7] = x*y*y*(bf + bf_z*z);
eval_z[npts * 8] = x*y*z*(2*bf + bf_z*z);
eval_z[npts * 9] = x*z*z*(3*bf + bf_z*z);
eval_z[npts * 10] = bf_z*y*y*y*y;
eval_z[npts * 11] = y*y*y*(bf + bf_z*z);
eval_z[npts * 12] = y*y*z*(2*bf + bf_z*z);
eval_z[npts * 13] = y*z*z*(3*bf + bf_z*z);
eval_z[npts * 14] = z*z*z*(4*bf + bf_z*z);

}


template <typename T>
GPGAUEVAL_INLINE __device__ void collocation_cartesian_angular(
Expand Down Expand Up @@ -255,6 +348,10 @@ GPGAUEVAL_INLINE __device__ void collocation_cartesian_angular(

collocation_cartesian_angular_3( npts, bf, x, y, z, eval );

} else if( l == 4 ) {

collocation_cartesian_angular_4( npts, bf, x, y, z, eval );

} else {
assert( false && "L < L_MAX" );
}
Expand Down Expand Up @@ -300,6 +397,11 @@ GPGAUEVAL_INLINE __device__ void collocation_cartesian_angular_deriv1(
collocation_cartesian_angular_3( npts, bf, x, y, z, eval );
collocation_cartesian_angular_3_deriv1( npts, bf, bf_x, bf_y, bf_z, x, y, z, eval_x, eval_y, eval_z );

} else if( l == 4 ) {

collocation_cartesian_angular_4( npts, bf, x, y, z, eval );
collocation_cartesian_angular_4_deriv1( npts, bf, bf_x, bf_y, bf_z, x, y, z, eval_x, eval_y, eval_z );

} else {
assert( false && "L < L_MAX" );
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,17 +187,17 @@ GPGAUEVAL_INLINE __device__ void collocation_spherical_unnorm_angular_3_deriv1(

eval_x[npts * 0] = sqrt_10*y*(6*bf*x + bf_x*(3*x*x - y*y))/4;
eval_x[npts * 1] = sqrt_15*y*z*(bf + bf_x*x);
eval_x[npts * 2] = -sqrt_6*y*(2*bf*x + bf_x*(x*x + y*y - 4*z*z))/4;
eval_x[npts * 3] = -z*(6*bf*x + bf_x*(3*x*x + 3*y*y - 2*z*z))/2;
eval_x[npts * 4] = -sqrt_6*(bf*(3*x*x + y*y - 4*z*z) + bf_x*x*(x*x + y*y - 4*z*z))/4;
eval_x[npts * 2] = sqrt_6*y*(-2*bf*x - bf_x*(x*x + y*y - 4*z*z))/4;
eval_x[npts * 3] = z*(-6*bf*x - bf_x*(3*x*x + 3*y*y - 2*z*z))/2;
eval_x[npts * 4] = sqrt_6*(-bf*(3*x*x + y*y - 4*z*z) - bf_x*x*(x*x + y*y - 4*z*z))/4;
eval_x[npts * 5] = sqrt_15*z*(2*bf*x + bf_x*(x*x - y*y))/2;
eval_x[npts * 6] = sqrt_10*(3*bf*(x*x - y*y) + bf_x*x*(x*x - 3*y*y))/4;

eval_y[npts * 0] = sqrt_10*(-3*bf*(-x*x + y*y) + bf_y*y*(3*x*x - y*y))/4;
eval_y[npts * 1] = sqrt_15*x*z*(bf + bf_y*y);
eval_y[npts * 2] = -sqrt_6*(bf*(x*x + 3*y*y - 4*z*z) + bf_y*y*(x*x + y*y - 4*z*z))/4;
eval_y[npts * 3] = -z*(6*bf*y + bf_y*(3*x*x + 3*y*y - 2*z*z))/2;
eval_y[npts * 4] = -sqrt_6*x*(2*bf*y + bf_y*(x*x + y*y - 4*z*z))/4;
eval_y[npts * 2] = sqrt_6*(-bf*(x*x + 3*y*y - 4*z*z) - bf_y*y*(x*x + y*y - 4*z*z))/4;
eval_y[npts * 3] = z*(-6*bf*y - bf_y*(3*x*x + 3*y*y - 2*z*z))/2;
eval_y[npts * 4] = sqrt_6*x*(-2*bf*y - bf_y*(x*x + y*y - 4*z*z))/4;
eval_y[npts * 5] = sqrt_15*z*(-2*bf*y + bf_y*(x*x - y*y))/2;
eval_y[npts * 6] = sqrt_10*x*(-6*bf*y + bf_y*(x*x - 3*y*y))/4;

Expand All @@ -211,6 +211,75 @@ GPGAUEVAL_INLINE __device__ void collocation_spherical_unnorm_angular_3_deriv1(

}

template <typename T>
GPGAUEVAL_INLINE __device__ void collocation_spherical_unnorm_angular_4(
int32_t npts,
const T bf,
const T x,
const T y,
const T z,
T* __restrict__ eval
) {

eval[npts * 0] = sqrt_35*bf*x*y*(x*x - y*y)/2;
eval[npts * 1] = sqrt_70*bf*y*z*(3*x*x - y*y)/4;
eval[npts * 2] = sqrt_5*bf*x*y*(-x*x - y*y + 6*z*z)/2;
eval[npts * 3] = sqrt_10*bf*y*z*(-3*x*x - 3*y*y + 4*z*z)/4;
eval[npts * 4] = bf*(3*x*x*x*x + 6*x*x*y*y - 24*x*x*z*z + 3*y*y*y*y - 24*y*y*z*z + 8*z*z*z*z)/8;
eval[npts * 5] = sqrt_10*bf*x*z*(-3*x*x - 3*y*y + 4*z*z)/4;
eval[npts * 6] = sqrt_5*bf*(-x*x*x*x + 6*x*x*z*z + y*y*y*y - 6*y*y*z*z)/4;
eval[npts * 7] = sqrt_70*bf*x*z*(x*x - 3*y*y)/4;
eval[npts * 8] = sqrt_35*bf*(x*x*x*x - 6*x*x*y*y + y*y*y*y)/8;

}

template <typename T>
GPGAUEVAL_INLINE __device__ void collocation_spherical_unnorm_angular_4_deriv1(
const int32_t npts,
const T bf,
const T bf_x,
const T bf_y,
const T bf_z,
const T x,
const T y,
const T z,
T* __restrict__ eval_x,
T* __restrict__ eval_y,
T* __restrict__ eval_z
) {

eval_x[npts * 0] = sqrt_35*y*(bf*(3*x*x - y*y) + bf_x*x*(x*x - y*y))/2;
eval_x[npts * 1] = sqrt_70*y*z*(6*bf*x + bf_x*(3*x*x - y*y))/4;
eval_x[npts * 2] = sqrt_5*y*(-bf*(3*x*x + y*y - 6*z*z) - bf_x*x*(x*x + y*y - 6*z*z))/2;
eval_x[npts * 3] = sqrt_10*y*z*(-6*bf*x - bf_x*(3*x*x + 3*y*y - 4*z*z))/4;
eval_x[npts * 4] = 3*bf*x*(x*x + y*y - 4*z*z)/2 + bf_x*(3*x*x*x*x + 6*x*x*y*y - 24*x*x*z*z + 3*y*y*y*y - 24*y*y*z*z + 8*z*z*z*z)/8;
eval_x[npts * 5] = sqrt_10*z*(-bf*(9*x*x + 3*y*y - 4*z*z) - bf_x*x*(3*x*x + 3*y*y - 4*z*z))/4;
eval_x[npts * 6] = sqrt_5*(-bf*x*(x*x - 3*z*z) - bf_x*(x*x*x*x - 6*x*x*z*z - y*y*y*y + 6*y*y*z*z)/4);
eval_x[npts * 7] = sqrt_70*z*(3*bf*(x*x - y*y) + bf_x*x*(x*x - 3*y*y))/4;
eval_x[npts * 8] = sqrt_35*(4*bf*x*(x*x - 3*y*y) + bf_x*(x*x*x*x - 6*x*x*y*y + y*y*y*y))/8;

eval_y[npts * 0] = sqrt_35*x*(-bf*(-x*x + 3*y*y) + bf_y*y*(x*x - y*y))/2;
eval_y[npts * 1] = sqrt_70*z*(-3*bf*(-x*x + y*y) + bf_y*y*(3*x*x - y*y))/4;
eval_y[npts * 2] = sqrt_5*x*(-bf*(x*x + 3*y*y - 6*z*z) - bf_y*y*(x*x + y*y - 6*z*z))/2;
eval_y[npts * 3] = sqrt_10*z*(-bf*(3*x*x + 9*y*y - 4*z*z) - bf_y*y*(3*x*x + 3*y*y - 4*z*z))/4;
eval_y[npts * 4] = 3*bf*y*(x*x + y*y - 4*z*z)/2 + bf_y*(3*x*x*x*x + 6*x*x*y*y - 24*x*x*z*z + 3*y*y*y*y - 24*y*y*z*z + 8*z*z*z*z)/8;
eval_y[npts * 5] = sqrt_10*x*z*(-6*bf*y - bf_y*(3*x*x + 3*y*y - 4*z*z))/4;
eval_y[npts * 6] = sqrt_5*(bf*y*(y*y - 3*z*z) - bf_y*(x*x*x*x - 6*x*x*z*z - y*y*y*y + 6*y*y*z*z)/4);
eval_y[npts * 7] = sqrt_70*x*z*(-6*bf*y + bf_y*(x*x - 3*y*y))/4;
eval_y[npts * 8] = sqrt_35*(-4*bf*y*(3*x*x - y*y) + bf_y*(x*x*x*x - 6*x*x*y*y + y*y*y*y))/8;

eval_z[npts * 0] = sqrt_35*bf_z*x*y*(x*x - y*y)/2;
eval_z[npts * 1] = sqrt_70*y*(bf + bf_z*z)*(3*x*x - y*y)/4;
eval_z[npts * 2] = sqrt_5*x*y*(12*bf*z - bf_z*(x*x + y*y - 6*z*z))/2;
eval_z[npts * 3] = sqrt_10*y*(3*bf*(-x*x - y*y + 4*z*z) - bf_z*z*(3*x*x + 3*y*y - 4*z*z))/4;
eval_z[npts * 4] = -2*bf*z*(3*x*x + 3*y*y - 2*z*z) + bf_z*(3*x*x*x*x + 6*x*x*y*y - 24*x*x*z*z + 3*y*y*y*y - 24*y*y*z*z + 8*z*z*z*z)/8;
eval_z[npts * 5] = sqrt_10*x*(3*bf*(-x*x - y*y + 4*z*z) - bf_z*z*(3*x*x + 3*y*y - 4*z*z))/4;
eval_z[npts * 6] = sqrt_5*(12*bf*z*(x*x - y*y) - bf_z*(x*x*x*x - 6*x*x*z*z - y*y*y*y + 6*y*y*z*z))/4;
eval_z[npts * 7] = sqrt_70*x*(bf + bf_z*z)*(x*x - 3*y*y)/4;
eval_z[npts * 8] = sqrt_35*bf_z*(x*x*x*x - 6*x*x*y*y + y*y*y*y)/8;

}


template <typename T>
GPGAUEVAL_INLINE __device__ void collocation_spherical_unnorm_angular(
Expand Down Expand Up @@ -239,6 +308,10 @@ GPGAUEVAL_INLINE __device__ void collocation_spherical_unnorm_angular(

collocation_spherical_unnorm_angular_3( npts, bf, x, y, z, eval );

} else if( l == 4 ) {

collocation_spherical_unnorm_angular_4( npts, bf, x, y, z, eval );

} else {
assert( false && "L < L_MAX" );
}
Expand Down Expand Up @@ -284,6 +357,11 @@ GPGAUEVAL_INLINE __device__ void collocation_spherical_unnorm_angular_deriv1(
collocation_spherical_unnorm_angular_3( npts, bf, x, y, z, eval );
collocation_spherical_unnorm_angular_3_deriv1( npts, bf, bf_x, bf_y, bf_z, x, y, z, eval_x, eval_y, eval_z );

} else if( l == 4 ) {

collocation_spherical_unnorm_angular_4( npts, bf, x, y, z, eval );
collocation_spherical_unnorm_angular_4_deriv1( npts, bf, bf_x, bf_y, bf_z, x, y, z, eval_x, eval_y, eval_z );

} else {
assert( false && "L < L_MAX" );
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@

namespace GauXC {

constexpr double sqrt_15 = 3.872983346207417;
constexpr double sqrt_10 = 3.1622776601683795;
constexpr double sqrt_3 = 1.7320508075688772;
constexpr double sqrt_15 = 3.872983346207417;
constexpr double sqrt_35 = 5.916079783099616;
constexpr double sqrt_6 = 2.449489742783178;
constexpr double sqrt_10 = 3.1622776601683795;
constexpr double sqrt_70 = 8.366600265340756;
constexpr double sqrt_5 = 2.23606797749979;

} // namespace GauXC
Loading

0 comments on commit 843fe01

Please sign in to comment.