diff --git a/include/gauxc/xc_integrator/integrator_factory.hpp b/include/gauxc/xc_integrator/integrator_factory.hpp index 5bb17e2d..88d27c78 100644 --- a/include/gauxc/xc_integrator/integrator_factory.hpp +++ b/include/gauxc/xc_integrator/integrator_factory.hpp @@ -55,6 +55,10 @@ class XCIntegratorFactory { std::shared_ptr func, std::shared_ptr lb ) { + // Early check for MGGAs and Device + if( ex_ == ExecutionSpace::Device && func->is_mgga() ) + GAUXC_GENERIC_EXCEPTION("DEVICE IS NOT READY FOR MGGA"); + // Create Local Work Driver auto lwd = LocalWorkDriverFactory::make_local_work_driver( ex_, lwd_kernel_, local_work_settings_ ); diff --git a/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx b/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx index 8de8b22f..fd58656b 100644 --- a/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx +++ b/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx @@ -77,6 +77,27 @@ void LocalHostWorkDriver::eval_collocation_hessian( size_t npts, size_t nshells, } +// Collocation 3rd +void LocalHostWorkDriver::eval_collocation_der3( size_t npts, size_t nshells, size_t nbe, + const double* pts, const BasisSet& basis, const int32_t* shell_list, + double* basis_eval, double* dbasis_x_eval, double* dbasis_y_eval, + double* dbasis_z_eval, double* d2basis_xx_eval, double* d2basis_xy_eval, + double* d2basis_xz_eval, double* d2basis_yy_eval, double* d2basis_yz_eval, + double* d2basis_zz_eval, double* d3basis_xxx_eval, double* d3basis_xxy_eval, + double* d3basis_xxz_eval, double* d3basis_xyy_eval, double* d3basis_xyz_eval, + double* d3basis_xzz_eval, double* d3basis_yyy_eval, double* d3basis_yyz_eval, + double* d3basis_yzz_eval, double* d3basis_zzz_eval) { + + throw_if_invalid_pimpl(pimpl_); + pimpl_->eval_collocation_der3(npts, nshells, nbe, pts, basis, shell_list, basis_eval, + dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, d2basis_xx_eval, d2basis_xy_eval, + d2basis_xz_eval, d2basis_yy_eval, d2basis_yz_eval, d2basis_zz_eval, + d3basis_xxx_eval, d3basis_xxy_eval, d3basis_xxz_eval, d3basis_xyy_eval, + d3basis_xyz_eval, d3basis_xzz_eval, d3basis_yyy_eval, d3basis_yyz_eval, + d3basis_yzz_eval, d3basis_zzz_eval); + +} + // X matrix (fac * P * B) void LocalHostWorkDriver::eval_xmat( size_t npts, size_t nbf, size_t nbe, @@ -89,7 +110,6 @@ void LocalHostWorkDriver::eval_xmat( size_t npts, size_t nbf, size_t nbe, } - void LocalHostWorkDriver::eval_exx_fmat( size_t npts, size_t nbf, size_t nbe_bra, size_t nbe_ket, const submat_map_t& submat_map_bra, const submat_map_t& submat_map_ket, const double* P, size_t ldp, @@ -176,6 +196,40 @@ void LocalHostWorkDriver::eval_uvvar_gga_uks( size_t npts, size_t nbe, } +// U/VVar MGGA(density, grad, gamma, tau, lapl) +void LocalHostWorkDriver::eval_uvvar_mgga_rks( size_t npts, size_t nbe, + const double* basis_eval, const double* dbasis_x_eval, + const double* dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, + const double* X, size_t ldx, const double* mmat_x, const double* mmat_y, const double* mmat_z, + size_t ldm, double* den_eval, double* dden_x_eval, double* dden_y_eval, + double* dden_z_eval, double* gamma, double* tau, double* lapl ) { + + throw_if_invalid_pimpl(pimpl_); + pimpl_->eval_uvvar_mgga_rks(npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, + dbasis_z_eval, lbasis_eval, X, ldx, mmat_x, mmat_y, mmat_z, ldm, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, + gamma, tau, lapl); + +} + + +// U/VVar MGGA(density, grad, gamma, tau, lapl) +void LocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, + const double* basis_eval, const double* dbasis_x_eval, + const double* dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, + const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, + const double* mmat_xs, const double* mmat_ys, const double* mmat_zs, size_t ldms, + const double* mmat_xz, const double* mmat_yz, const double* mmat_zz, size_t ldmz, + double* den_eval, double* dden_x_eval, double* dden_y_eval, + double* dden_z_eval, double* gamma, double* tau, double* lapl ) { + + throw_if_invalid_pimpl(pimpl_); + pimpl_->eval_uvvar_mgga_uks(npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, + dbasis_z_eval, lbasis_eval, Xs, ldxs, Xz, ldxz, mmat_xs, mmat_ys, mmat_zs, ldms, + mmat_xz, mmat_yz, mmat_zz, ldmz, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, + gamma, tau, lapl); + +} + // Eval Z Matrix LDA VXC void LocalHostWorkDriver::eval_zmat_lda_vxc_rks( size_t npts, size_t nbe, const double* vrho, const double* basis_eval, double* Z, size_t ldz ) { @@ -210,7 +264,6 @@ void LocalHostWorkDriver::eval_zmat_gga_vxc_rks( size_t npts, size_t nbe, } - void LocalHostWorkDriver::eval_zmat_gga_vxc_uks( size_t npts, size_t nbe, const double* vrho, const double* vgamma, const double* basis_eval, const double* dbasis_x_eval, const double* dbasis_y_eval, @@ -225,14 +278,74 @@ void LocalHostWorkDriver::eval_zmat_gga_vxc_uks( size_t npts, size_t nbe, } + +// Eval Z Matrix MGGA VXC +void LocalHostWorkDriver::eval_zmat_mgga_vxc_rks( size_t npts, size_t nbe, + const double* vrho, const double* vgamma, const double* vlapl, + const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, + const double* lbasis_eval, const double* dden_x_eval, + const double* dden_y_eval, const double* dden_z_eval, double* Z, size_t ldz ) { + + throw_if_invalid_pimpl(pimpl_); + pimpl_->eval_zmat_mgga_vxc_rks(npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, lbasis_eval, dden_x_eval, dden_y_eval, dden_z_eval, + Z, ldz); + +} + + +// Eval Z Matrix MGGA VXC +void LocalHostWorkDriver::eval_zmat_mgga_vxc_uks( size_t npts, size_t nbe, + const double* vrho, const double* vgamma, const double* vlapl, + const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, + const double* lbasis_eval, const double* dden_x_eval, + const double* dden_y_eval, const double* dden_z_eval, double* Zs, size_t ldzs, + double* Zz, size_t ldzz) { + + throw_if_invalid_pimpl(pimpl_); + pimpl_->eval_zmat_mgga_vxc_uks(npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, lbasis_eval, dden_x_eval, dden_y_eval, dden_z_eval, + Zs, ldzs, Zz, ldzz); + +} + + +// Eval M Matrix MGGA VXC +void LocalHostWorkDriver::eval_mmat_mgga_vxc_rks( size_t npts, size_t nbe, + const double* vtau, const double* vlapl, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, double* mmat_x, double* mmat_y, double* mmat_z, size_t ldm ) { + + throw_if_invalid_pimpl(pimpl_); + pimpl_->eval_mmat_mgga_vxc_rks(npts, nbe, vtau, vlapl, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, mmat_x, mmat_y, mmat_z, ldm); + +} + + +// Eval M Matrix MGGA VXC +void LocalHostWorkDriver::eval_mmat_mgga_vxc_uks( size_t npts, size_t nbe, + const double* vtau, const double* vlapl, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, double* mmat_xs, double* mmat_ys, double* mmat_zs, size_t ldms, + double* mmat_xz, double* mmat_yz, double* mmat_zz, size_t ldmz ) { + + throw_if_invalid_pimpl(pimpl_); + pimpl_->eval_mmat_mgga_vxc_uks(npts, nbe, vtau, vlapl, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, mmat_xs, mmat_ys, mmat_zs, ldms, mmat_xz, mmat_yz, + mmat_zz, ldmz ); + +} + // Increment VXC by Z void LocalHostWorkDriver::inc_vxc( size_t npts, size_t nbf, size_t nbe, const double* basis_eval, const submat_map_t& submat_map, const double* Z, size_t ldz, double* VXC, size_t ldvxc, double* scr ) { throw_if_invalid_pimpl(pimpl_); - pimpl_->inc_vxc(npts, nbf, nbe, basis_eval, submat_map, Z, ldz, VXC, ldvxc, - scr); + pimpl_->inc_vxc(npts, nbf, nbe, basis_eval, submat_map, Z, ldz, VXC, ldvxc, scr); } diff --git a/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp b/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp index 003066d5..c5be4a87 100644 --- a/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp +++ b/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp @@ -136,6 +136,46 @@ class LocalHostWorkDriver : public LocalWorkDriver { double* d2basis_xz_eval, double* d2basis_yy_eval, double* d2basis_yz_eval, double* d2basis_zz_eval ); + /** Evaluation the collocation matrix + gradient + hessian + 3rd derivatives + * + * @param[in] npts Same as `eval_collocation` + * @param[in] nshells Same as `eval_collocation` + * @param[in] nbe Same as `eval_collocation` + * @param[in] pts Same as `eval_collocation` + * @param[in] basis Same as `eval_collocation` + * @param[in] shell_list Same as `eval_collocation` + * + * @param[out] basis_eval Same as `eval_collocation` + * @param[out] dbasis_x_eval Same as `eval_collocation_gradient` + * @param[out] dbasis_y_eval Same as `eval_collocation_gradient` + * @param[out] dbasis_z_eval Same as `eval_collocation_gradient` + * @param[out] d2basis_xx_eval Derivative of `basis_eval` wrt x+x (same dimensions) + * @param[out] d2basis_xy_eval Derivative of `basis_eval` wrt x+y (same dimensions) + * @param[out] d2basis_xz_eval Derivative of `basis_eval` wrt x+z (same dimensions) + * @param[out] d2basis_yy_eval Derivative of `basis_eval` wrt y+y (same dimensions) + * @param[out] d2basis_yz_eval Derivative of `basis_eval` wrt y+z (same dimensions) + * @param[out] d2basis_zz_eval Derivative of `basis_eval` wrt z+z (same dimensions) + * @param[out] d3basis_xxx_eval Derivative of `basis_eval` wrt x+x+x (same dimensions) + * @param[out] d3basis_xxy_eval Derivative of `basis_eval` wrt x+x+y (same dimensions) + * @param[out] d3basis_xxz_eval Derivative of `basis_eval` wrt x+x+z (same dimensions) + * @param[out] d3basis_xyy_eval Derivative of `basis_eval` wrt x+y+y (same dimensions) + * @param[out] d3basis_xyz_eval Derivative of `basis_eval` wrt x+y+z (same dimensions) + * @param[out] d3basis_xzz_eval Derivative of `basis_eval` wrt x+z+z (same dimensions) + * @param[out] d3basis_yyy_eval Derivative of `basis_eval` wrt y+y+y (same dimensions) + * @param[out] d3basis_yyz_eval Derivative of `basis_eval` wrt y+y+z (same dimensions) + * @param[out] d3basis_yzz_eval Derivative of `basis_eval` wrt y+z+z (same dimensions) + * @param[out] d3basis_zzz_eval Derivative of `basis_eval` wrt z+z+z (same dimensions) + */ + void eval_collocation_der3( size_t npts, size_t nshells, size_t nbe, + const double* pts, const BasisSet& basis, const int32_t* shell_list, + double* basis_eval, double* dbasis_x_eval, double* dbasis_y_eval, + double* dbasis_z_eval, double* d2basis_xx_eval, double* d2basis_xy_eval, + double* d2basis_xz_eval, double* d2basis_yy_eval, double* d2basis_yz_eval, + double* d2basis_zz_eval, double* d3basis_xxx_eval, double* d3basis_xxy_eval, + double* d3basis_xxz_eval, double* d3basis_xyy_eval, double* d3basis_xyz_eval, + double* d3basis_xzz_eval, double* d3basis_yyy_eval, double* d3basis_yyz_eval, + double* d3basis_yzz_eval, double* d3basis_zzz_eval); + /** Evaluate the compressed "X" matrix = fac * P * B * * @param[in] npts The number of points in the collocation matrix @@ -238,6 +278,49 @@ class LocalHostWorkDriver : public LocalWorkDriver { const double* Xz, size_t ldxz, double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma ); + /** Evaluate the U and V variavles for RKS MGGA + * + * U = rho + gradient + tau + lapl + * V = rho + gamma + tau + lapl + * + * @param[in] npts Same as `eval_uvvar_lda` + * @param[in] nbe Same as `eval_uvvar_lda` + * @param[in] basis_eval Same as `eval_uvvar_lda` + * @param[in] dbasis_x_eval Derivative of `basis_eval` wrt x (same dims) + * @param[in] dbasis_y_eval Derivative of `basis_eval` wrt y (same dims) + * @param[in] dbasis_z_eval Derivative of `basis_eval` wrt z (same dims) + * @param[in] lbasis_eval Laplacian of `basis_eval` (same dims) + * @param[in] X Same as `eval_uvvar_lda` + * @param[in] ldx Same as `eval_uvvar_lda` + * @param[in] mmat_x + * @param[in] mmat_y + * @param[in] mmat_z + * @param[in] ldm + * @param[out] den_eval Same as `eval_uvvar_lda` + * @param[out] dden_x_eval Derivative of `den_eval` wrt x (npts) + * @param[out] dden_y_eval Derivative of `den_eval` wrt y (npts) + * @param[out] dden_z_eval Derivative of `den_eval` wrt z (npts) + * @param[out] gamma |grad rho|^2 (npts) + * @param[out] tau + * @param[out] lapl + * + */ + void eval_uvvar_mgga_rks( size_t npts, size_t nbe, const double* basis_eval, + const double* dbasis_x_eavl, const double* dbasis_y_eval, + const double* dbasis_z_eval, const double* lbasis_eval, + const double* X, size_t ldx, const double* mmat_x, + const double* mmat_y, const double* mmat_z, size_t ldm, double* den_eval, + double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma, + double* tau, double* lapl); + void eval_uvvar_mgga_uks( size_t npts, size_t nbe, const double* basis_eval, + const double* dbasis_x_eavl, const double* dbasis_y_eval, + const double* dbasis_z_eval, const double* lbasis_eval, + const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, + const double* mmat_xs, const double* mmat_ys, const double* mmat_zs, size_t ldms, + const double* mmat_xz, const double* mmat_yz, const double* mmat_zz, size_t ldmz, + double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, + double* gamma, double* tau, double* lapl); + /** Evaluate the VXC Z Matrix for RKS LDA * * Z(mu,i) = 0.5 * vrho(i) * B(mu, i) @@ -294,9 +377,57 @@ class LocalHostWorkDriver : public LocalWorkDriver { double* Zs, size_t ldzs, double* Zz, size_t ldzz ); + /** Evaluate the VXC Z Matrix for RKS MGGA + * + * Z(mu,i) = 0.5 * vrho(i) * B(mu, i) + + * 2.0 * vgamma(i) * (grad B(mu,i)) . (grad rho(i)) + + * 0.5 * vlapl(i) * lapl B(mu, i) + * + * TODO: Need to add an API for UKS/GKS + * + * @param[in] npts Same as `eval_zmat_lda_vxc` + * @param[in] nbe Same as `eval_zmat_lda_vxc` + * @param[in] vrho Same as `eval_zmat_lda_vxc` + * @param[in] vgamma Derivative of the XC functional wrt gamma scaled by quad weights (npts) + * @param[in] basis_eval Same as `eval_zmat_lda_vxc` + * @param[in] dbasis_x_eval Derivative of `basis_eval` wrt x (same dims) + * @param[in] dbasis_y_eval Derivative of `basis_eval` wrt y (same dims) + * @param[in] dbasis_z_eval Derivative of `basis_eval` wrt z (same dims) + * @param[in] lbasis_eval Laplacian of `basis_eval` (same dims) + * @param[in] dden_x_eval Derivative of rho wrt x (npts) + * @param[in] dden_y_eval Derivative of rho wrt y (npts) + * @param[in] dden_z_eval Derivative of rho wrt z (npts) + * @param[out] Z Same as `eval_zmat_lda_vxc` + * @param[in] ldz Same as `eval_zmat_lda_vxc` + * + */ + void eval_zmat_mgga_vxc_rks( size_t npts, size_t nbe, const double* vrho, + const double* vgamma, const double* vlapl, const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, + const double* lbasis_eval, + const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, + double* Z, size_t ldz ); + void eval_zmat_mgga_vxc_uks( size_t npts, size_t nbe, const double* vrho, + const double* vgamma, const double* vlapl, const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, + const double* lbasis_eval, + const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, + double* Zs, size_t ldzs, double* Zz, size_t ldzz ); + void eval_mmat_mgga_vxc_rks( size_t npts, size_t nbe, const double* vtau, + const double* vlapl, const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, double* mmat_x, double* mmat_y, double* mmat_z, + size_t ldm); + void eval_mmat_mgga_vxc_uks( size_t npts, size_t nbe, const double* vtau, + const double* vlapl, const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, double* mmat_xs, double* mmat_ys, double* mmat_zs, + size_t ldms, double* mmat_xz, double* mmat_yz, double* mmat_zz, size_t ldmz); + + + /** Increment VXC integrand given Z / Collocation (RKS LDA+GGA) * * VXC += Z**H * B + h.c. + * VXC += M**H . dB + h.c. * * Only updates lower triangle * @@ -313,8 +444,8 @@ class LocalHostWorkDriver : public LocalWorkDriver { * */ void inc_vxc( size_t npts, size_t nbf, size_t nbe, const double* basis_eval, - const submat_map_t& submat_map, const double* Z, size_t ldz, double* VXC, - size_t ldvxc, double* scr ); + const submat_map_t& submat_map, const double* Z, size_t ldz, + double* VXC, size_t ldvxc, double* scr ); private: diff --git a/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp b/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp index 5c9e7c28..e44bd562 100644 --- a/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp +++ b/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp @@ -43,6 +43,15 @@ struct LocalHostWorkDriverPIMPL { double* dbasis_z_eval, double* d2basis_xx_eval, double* d2basis_xy_eval, double* d2basis_xz_eval, double* d2basis_yy_eval, double* d2basis_yz_eval, double* d2basis_zz_eval ) = 0; + virtual void eval_collocation_der3( size_t npts, size_t nshells, size_t nbe, + const double* pts, const BasisSet& basis, const int32_t* shell_list, + double* basis_eval, double* dbasis_x_eval, double* dbasis_y_eval, + double* dbasis_z_eval, double* d2basis_xx_eval, double* d2basis_xy_eval, + double* d2basis_xz_eval, double* d2basis_yy_eval, double* d2basis_yz_eval, + double* d2basis_zz_eval, double* d3basis_xxx_eval, double* d3basis_xxy_eval, + double* d3basis_xxz_eval, double* d3basis_xyy_eval, double* d3basis_xyz_eval, + double* d3basis_xzz_eval, double* d3basis_yyy_eval, double* d3basis_yyz_eval, + double* d3basis_yzz_eval, double* d3basis_zzz_eval) = 0; virtual void eval_xmat( size_t npts, size_t nbf, size_t nbe, const submat_map_t& submat_map, double fac, const double* P, size_t ldp, @@ -73,17 +82,34 @@ struct LocalHostWorkDriverPIMPL { double* den_eval) = 0; virtual void eval_uvvar_gga_rks( size_t npts, size_t nbe, const double* basis_eval, - const double* dbasis_x_eavl, const double *dbasis_y_eval, + const double* dbasis_x_eval, const double *dbasis_y_eval, const double* dbasis_z_eval, const double* X, size_t ldx, double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma ) = 0; virtual void eval_uvvar_gga_uks( size_t npts, size_t nbe, const double* basis_eval, - const double* dbasis_x_eavl, const double *dbasis_y_eval, + const double* dbasis_x_eval, const double *dbasis_y_eval, const double* dbasis_z_eval, const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma ) = 0; + virtual void eval_uvvar_mgga_rks( size_t npts, size_t nbe, const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, const double* lbasis_eval, + const double* X, size_t ldx, + const double* mmat_x, const double* mmat_y, const double* mmat_z, + size_t ldm, double* den_eval, double* dden_x_eval, double* dden_y_eval, + double* dden_z_eval, double* gamma, double* tau, double* lapl) = 0; + virtual void eval_uvvar_mgga_uks( size_t npts, size_t nbe, const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, const double* lbasis_eval, + const double* Xs, size_t ldxs, + const double* Xz, size_t ldxz, + const double* mmat_xs, const double* mmat_ys, const double* mmat_zs, + size_t ldms, const double* mmat_xz, const double* mmat_yz, const double* mmat_zz, + size_t ldmz, double* den_eval, double* dden_x_eval, double* dden_y_eval, + double* dden_z_eval, double* gamma, double* tau, double* lapl) = 0; + virtual void eval_zmat_lda_vxc_rks( size_t npts, size_t nbe, const double* vrho, const double* basis_eval, double* Z, size_t ldz ) = 0; @@ -101,6 +127,27 @@ struct LocalHostWorkDriverPIMPL { const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ) = 0; + virtual void eval_zmat_mgga_vxc_rks( size_t npts, size_t nbe, const double* vrho, + const double* vgamma, const double* vlapl, const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, + const double* lbasis_eval, + const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, + double* Z, size_t ldz) = 0; + virtual void eval_zmat_mgga_vxc_uks( size_t npts, size_t nbe, const double* vrho, + const double* vgamma, const double* vlapl, const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, + const double* lbasis_eval, + const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, + double* Zs, size_t ldzs, double* Zz, size_t ldzz ) = 0; + virtual void eval_mmat_mgga_vxc_rks( size_t npts, size_t nbe, const double* vtau, + const double* vlapl, const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, double* mmat_x, double* mmat_y, double* mmat_z, + size_t ldm) = 0; + virtual void eval_mmat_mgga_vxc_uks( size_t npts, size_t nbe, const double* vtau, + const double* vlapl, const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, double* mmat_xs, double* mmat_ys, double* mmat_zs, + size_t ldms, double* mmat_xz, double* mmat_yz, double* mmat_zz, size_t ldmz ) = 0; + virtual void inc_vxc( size_t npts, size_t nbf, size_t nbe, const double* basis_eval, const submat_map_t& submat_map, const double* Z, size_t ldz, double* VXC, size_t ldvxc, double* scr ) = 0; diff --git a/src/xc_integrator/local_work_driver/host/reference/collocation.hpp b/src/xc_integrator/local_work_driver/host/reference/collocation.hpp index 5bfc378c..d9d8d9c9 100644 --- a/src/xc_integrator/local_work_driver/host/reference/collocation.hpp +++ b/src/xc_integrator/local_work_driver/host/reference/collocation.hpp @@ -47,4 +47,32 @@ void gau2grid_collocation_hessian( size_t npts, double* d2basis_yy_eval, double* d2basis_yz_eval, double* d2basis_zz_eval); -} + +void gau2grid_collocation_der3( size_t npts, + size_t nshells, + size_t nbe, + const double* points, + const BasisSet& basis, + const int32_t* shell_mask, + double* basis_eval, + double* dbasis_x_eval, + double* dbasis_y_eval, + double* dbasis_z_eval, + double* d2basis_xx_eval, + double* d2basis_xy_eval, + double* d2basis_xz_eval, + double* d2basis_yy_eval, + double* d2basis_yz_eval, + double* d2basis_zz_eval, + double* d3basis_xxx_eval, + double* d3basis_xxy_eval, + double* d3basis_xxz_eval, + double* d3basis_xyy_eval, + double* d3basis_xyz_eval, + double* d3basis_xzz_eval, + double* d3basis_yyy_eval, + double* d3basis_yyz_eval, + double* d3basis_yzz_eval, + double* d3basis_zzz_eval); + + } diff --git a/src/xc_integrator/local_work_driver/host/reference/gau2grid_collocation.cxx b/src/xc_integrator/local_work_driver/host/reference/gau2grid_collocation.cxx index 5462b5c2..dbafa34b 100644 --- a/src/xc_integrator/local_work_driver/host/reference/gau2grid_collocation.cxx +++ b/src/xc_integrator/local_work_driver/host/reference/gau2grid_collocation.cxx @@ -198,9 +198,102 @@ void gau2grid_collocation_hessian( size_t npts, gg_fast_transpose( ncomp, npts, rv_yz, d2basis_yz_eval ); gg_fast_transpose( ncomp, npts, rv_zz, d2basis_zz_eval ); - a.deallocate( rv, 4*npts*nbe ); + a.deallocate( rv, 10*npts*nbe ); } +void gau2grid_collocation_der3( size_t npts, + size_t nshells, + size_t nbe, + const double* points, + const BasisSet& basis, + const int32_t* shell_mask, + double* basis_eval, + double* dbasis_x_eval, + double* dbasis_y_eval, + double* dbasis_z_eval, + double* d2basis_xx_eval, + double* d2basis_xy_eval, + double* d2basis_xz_eval, + double* d2basis_yy_eval, + double* d2basis_yz_eval, + double* d2basis_zz_eval, + double* d3basis_xxx_eval, + double* d3basis_xxy_eval, + double* d3basis_xxz_eval, + double* d3basis_xyy_eval, + double* d3basis_xyz_eval, + double* d3basis_xzz_eval, + double* d3basis_yyy_eval, + double* d3basis_yyz_eval, + double* d3basis_yzz_eval, + double* d3basis_zzz_eval) { + + std::allocator a; + auto* rv = a.allocate( 20 * npts * nbe ); + auto* rv_x = rv + npts * nbe; + auto* rv_y = rv_x + npts * nbe; + auto* rv_z = rv_y + npts * nbe; + auto* rv_xx = rv_z + npts * nbe; + auto* rv_xy = rv_xx + npts * nbe; + auto* rv_xz = rv_xy + npts * nbe; + auto* rv_yy = rv_xz + npts * nbe; + auto* rv_yz = rv_yy + npts * nbe; + auto* rv_zz = rv_yz + npts * nbe; + auto* rv_xxx = rv_zz + npts * nbe; + auto* rv_xxy = rv_xxx + npts * nbe; + auto* rv_xxz = rv_xxy + npts * nbe; + auto* rv_xyy = rv_xxz + npts * nbe; + auto* rv_xyz = rv_xyy + npts * nbe; + auto* rv_xzz = rv_xyz + npts * nbe; + auto* rv_yyy = rv_xzz + npts * nbe; + auto* rv_yyz = rv_yyy + npts * nbe; + auto* rv_yzz = rv_yyz + npts * nbe; + auto* rv_zzz = rv_yzz + npts * nbe; + + + size_t ncomp = 0; + for( size_t i = 0; i < nshells; ++i ) { + + const auto& sh = basis.at(shell_mask[i]); + int order = sh.pure() ? GG_SPHERICAL_CCA : GG_CARTESIAN_CCA; + + const auto ioff = ncomp*npts; + gg_collocation_deriv3( sh.l(), npts, points, 3, sh.nprim(), sh.coeff_data(), + sh.alpha_data(), sh.O_data(), order, rv + ioff, rv_x + ioff, rv_y + ioff, + rv_z + ioff, rv_xx + ioff, rv_xy + ioff, rv_xz + ioff, rv_yy + ioff, + rv_yz + ioff, rv_zz + ioff, rv_xxx + ioff, rv_xxy + ioff, rv_xxz + ioff, + rv_xyy + ioff, rv_xyz + ioff, rv_xzz + ioff, rv_yyy + ioff, rv_yyz + ioff, + rv_yzz + ioff, rv_zzz + ioff); + + ncomp += sh.size(); + + } + + gg_fast_transpose( ncomp, npts, rv, basis_eval ); + gg_fast_transpose( ncomp, npts, rv_x, dbasis_x_eval ); + gg_fast_transpose( ncomp, npts, rv_y, dbasis_y_eval ); + gg_fast_transpose( ncomp, npts, rv_z, dbasis_z_eval ); + gg_fast_transpose( ncomp, npts, rv_xx, d2basis_xx_eval ); + gg_fast_transpose( ncomp, npts, rv_xy, d2basis_xy_eval ); + gg_fast_transpose( ncomp, npts, rv_xz, d2basis_xz_eval ); + gg_fast_transpose( ncomp, npts, rv_yy, d2basis_yy_eval ); + gg_fast_transpose( ncomp, npts, rv_yz, d2basis_yz_eval ); + gg_fast_transpose( ncomp, npts, rv_zz, d2basis_zz_eval ); + gg_fast_transpose( ncomp, npts, rv_xxx, d3basis_xxx_eval ); + gg_fast_transpose( ncomp, npts, rv_xxy, d3basis_xxy_eval ); + gg_fast_transpose( ncomp, npts, rv_xxz, d3basis_xxz_eval ); + gg_fast_transpose( ncomp, npts, rv_xyy, d3basis_xyy_eval ); + gg_fast_transpose( ncomp, npts, rv_xyz, d3basis_xyz_eval ); + gg_fast_transpose( ncomp, npts, rv_xzz, d3basis_xzz_eval ); + gg_fast_transpose( ncomp, npts, rv_yyy, d3basis_yyy_eval ); + gg_fast_transpose( ncomp, npts, rv_yyz, d3basis_yyz_eval ); + gg_fast_transpose( ncomp, npts, rv_yzz, d3basis_yzz_eval ); + gg_fast_transpose( ncomp, npts, rv_zzz, d3basis_zzz_eval ); + + a.deallocate( rv, 20*npts*nbe ); + +} + } diff --git a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx index e1a13911..4f57e7fa 100644 --- a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx +++ b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx @@ -82,6 +82,23 @@ namespace GauXC { d2basis_zz_eval); } + void ReferenceLocalHostWorkDriver::eval_collocation_der3( size_t npts, + size_t nshells, size_t nbe, const double* pts, const BasisSet& basis, + const int32_t* shell_list, double* basis_eval, double* dbasis_x_eval, + double* dbasis_y_eval, double* dbasis_z_eval, double* d2basis_xx_eval, + double* d2basis_xy_eval, double* d2basis_xz_eval, double* d2basis_yy_eval, + double* d2basis_yz_eval, double* d2basis_zz_eval, double* d3basis_xxx_eval, + double* d3basis_xxy_eval, double* d3basis_xxz_eval, double* d3basis_xyy_eval, + double* d3basis_xyz_eval, double* d3basis_xzz_eval, double* d3basis_yyy_eval, + double* d3basis_yyz_eval, double* d3basis_yzz_eval, double* d3basis_zzz_eval) { + gau2grid_collocation_der3(npts, nshells, nbe, pts, basis, shell_list, + basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, d2basis_xx_eval, + d2basis_xy_eval, d2basis_xz_eval, d2basis_yy_eval, d2basis_yz_eval, + d2basis_zz_eval, d3basis_xxx_eval, d3basis_xxy_eval, d3basis_xxz_eval, + d3basis_xyy_eval, d3basis_xyz_eval, d3basis_xzz_eval, d3basis_yyy_eval, + d3basis_yyz_eval, d3basis_yzz_eval, d3basis_zzz_eval); + } + // X matrix (P * B) void ReferenceLocalHostWorkDriver::eval_xmat( size_t npts, size_t nbf, size_t nbe, @@ -103,6 +120,7 @@ namespace GauXC { } + // U/VVar LDA (density) void ReferenceLocalHostWorkDriver::eval_uvvar_lda_rks( size_t npts, size_t nbe, const double* basis_eval, const double* X, size_t ldx, double* den_eval) { @@ -226,6 +244,119 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_gga_uks( size_t npts, size_t nbe, } +void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_rks( size_t npts, size_t nbe, + const double* basis_eval, const double* dbasis_x_eval, + const double *dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, + const double* X, size_t ldx, const double* mmat_x, const double* mmat_y, + const double* mmat_z, size_t ldm, + double* den_eval, double* dden_x_eval, double* dden_y_eval, + double* dden_z_eval, double* gamma, double* tau, double* lapl ) { + + for( int32_t i = 0; i < (int32_t)npts; ++i ) { + + const size_t ioff = size_t(i) * ldx; + const auto* X_i = X + ioff; + + den_eval[i] = blas::dot( nbe, basis_eval + ioff, 1, X_i, 1 ); + + const auto dx = 2. * blas::dot( nbe, dbasis_x_eval + ioff, 1, X_i, 1 ); + const auto dy = 2. * blas::dot( nbe, dbasis_y_eval + ioff, 1, X_i, 1 ); + const auto dz = 2. * blas::dot( nbe, dbasis_z_eval + ioff, 1, X_i, 1 ); + + dden_x_eval[i] = dx; + dden_y_eval[i] = dy; + dden_z_eval[i] = dz; + + gamma[i] = dx*dx + dy*dy + dz*dz; + + tau[i] = 0.5*blas::dot( nbe, dbasis_x_eval + ioff, 1, mmat_x + ioff, 1); + tau[i] += 0.5*blas::dot( nbe, dbasis_y_eval + ioff, 1, mmat_y + ioff, 1); + tau[i] += 0.5*blas::dot( nbe, dbasis_z_eval + ioff, 1, mmat_z + ioff, 1); + + if (lapl != nullptr) + lapl[i] = 2. * blas::dot( nbe, lbasis_eval + ioff, 1, X_i, 1) + 4. * tau[i]; + + } +} + +void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, + const double* basis_eval, const double* dbasis_x_eval, + const double *dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, + const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, + const double* mmat_xs, const double* mmat_ys, const double* mmat_zs, size_t ldms, + const double* mmat_xz, const double* mmat_yz, const double* mmat_zz, size_t ldmz, + double* den_eval, double* dden_x_eval, double* dden_y_eval, + double* dden_z_eval, double* gamma, double* tau, double* lapl ) { + + for( int32_t i = 0; i < (int32_t)npts; ++i ) { + + const size_t ioffs = size_t(i) * ldxs; + const size_t ioffz = size_t(i) * ldxz; + + const auto* Xs_i = Xs + ioffs; + const auto* Xz_i = Xz + ioffz; + + double rhos = blas::dot( nbe, basis_eval + ioffs, 1, Xs_i, 1 ); // S density + double rhoz = blas::dot( nbe, basis_eval + ioffz, 1, Xz_i, 1 ); // Z density + + + den_eval[2*i] = 0.5*(rhos + rhoz); // rho_+ + den_eval[2*i+1] = 0.5*(rhos - rhoz); // rho_- + + const auto dndx = + 2. * blas::dot( nbe, dbasis_x_eval + ioffs, 1, Xs_i, 1 ); + const auto dndy = + 2. * blas::dot( nbe, dbasis_y_eval + ioffs, 1, Xs_i, 1 ); + const auto dndz = + 2. * blas::dot( nbe, dbasis_z_eval + ioffs, 1, Xs_i, 1 ); + + const auto dMzdx = + 2. * blas::dot( nbe, dbasis_x_eval + ioffz, 1, Xz_i, 1 ); + const auto dMzdy = + 2. * blas::dot( nbe, dbasis_y_eval + ioffz, 1, Xz_i, 1 ); + const auto dMzdz = + 2. * blas::dot( nbe, dbasis_z_eval + ioffz, 1, Xz_i, 1 ); + + dden_x_eval[2*i] = dndx; // dn / dx + dden_y_eval[2*i] = dndy; // dn / dy + dden_z_eval[2*i] = dndz; // dn / dz + + dden_x_eval[2*i+1] = dMzdx; // dMz / dx + dden_y_eval[2*i+1] = dMzdy; // dMz / dy + dden_z_eval[2*i+1] = dMzdz; // dMz / dz + + // (del n).(del n) + const auto dn_sq = dndx*dndx + dndy*dndy + dndz*dndz; + // (del Mz).(del Mz) + const auto dMz_sq = dMzdx*dMzdx + dMzdy*dMzdy + dMzdz*dMzdz; + // (del n).(del Mz) + const auto dn_dMz = dndx*dMzdx + dndy*dMzdy + dndz*dMzdz; + + gamma[3*i ] = 0.25*(dn_sq + dMz_sq) + 0.5*dn_dMz; + gamma[3*i+1] = 0.25*(dn_sq - dMz_sq); + gamma[3*i+2] = 0.25*(dn_sq + dMz_sq) - 0.5*dn_dMz; + + auto taus = 0.5*blas::dot( nbe, dbasis_x_eval + ioffs, 1, mmat_xs + ioffs, 1); + taus += 0.5*blas::dot( nbe, dbasis_y_eval + ioffs, 1, mmat_ys + ioffs, 1); + taus += 0.5*blas::dot( nbe, dbasis_z_eval + ioffs, 1, mmat_zs + ioffs, 1); + auto tauz = 0.5*blas::dot( nbe, dbasis_x_eval + ioffz, 1, mmat_xz + ioffz, 1); + tauz += 0.5*blas::dot( nbe, dbasis_y_eval + ioffz, 1, mmat_yz + ioffz, 1); + tauz += 0.5*blas::dot( nbe, dbasis_z_eval + ioffz, 1, mmat_zz + ioffz, 1); + + tau[2*i] = 0.5*(taus + tauz); + tau[2*i+1] = 0.5*(taus - tauz); + + if (lapl != nullptr) { + auto lapls = 2. * blas::dot( nbe, lbasis_eval + ioffs, 1, Xs_i, 1) + 4. * taus; + auto laplz = 2. * blas::dot( nbe, lbasis_eval + ioffz, 1, Xz_i, 1) + 4. * tauz; + + lapl[2*i] = 0.5*(lapls + laplz); + lapl[2*i+1] = 0.5*(lapls - laplz); + } + + } +} + // Eval Z Matrix LDA VXC void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_rks( size_t npts, size_t nbf, @@ -362,9 +493,217 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_gga_uks( size_t npts, size_t nbe, } } + + // Eval Z Matrix MGGA VXC + void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_rks( size_t npts, size_t nbf, + const double* vrho, const double* vgamma, const double* vlapl, + const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, const double* lbasis_eval, + const double* dden_x_eval, + const double* dden_y_eval, const double* dden_z_eval, double* Z, size_t ldz ) { + + if( ldz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); + blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Z, nbf ); + + for( int32_t i = 0; i < (int32_t)npts; ++i ) { + + const int32_t ioff = i * nbf; + + auto* z_col = Z + ioff; + auto* bf_x_col = dbasis_x_eval + ioff; + auto* bf_y_col = dbasis_y_eval + ioff; + auto* bf_z_col = dbasis_z_eval + ioff; + + const auto lda_fact = 0.5 * vrho[i]; + blas::scal( nbf, lda_fact, z_col, 1 ); + + const auto gga_fact = 2. * vgamma[i]; + const auto x_fact = gga_fact * dden_x_eval[i]; + const auto y_fact = gga_fact * dden_y_eval[i]; + const auto z_fact = gga_fact * dden_z_eval[i]; + + blas::axpy( nbf, x_fact, bf_x_col, 1, z_col, 1 ); + blas::axpy( nbf, y_fact, bf_y_col, 1, z_col, 1 ); + blas::axpy( nbf, z_fact, bf_z_col, 1, z_col, 1 ); + + if ( vlapl != nullptr ) { + auto* lbf_col = lbasis_eval + ioff; + const auto lapl_fact = vlapl[i]; + blas::axpy( nbf, lapl_fact, lbf_col, 1, z_col, 1 ); + } + + } + + } + + + void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks( size_t npts, size_t nbf, + const double* vrho, const double* vgamma, const double* vlapl, + const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, const double* lbasis_eval, + const double* dden_x_eval, + const double* dden_y_eval, const double* dden_z_eval, double* Zs, + size_t ldzs, double* Zz, size_t ldzz ) { + + + if( ldzs != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); + if( ldzz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); + blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zs, ldzs); + blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zz, ldzz); + + for( int32_t i = 0; i < (int32_t)npts; ++i ) { + + const int32_t ioff = i * nbf; + + auto* zs_col = Zs + ioff; + auto* zz_col = Zz + ioff; + auto* bf_x_col = dbasis_x_eval + ioff; + auto* bf_y_col = dbasis_y_eval + ioff; + auto* bf_z_col = dbasis_z_eval + ioff; + auto* lbf_col = lbasis_eval + ioff; + + const double factp = 0.5 * vrho[2*i]; + const double factm = 0.5 * vrho[2*i+1]; + + GauXC::blas::scal( nbf, 0.5*(factp + factm), zs_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " + GauXC::blas::scal( nbf, 0.5*(factp - factm), zz_col, 1 ); + + const auto gga_fact_pp = vgamma[3*i]; + const auto gga_fact_pm = vgamma[3*i+1]; + const auto gga_fact_mm = vgamma[3*i+2]; + + const auto gga_fact_1 = 0.5*(gga_fact_pp + gga_fact_pm + gga_fact_mm); + const auto gga_fact_2 = 0.5*(gga_fact_pp - gga_fact_mm); + const auto gga_fact_3 = 0.5*(gga_fact_pp - gga_fact_pm + gga_fact_mm); + + const auto x_fact_s = gga_fact_1 * dden_x_eval[2*i] + gga_fact_2 * dden_x_eval[2*i+1]; + const auto y_fact_s = gga_fact_1 * dden_y_eval[2*i] + gga_fact_2 * dden_y_eval[2*i+1]; + const auto z_fact_s = gga_fact_1 * dden_z_eval[2*i] + gga_fact_2 * dden_z_eval[2*i+1]; + + const auto x_fact_z = gga_fact_3 * dden_x_eval[2*i+1] + gga_fact_2 * dden_x_eval[2*i]; + const auto y_fact_z = gga_fact_3 * dden_y_eval[2*i+1] + gga_fact_2 * dden_y_eval[2*i]; + const auto z_fact_z = gga_fact_3 * dden_z_eval[2*i+1] + gga_fact_2 * dden_z_eval[2*i]; + + + blas::axpy( nbf, x_fact_s, bf_x_col, 1, zs_col, 1 ); + blas::axpy( nbf, y_fact_s, bf_y_col, 1, zs_col, 1 ); + blas::axpy( nbf, z_fact_s, bf_z_col, 1, zs_col, 1 ); + + blas::axpy( nbf, x_fact_z, bf_x_col, 1, zz_col, 1 ); + blas::axpy( nbf, y_fact_z, bf_y_col, 1, zz_col, 1 ); + blas::axpy( nbf, z_fact_z, bf_z_col, 1, zz_col, 1 ); + + if (vlapl != nullptr) { + const auto lfactp = vlapl[2*i]; + const auto lfactm = vlapl[2*i+1]; + blas::axpy( nbf, 0.5*(lfactp + lfactm), lbf_col, 1, zs_col, 1); + blas::axpy( nbf, 0.5*(lfactp - lfactm), lbf_col, 1, zz_col, 1); + } + + } + } + + void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_rks(size_t npts, size_t nbf, + const double* vtau, const double* vlapl, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, + double* mmat_x, double* mmat_y, double* mmat_z, size_t ldm ) { + + if( ldm != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); + + blas::lacpy( 'A', nbf, npts, dbasis_x_eval, nbf, mmat_x, ldm); + blas::lacpy( 'A', nbf, npts, dbasis_y_eval, nbf, mmat_y, ldm); + blas::lacpy( 'A', nbf, npts, dbasis_z_eval, nbf, mmat_z, ldm); + + for( int32_t i = 0; i < (int32_t)npts; ++i ) { + + const int32_t ioff = i * nbf; + auto* mmat_x_col = mmat_x + ioff; + auto* mmat_y_col = mmat_y + ioff; + auto* mmat_z_col = mmat_z + ioff; + auto* bf_x_col = dbasis_x_eval + ioff; + auto* bf_y_col = dbasis_y_eval + ioff; + auto* bf_z_col = dbasis_z_eval + ioff; + + const auto tfact = 0.25 * vtau[i]; + + blas::scal( nbf, tfact, mmat_x_col, 1); + blas::scal( nbf, tfact, mmat_y_col, 1); + blas::scal( nbf, tfact, mmat_z_col, 1); + + if ( vlapl != nullptr ) { + const auto lfact = vlapl[i]; + blas::axpy( nbf, lfact, bf_x_col, 1, mmat_x_col, 1); + blas::axpy( nbf, lfact, bf_y_col, 1, mmat_y_col, 1); + blas::axpy( nbf, lfact, bf_z_col, 1, mmat_z_col, 1); + } + } + } + + void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks(size_t npts, size_t nbf, + const double* vtau, const double* vlapl, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, + double* mmat_xs, double* mmat_ys, double* mmat_zs, size_t ldms, + double* mmat_xz, double* mmat_yz, double* mmat_zz, size_t ldmz) { + + if( ldms != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); + if( ldmz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); + + blas::lacpy( 'A', nbf, npts, dbasis_x_eval, nbf, mmat_xs, ldms); + blas::lacpy( 'A', nbf, npts, dbasis_y_eval, nbf, mmat_ys, ldms); + blas::lacpy( 'A', nbf, npts, dbasis_z_eval, nbf, mmat_zs, ldms); + blas::lacpy( 'A', nbf, npts, dbasis_x_eval, nbf, mmat_xz, ldmz); + blas::lacpy( 'A', nbf, npts, dbasis_y_eval, nbf, mmat_yz, ldmz); + blas::lacpy( 'A', nbf, npts, dbasis_z_eval, nbf, mmat_zz, ldmz); + + for( int32_t i = 0; i < (int32_t)npts; ++i ) { + + const int32_t ioff = i * nbf; + auto* xs_col = mmat_xs + ioff; + auto* ys_col = mmat_ys + ioff; + auto* zs_col = mmat_zs + ioff; + auto* xz_col = mmat_xz + ioff; + auto* yz_col = mmat_yz + ioff; + auto* zz_col = mmat_zz + ioff; + auto* bf_x_col = dbasis_x_eval + ioff; + auto* bf_y_col = dbasis_y_eval + ioff; + auto* bf_z_col = dbasis_z_eval + ioff; + + const auto tfactp = 0.25 * vtau[2*i]; + const auto tfactm = 0.25 * vtau[2*i+1]; + const auto tfacts = 0.5*(tfactp + tfactm); + const auto tfactz = 0.5*(tfactp - tfactm); + + blas::scal( nbf, tfacts, xs_col, 1); + blas::scal( nbf, tfacts, ys_col, 1); + blas::scal( nbf, tfacts, zs_col, 1); + blas::scal( nbf, tfactz, xz_col, 1); + blas::scal( nbf, tfactz, yz_col, 1); + blas::scal( nbf, tfactz, zz_col, 1); + + if ( vlapl != nullptr ) { + const auto lfactp = vlapl[2*i]; + const auto lfactm = vlapl[2*i+1]; + const auto lfacts = 0.5*(lfactp + lfactm); + const auto lfactz = 0.5*(lfactp - lfactm); + blas::axpy( nbf, lfacts, bf_x_col, 1, xs_col, 1); + blas::axpy( nbf, lfacts, bf_y_col, 1, ys_col, 1); + blas::axpy( nbf, lfacts, bf_z_col, 1, zs_col, 1); + blas::axpy( nbf, lfactz, bf_x_col, 1, xz_col, 1); + blas::axpy( nbf, lfactz, bf_y_col, 1, yz_col, 1); + blas::axpy( nbf, lfactz, bf_z_col, 1, zz_col, 1); + } + + } + } + + // Increment VXC by Z void ReferenceLocalHostWorkDriver::inc_vxc( size_t npts, size_t nbf, size_t nbe, - const double* basis_eval, const submat_map_t& submat_map, const double* Z, + const double* basis_eval, const submat_map_t& submat_map, const double* Z, size_t ldz, double* VXC, size_t ldvxc, double* scr ) { if( submat_map.size() > 1 ) { diff --git a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp index 55fa27c5..fcaa84cd 100644 --- a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp +++ b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp @@ -43,6 +43,16 @@ struct ReferenceLocalHostWorkDriver : public detail::LocalHostWorkDriverPIMPL { double* dbasis_z_eval, double* d2basis_xx_eval, double* d2basis_xy_eval, double* d2basis_xz_eval, double* d2basis_yy_eval, double* d2basis_yz_eval, double* d2basis_zz_eval ) override; + void eval_collocation_der3( size_t npts, size_t nshells, size_t nbe, + const double* pts, const BasisSet& basis, const int32_t* shell_list, + double* basis_eval, double* dbasis_x_eval, double* dbasis_y_eval, + double* dbasis_z_eval, double* d2basis_xx_eval, double* d2basis_xy_eval, + double* d2basis_xz_eval, double* d2basis_yy_eval, double* d2basis_yz_eval, + double* d2basis_zz_eval, double* d3basis_xxx_eval, double* d3basis_xxy_eval, + double* d3basis_xxz_eval, double* d3basis_xyy_eval, double* d3basis_xyz_eval, + double* d3basis_xzz_eval, double* d3basis_yyy_eval, double* d3basis_yyz_eval, + double* d3basis_yzz_eval, double* d3basis_zzz_eval) override; + void eval_xmat( size_t npts, size_t nbf, size_t nbe, const submat_map_t& submat_map, double fac, const double* P, size_t ldp, @@ -86,6 +96,24 @@ struct ReferenceLocalHostWorkDriver : public detail::LocalHostWorkDriverPIMPL { double* gamma ) override; + void eval_uvvar_mgga_rks( size_t npts, size_t nbe, const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, const double* lbasis_eval, + const double* X, size_t ldx, const double* mmat_x, const double* mmat_y, + const double* mmat_z, size_t ldm, double* den_eval, + double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, + double* gamma, double* tau, double* lapl ) override; + void eval_uvvar_mgga_uks( size_t npts, size_t nbe, const double* basis_eval, + const double* dbasis_x_eval, const double *dbasis_y_eval, + const double* dbasis_z_eval, const double *lbasis_eval, + const double* Xs, size_t ldxs, + const double* Xz, size_t ldxz, + const double* mmat_xs, const double* mmat_ys, const double* mmat_zs, size_t ldms, + const double* mmat_xz, const double* mmat_yz, const double* mmat_zz, size_t ldmz, + double* den_eval, + double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, + double* gamma, double* tau, double* lapl ) override; + void eval_zmat_lda_vxc_rks( size_t npts, size_t nbe, const double* vrho, const double* basis_eval, double* Z, size_t ldz ) override; void eval_zmat_lda_vxc_uks( size_t npts, size_t nbe, const double* vrho, @@ -104,6 +132,24 @@ struct ReferenceLocalHostWorkDriver : public detail::LocalHostWorkDriverPIMPL { double* Zs, size_t ldzs, double* Zz, size_t ldzz ) override; + void eval_zmat_mgga_vxc_rks( size_t npts, size_t nbe, const double* vrho, + const double* vgamma, const double* vlapl, const double* basis_eval, const double* dbasis_x_eval, + const double* dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, + const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, + double* Z, size_t ldz ) override; + void eval_zmat_mgga_vxc_uks( size_t npts, size_t nbe, const double* vrho, + const double* vgamma, const double* vlapl, const double* basis_eval, const double* dbasis_x_eval, + const double* dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, + const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, + double* Zs, size_t ldzs, double* Zz, size_t ldzz ) override; + void eval_mmat_mgga_vxc_rks( size_t npts, size_t nbe, const double* vtau, + const double* vlapl, const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, double* mmat_x, double* mmat_y, double* mmat_z, + size_t ldm ) override; + void eval_mmat_mgga_vxc_uks( size_t npts, size_t nbe, const double* vtau, + const double* vlapl, const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, double* mmat_xs, double* mmat_ys, double* mmat_zs, + size_t ldms, double* mmat_xz, double* mmat_yz, double* mmat_zz, size_t ldmz ) override; void inc_vxc( size_t npts, size_t nbf, size_t nbe, diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_grad.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_grad.hpp index 1f01f7d8..56e27b99 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_grad.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_grad.hpp @@ -10,6 +10,7 @@ #include "reference_replicated_xc_host_integrator.hpp" #include "integrator_util/integrator_common.hpp" #include "host/local_host_work_driver.hpp" +#include "host/blas.hpp" #include namespace GauXC { @@ -66,6 +67,10 @@ void ReferenceReplicatedXCHostIntegrator:: const auto& basis = this->load_balancer_->basis(); const auto& mol = this->load_balancer_->molecule(); + // MGGA constants + const size_t mmga_dim_scal = func.is_mgga() ? 4 : 1; + const bool needs_laplacian = func.is_mgga() ? true : false; // TODO: Check for Laplacian dependence + // // Get basis map BasisSetMap basis_map(basis,mol); @@ -134,6 +139,23 @@ void ReferenceReplicatedXCHostIntegrator:: host_data.vgamma .resize( npts ); } +#if 0 + if( func.is_mgga() ) { + host_data.basis_eval .resize( 11 * npts * nbe ); // basis + grad(3) + hess(6) + lapl + host_data.zmat .resize( 7 * npts * nbe ); // basis + grad(3) + grad(3) + host_data.mmat .resize( npts * nbe ); + host_data.gamma .resize( npts ); + host_data.vgamma .resize( npts ); + host_data.tau .resize( npts ); + host_data.vtau .resize( npts ); + if ( needs_laplacian ) { + host_data.basis_eval.resize( 24 * npts * nbe ); + host_data.lapl .resize( npts ); + host_data.vlapl .resize( npts ); + } + } +#endif + // Alias/Partition out scratch memory auto* basis_eval = host_data.basis_eval.data(); auto* den_eval = host_data.den_scr.data(); @@ -149,6 +171,16 @@ void ReferenceReplicatedXCHostIntegrator:: auto* vrho = host_data.vrho.data(); auto* vgamma = host_data.vgamma.data(); +#if 0 + auto* tau = host_data.tau.data(); + auto* lapl = host_data.lapl.data(); + auto* vtau = host_data.vtau.data(); + auto* vlapl = host_data.vlapl.data(); + auto* mmat_x = mmat; + auto* mmat_y = mmat_x + npts * nbe; + auto* mmat_z = mmat_y + npts * nbe; +#endif + auto* dbasis_x_eval = basis_eval + npts * nbe; auto* dbasis_y_eval = dbasis_x_eval + npts * nbe; auto* dbasis_z_eval = dbasis_y_eval + npts * nbe; @@ -162,6 +194,22 @@ void ReferenceReplicatedXCHostIntegrator:: value_type* d2basis_yy_eval = nullptr; value_type* d2basis_yz_eval = nullptr; value_type* d2basis_zz_eval = nullptr; +#if 0 + value_type* lbasis_eval = nullptr; + value_type* d3basis_xxx_eval = nullptr; + value_type* d3basis_xxy_eval = nullptr; + value_type* d3basis_xxz_eval = nullptr; + value_type* d3basis_xyy_eval = nullptr; + value_type* d3basis_xyz_eval = nullptr; + value_type* d3basis_xzz_eval = nullptr; + value_type* d3basis_yyy_eval = nullptr; + value_type* d3basis_yyz_eval = nullptr; + value_type* d3basis_yzz_eval = nullptr; + value_type* d3basis_zzz_eval = nullptr; + value_type* dlbasis_x_eval = nullptr; + value_type* dlbasis_y_eval = nullptr; + value_type* dlbasis_z_eval = nullptr; +#endif if( func.is_gga() ) { d2basis_xx_eval = dbasis_z_eval + npts * nbe; @@ -172,12 +220,51 @@ void ReferenceReplicatedXCHostIntegrator:: d2basis_zz_eval = d2basis_yz_eval + npts * nbe; } +#if 0 + if( func.is_mgga() ) { + d2basis_xx_eval = dbasis_z_eval + npts * nbe; + d2basis_xy_eval = d2basis_xx_eval + npts * nbe; + d2basis_xz_eval = d2basis_xy_eval + npts * nbe; + d2basis_yy_eval = d2basis_xz_eval + npts * nbe; + d2basis_yz_eval = d2basis_yy_eval + npts * nbe; + d2basis_zz_eval = d2basis_yz_eval + npts * nbe; + if ( true ) { + lbasis_eval = d2basis_zz_eval + npts * nbe; + d3basis_xxx_eval = lbasis_eval + npts * nbe; + d3basis_xxy_eval = d3basis_xxx_eval + npts * nbe; + d3basis_xxz_eval = d3basis_xxy_eval + npts * nbe; + d3basis_xyy_eval = d3basis_xxz_eval + npts * nbe; + d3basis_xyz_eval = d3basis_xyy_eval + npts * nbe; + d3basis_xzz_eval = d3basis_xyz_eval + npts * nbe; + d3basis_yyy_eval = d3basis_xzz_eval + npts * nbe; + d3basis_yyz_eval = d3basis_yyy_eval + npts * nbe; + d3basis_yzz_eval = d3basis_yyz_eval + npts * nbe; + d3basis_zzz_eval = d3basis_yzz_eval + npts * nbe; + dlbasis_x_eval = d3basis_zzz_eval + npts * nbe; + dlbasis_y_eval = dlbasis_x_eval + npts * nbe; + dlbasis_z_eval = dlbasis_y_eval + npts * nbe; + } + } +#endif + // Get the submatrix map for batch auto [submat_map, foo] = gen_compressed_submat_map( basis_map, task.bfn_screening.shell_list, nbf, nbf ); // Evaluate Collocation Gradient (+ Hessian) +#if 0 + if( func.is_mgga() ) { + lwd->eval_collocation_der3( npts, nshells, nbe, points, basis, shell_list, + basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, d2basis_xx_eval, + d2basis_xy_eval, d2basis_xz_eval, d2basis_yy_eval, d2basis_yz_eval, + d2basis_zz_eval, d3basis_xxx_eval, d3basis_xxy_eval, d3basis_xxz_eval, + d3basis_xyy_eval, d3basis_xyz_eval, d3basis_xzz_eval, d3basis_yyy_eval, + d3basis_yyz_eval, d3basis_yzz_eval, d3basis_zzz_eval); + + } + else if( func.is_gga() ) +#endif if( func.is_gga() ) lwd->eval_collocation_hessian( npts, nshells, nbe, points, basis, shell_list, basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, d2basis_xx_eval, @@ -190,7 +277,7 @@ void ReferenceReplicatedXCHostIntegrator:: // Evaluate X matrix (2 * P * B/Bx/By/Bz) -> store in Z // XXX: This assumes that bfn + gradients are contiguous in memory - if( func.is_gga() ) { + if( func.is_gga() or func.is_mgga() ) { lwd->eval_xmat( 4*npts, nbf, nbe, submat_map, 2.0, P, ldp, basis_eval, nbe, zmat, nbe, nbe_scr ); } else { @@ -198,8 +285,33 @@ void ReferenceReplicatedXCHostIntegrator:: zmat, nbe, nbe_scr ); } - // Evaluate U and V variables +#if 0 + if( func.is_mgga() ) { + if ( needs_laplacian ) { + blas::lacpy( 'A', nbe, npts, d2basis_xx_eval, nbe, lbasis_eval, nbe ); + blas::axpy( nbe * npts, 1., d2basis_yy_eval, 1, lbasis_eval, 1); + blas::axpy( nbe * npts, 1., d2basis_zz_eval, 1, lbasis_eval, 1); + + blas::lacpy( 'A', nbe, npts, d3basis_xxx_eval, nbe, dlbasis_x_eval, nbe ); + blas::axpy( nbe * npts, 1., d3basis_xyy_eval, 1, dlbasis_x_eval, 1); + blas::axpy( nbe * npts, 1., d3basis_xzz_eval, 1, dlbasis_x_eval, 1); + + blas::lacpy( 'A', nbe, npts, d3basis_xxy_eval, nbe, dlbasis_y_eval, nbe ); + blas::axpy( nbe * npts, 1., d3basis_yyy_eval, 1, dlbasis_y_eval, 1); + blas::axpy( nbe * npts, 1., d3basis_yzz_eval, 1, dlbasis_y_eval, 1); + + blas::lacpy( 'A', nbe, npts, d3basis_xxz_eval, nbe, dlbasis_z_eval, nbe ); + blas::axpy( nbe * npts, 1., d3basis_yyz_eval, 1, dlbasis_z_eval, 1); + blas::axpy( nbe * npts, 1., d3basis_zzz_eval, 1, dlbasis_z_eval, 1); + } + lwd->eval_uvvar_mgga_rks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, + dbasis_z_eval, lbasis_eval, zmat, nbe, mmat_x, mmat_y, mmat_z, nbe, + den_eval, dden_x_eval, dden_y_eval, dden_z_eval, + gamma, tau, lapl ); + } + else if( func.is_gga() ) +#endif if( func.is_gga() ) lwd->eval_uvvar_gga_rks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, zmat, nbe, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, @@ -209,6 +321,11 @@ void ReferenceReplicatedXCHostIntegrator:: // Evaluate XC functional +#if 0 + if( func.is_mgga() ) + func.eval_exc_vxc( npts, den_eval, gamma, lapl, tau, eps, vrho, vgamma, vlapl, vtau ); + else if(func.is_gga() ) +#endif if( func.is_gga() ) func.eval_exc_vxc( npts, den_eval, gamma, eps, vrho, vgamma ); else @@ -241,7 +358,7 @@ void ReferenceReplicatedXCHostIntegrator:: g_acc_y += vrho_ipt * z * dby; g_acc_z += vrho_ipt * z * dbz; - if( func.is_gga() ) { + if( func.is_gga() or func.is_mgga() ) { // GGA Contributions const double vgamma_ipt = weights[ipt] * vgamma[ipt]; @@ -272,6 +389,45 @@ void ReferenceReplicatedXCHostIntegrator:: g_acc_y += 2 * vgamma_ipt * ( z * d2_term_y + dby * d11_zmat_term ); g_acc_z += 2 * vgamma_ipt * ( z * d2_term_z + dbz * d11_zmat_term ); } +#if 0 + if( func.is_mgga() ) { + + const double vtau_ipt = 0.5 * weights[ipt] * vtau[ipt]; + const double zx = zmat_x[mu_i]; // Z_x = N * B_x + const double zy = zmat_y[mu_i]; // Z_y = N * B_y + const double zz = zmat_z[mu_i]; // Z_z = N * B_z + const double d2bxx = d2basis_xx_eval[mu_i]; // B^2_xx + const double d2bxy = d2basis_xy_eval[mu_i]; // B^2_xy + const double d2bxz = d2basis_xz_eval[mu_i]; // B^2_xz + const double d2byy = d2basis_yy_eval[mu_i]; // B^2_yy + const double d2byz = d2basis_yz_eval[mu_i]; // B^2_yz + const double d2bzz = d2basis_zz_eval[mu_i]; // B^2_zz + double d2_term_x = d2bxx * zx + d2bxy * zy + d2bxz * zz; + double d2_term_y = d2bxy * zx + d2byy * zy + d2byz * zz; + double d2_term_z = d2bxz * zx + d2byz * zy + d2bzz * zz; + + g_acc_x += vtau_ipt * d2_term_x; + g_acc_y += vtau_ipt * d2_term_y; + g_acc_z += vtau_ipt * d2_term_z; + + if ( needs_laplacian ) { + const double vlapl_ipt = weights[ipt] * vlapl[ipt]; + const double lbf = lbasis_eval[mu_i]; + const double dlbx = dlbasis_x_eval[mu_i]; + const double dlby = dlbasis_y_eval[mu_i]; + const double dlbz = dlbasis_z_eval[mu_i]; + d2_term_x = z * dlbx + zx * lbf + 2.0*d2_term_x; + d2_term_y = z * dlby + zy * lbf + 2.0*d2_term_y; + d2_term_z = z * dlbz + zz * lbf + 2.0*d2_term_z; + + g_acc_x += vlapl_ipt * d2_term_x; + g_acc_y += vlapl_ipt * d2_term_y; + g_acc_z += vlapl_ipt * d2_term_z; + + } + + } +#endif } // loop over bfns + grid points diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp index 44afd131..ca13b9b2 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp @@ -10,6 +10,7 @@ #include "reference_replicated_xc_host_integrator.hpp" #include "integrator_util/integrator_common.hpp" #include "host/local_host_work_driver.hpp" +#include "host/blas.hpp" #include namespace GauXC { @@ -138,6 +139,8 @@ void ReferenceReplicatedXCHostIntegrator:: const auto& basis = this->load_balancer_->basis(); const auto& mol = this->load_balancer_->molecule(); + const bool needs_laplacian = func.is_mgga() ? true : false; // TODO: Needs Laplacian Check + // Get basis map BasisSetMap basis_map(basis,mol); @@ -202,9 +205,11 @@ void ReferenceReplicatedXCHostIntegrator:: // Allocate enough memory for batch const size_t spin_dim_scal = is_rks ? 1 : 2; + const size_t mgga_dim_scal = func.is_mgga() ? 4 : 1; // basis + d1basis + // Things that every calc needs host_data.nbe_scr .resize(nbe * nbe); - host_data.zmat .resize(npts * nbe * spin_dim_scal); + host_data.zmat .resize(npts * nbe * spin_dim_scal * mgga_dim_scal); host_data.eps .resize(npts); host_data.vrho .resize(npts * spin_dim_scal); @@ -223,6 +228,23 @@ void ReferenceReplicatedXCHostIntegrator:: host_data.vgamma .resize( gga_dim_scal * npts ); } + if( func.is_mgga() ){ + // TODO: Add check for Laplacian-dependent functionals + if ( needs_laplacian ) { + host_data.basis_eval .resize( 11 * npts * nbe ); // basis + grad (3) + hess (6) + lapl + host_data.lapl .resize( spin_dim_scal * npts ); + host_data.vlapl .resize( spin_dim_scal * npts ); + } else { + host_data.basis_eval .resize( 4 * npts * nbe ); // basis + grad (3) + } + + host_data.den_scr .resize( spin_dim_scal * 4 * npts ); + host_data.gamma .resize( gga_dim_scal * npts ); + host_data.vgamma .resize( gga_dim_scal * npts ); + host_data.tau .resize( npts * spin_dim_scal ); + host_data.vtau .resize( npts * spin_dim_scal ); + } + // Alias/Partition out scratch memory auto* basis_eval = host_data.basis_eval.data(); auto* den_eval = host_data.den_scr.data(); @@ -231,20 +253,38 @@ void ReferenceReplicatedXCHostIntegrator:: decltype(zmat) zmat_z = nullptr; if(!is_rks) { - zmat_z = zmat + nbe * npts; + zmat_z = zmat + mgga_dim_scal * nbe * npts; } auto* eps = host_data.eps.data(); auto* gamma = host_data.gamma.data(); + auto* tau = host_data.tau.data(); + auto* lapl = host_data.lapl.data(); auto* vrho = host_data.vrho.data(); auto* vgamma = host_data.vgamma.data(); + auto* vtau = host_data.vtau.data(); + auto* vlapl = host_data.vlapl.data(); + value_type* dbasis_x_eval = nullptr; value_type* dbasis_y_eval = nullptr; value_type* dbasis_z_eval = nullptr; + value_type* d2basis_xx_eval = nullptr; + value_type* d2basis_xy_eval = nullptr; + value_type* d2basis_xz_eval = nullptr; + value_type* d2basis_yy_eval = nullptr; + value_type* d2basis_yz_eval = nullptr; + value_type* d2basis_zz_eval = nullptr; + value_type* lbasis_eval = nullptr; value_type* dden_x_eval = nullptr; value_type* dden_y_eval = nullptr; value_type* dden_z_eval = nullptr; + value_type* mmat_x = nullptr; + value_type* mmat_y = nullptr; + value_type* mmat_z = nullptr; + value_type* mmat_x_z = nullptr; + value_type* mmat_y_z = nullptr; + value_type* mmat_z_z = nullptr; if( func.is_gga() ) { dbasis_x_eval = basis_eval + npts * nbe; @@ -255,14 +295,56 @@ void ReferenceReplicatedXCHostIntegrator:: dden_z_eval = dden_y_eval + spin_dim_scal * npts; } + if ( func.is_mgga() ) { + dbasis_x_eval = basis_eval + npts * nbe; + dbasis_y_eval = dbasis_x_eval + npts * nbe; + dbasis_z_eval = dbasis_y_eval + npts * nbe; + dden_x_eval = den_eval + spin_dim_scal * npts; + dden_y_eval = dden_x_eval + spin_dim_scal * npts; + dden_z_eval = dden_y_eval + spin_dim_scal * npts; + mmat_x = zmat + npts * nbe; + mmat_y = mmat_x + npts * nbe; + mmat_z = mmat_y + npts * nbe; + if ( needs_laplacian ) { + d2basis_xx_eval = dbasis_z_eval + npts * nbe; + d2basis_xy_eval = d2basis_xx_eval + npts * nbe; + d2basis_xz_eval = d2basis_xy_eval + npts * nbe; + d2basis_yy_eval = d2basis_xz_eval + npts * nbe; + d2basis_yz_eval = d2basis_yy_eval + npts * nbe; + d2basis_zz_eval = d2basis_yz_eval + npts * nbe; + lbasis_eval = d2basis_zz_eval + npts * nbe; + } + if(is_uks) { + mmat_x_z = zmat_z + npts * nbe; + mmat_y_z = mmat_x_z + npts * nbe; + mmat_z_z = mmat_y_z + npts * nbe; + } + } + // Get the submatrix map for batch std::vector< std::array > submat_map; std::tie(submat_map, std::ignore) = gen_compressed_submat_map(basis_map, task.bfn_screening.shell_list, nbf, nbf); + // Evaluate Collocation (+ Grad and Hessian) + if( func.is_mgga() ) { + if ( needs_laplacian ) { + // TODO: Modify gau2grid to compute Laplacian instead of full hessian + lwd->eval_collocation_hessian( npts, nshells, nbe, points, basis, shell_list, + basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, d2basis_xx_eval, + d2basis_xy_eval, d2basis_xz_eval, d2basis_yy_eval, d2basis_yz_eval, + d2basis_zz_eval); + blas::lacpy( 'A', nbe, npts, d2basis_xx_eval, nbe, lbasis_eval, nbe ); + blas::axpy( nbe * npts, 1., d2basis_yy_eval, 1, lbasis_eval, 1); + blas::axpy( nbe * npts, 1., d2basis_zz_eval, 1, lbasis_eval, 1); + } else { + lwd->eval_collocation_gradient( npts, nshells, nbe, points, basis, shell_list, + basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval ); + } + } // Evaluate Collocation (+ Grad) - if( func.is_gga() ) + else if( func.is_gga() ) lwd->eval_collocation_gradient( npts, nshells, nbe, points, basis, shell_list, basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval ); else @@ -272,18 +354,29 @@ void ReferenceReplicatedXCHostIntegrator:: // Evaluate X matrix (fac * P * B) -> store in Z const auto xmat_fac = is_rks ? 2.0 : 1.0; // TODO Fix for spinor RKS input - lwd->eval_xmat( npts, nbf, nbe, submat_map, xmat_fac, Ps, ldps, basis_eval, nbe, + lwd->eval_xmat( mgga_dim_scal * npts, nbf, nbe, submat_map, xmat_fac, Ps, ldps, basis_eval, nbe, zmat, nbe, nbe_scr ); // X matrix for Pz if(not is_rks) { - lwd->eval_xmat( npts, nbf, nbe, submat_map, 1.0, Pz, ldpz, basis_eval, nbe, + lwd->eval_xmat( mgga_dim_scal * npts, nbf, nbe, submat_map, 1.0, Pz, ldpz, basis_eval, nbe, zmat_z, nbe, nbe_scr); } // Evaluate U and V variables - if( func.is_gga() ) { + if( func.is_mgga() ) { + if (is_rks) { + lwd->eval_uvvar_mgga_rks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, + dbasis_z_eval, lbasis_eval, zmat, nbe, mmat_x, mmat_y, mmat_z, + nbe, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, gamma, tau, lapl); + } else if (is_uks) { + lwd->eval_uvvar_mgga_uks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, + dbasis_z_eval, lbasis_eval, zmat, nbe, zmat_z, nbe, + mmat_x, mmat_y, mmat_z, nbe, mmat_x_z, mmat_y_z, mmat_z_z, nbe, + den_eval, dden_x_eval, dden_y_eval, dden_z_eval, gamma, tau, lapl); + } + } else if ( func.is_gga() ) { if(is_rks) { lwd->eval_uvvar_gga_rks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, zmat, nbe, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, @@ -303,7 +396,9 @@ void ReferenceReplicatedXCHostIntegrator:: } // Evaluate XC functional - if( func.is_gga() ) + if( func.is_mgga() ) + func.eval_exc_vxc( npts, den_eval, gamma, lapl, tau, eps, vrho, vgamma, vlapl, vtau); + else if( func.is_gga() ) func.eval_exc_vxc( npts, den_eval, gamma, eps, vrho, vgamma ); else func.eval_exc_vxc( npts, den_eval, eps, vrho ); @@ -325,10 +420,45 @@ void ReferenceReplicatedXCHostIntegrator:: } } + if( func.is_mgga() ){ + for( int32_t i = 0; i < npts; ++i) { + vtau[spin_dim_scal*i] *= weights[i]; + vgamma[gga_dim_scal*i] *= weights[i]; + if(not is_rks) { + vgamma[gga_dim_scal*i+1] *= weights[i]; + vgamma[gga_dim_scal*i+2] *= weights[i]; + vtau[spin_dim_scal*i+1] *= weights[i]; + } + + // TODO: Add checks for Lapacian-dependent functionals + if( needs_laplacian ) { + vlapl[spin_dim_scal*i] *= weights[i]; + if(not is_rks) { + vlapl[spin_dim_scal*i+1] *= weights[i]; + } + } + } + } + // Evaluate Z matrix for VXC - if( func.is_gga() ) { + if( func.is_mgga() ) { + if(is_rks) { + lwd->eval_zmat_mgga_vxc_rks( npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, lbasis_eval, + dden_x_eval, dden_y_eval, dden_z_eval, zmat, nbe); + lwd->eval_mmat_mgga_vxc_rks( npts, nbe, vtau, vlapl, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, + mmat_x, mmat_y, mmat_z, nbe); + } else if (is_uks) { + lwd->eval_zmat_mgga_vxc_uks( npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, lbasis_eval, + dden_x_eval, dden_y_eval, dden_z_eval, zmat, nbe, zmat_z, nbe); + lwd->eval_mmat_mgga_vxc_uks( npts, nbe, vtau, vlapl, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, + mmat_x, mmat_y, mmat_z, nbe, mmat_x_z, mmat_y_z, mmat_z_z, nbe); + } + } + else if( func.is_gga() ) { if(is_rks) { lwd->eval_zmat_gga_vxc_rks( npts, nbe, vrho, vgamma, basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, dden_x_eval, dden_y_eval, @@ -358,11 +488,9 @@ void ReferenceReplicatedXCHostIntegrator:: } // Increment VXC - lwd->inc_vxc( npts, nbf, nbe, basis_eval, submat_map, zmat, nbe, VXCs, ldvxcs, - nbe_scr ); + lwd->inc_vxc( mgga_dim_scal * npts, nbf, nbe, basis_eval, submat_map, zmat, nbe, VXCs, ldvxcs, nbe_scr ); if(not is_rks) { - lwd->inc_vxc( npts, nbf, nbe, basis_eval, submat_map, zmat_z, nbe, VXCz, ldvxcz, - nbe_scr); + lwd->inc_vxc( mgga_dim_scal * npts, nbf, nbe, basis_eval, submat_map, zmat_z, nbe,VXCz, ldvxcz, nbe_scr); } } diff --git a/src/xc_integrator/replicated/host/xc_host_data.hpp b/src/xc_integrator/replicated/host/xc_host_data.hpp index 8772d8ed..75c5ca9c 100644 --- a/src/xc_integrator/replicated/host/xc_host_data.hpp +++ b/src/xc_integrator/replicated/host/xc_host_data.hpp @@ -18,8 +18,12 @@ struct XCHostData { std::vector eps; std::vector gamma; + std::vector tau; + std::vector lapl; std::vector vrho; std::vector vgamma; + std::vector vtau; + std::vector vlapl; std::vector zmat; std::vector gmat; diff --git a/tests/ref_data/cytosine_r2scanl_cc-pvdz_ufg_ssf_robust.hdf5 b/tests/ref_data/cytosine_r2scanl_cc-pvdz_ufg_ssf_robust.hdf5 new file mode 100644 index 00000000..b2a21e2e Binary files /dev/null and b/tests/ref_data/cytosine_r2scanl_cc-pvdz_ufg_ssf_robust.hdf5 differ diff --git a/tests/ref_data/cytosine_r2scanl_cc-pvdz_ufg_ssf_robust_uks.hdf5 b/tests/ref_data/cytosine_r2scanl_cc-pvdz_ufg_ssf_robust_uks.hdf5 new file mode 100644 index 00000000..2e4e62a3 Binary files /dev/null and b/tests/ref_data/cytosine_r2scanl_cc-pvdz_ufg_ssf_robust_uks.hdf5 differ diff --git a/tests/ref_data/cytosine_scan_cc-pvdz_ufg_ssf_robust.hdf5 b/tests/ref_data/cytosine_scan_cc-pvdz_ufg_ssf_robust.hdf5 new file mode 100644 index 00000000..d44d7c0f Binary files /dev/null and b/tests/ref_data/cytosine_scan_cc-pvdz_ufg_ssf_robust.hdf5 differ diff --git a/tests/ref_data/cytosine_scan_cc-pvdz_ufg_ssf_robust_uks.hdf5 b/tests/ref_data/cytosine_scan_cc-pvdz_ufg_ssf_robust_uks.hdf5 new file mode 100644 index 00000000..829f8e96 Binary files /dev/null and b/tests/ref_data/cytosine_scan_cc-pvdz_ufg_ssf_robust_uks.hdf5 differ diff --git a/tests/xc_integrator.cxx b/tests/xc_integrator.cxx index efeb24d6..5dbc90be 100644 --- a/tests/xc_integrator.cxx +++ b/tests/xc_integrator.cxx @@ -22,7 +22,7 @@ using namespace GauXC; void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, std::string reference_file, - ExchCXX::Functional func_key, + functional_type& func, PruningScheme pruning_scheme, size_t quad_pad_value, bool check_grad, @@ -97,6 +97,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, } if(uks and ex == ExecutionSpace::Device) return; + if(func.is_mgga() and ex == ExecutionSpace::Device) return; for( auto& sh : basis ) @@ -117,8 +118,8 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, mw.modify_weights(lb); // Construct XC Functional - auto Spin = uks ? ExchCXX::Spin::Polarized : ExchCXX::Spin::Unpolarized; - functional_type func( ExchCXX::Backend::builtin, func_key, Spin ); + //auto Spin = uks ? ExchCXX::Spin::Polarized : ExchCXX::Spin::Unpolarized; + //functional_type func( ExchCXX::Backend::builtin, func_key, Spin ); // Construct XCIntegrator XCIntegratorFactory integrator_factory( ex, "Replicated", @@ -188,7 +189,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, } -void test_integrator(std::string reference_file, ExchCXX::Functional func, PruningScheme pruning_scheme) { +void test_integrator(std::string reference_file, functional_type& func, PruningScheme pruning_scheme) { #ifdef GAUXC_ENABLE_DEVICE auto rt = DeviceRuntimeEnvironment(GAUXC_MPI_CODE(MPI_COMM_WORLD,) 0.9); @@ -254,46 +255,102 @@ void test_integrator(std::string reference_file, ExchCXX::Functional func, Pruni } +functional_type make_functional(ExchCXX::Functional func_key, ExchCXX::Spin spin) { + return functional_type(ExchCXX::Backend::builtin, func_key, spin); +} + TEST_CASE( "XC Integrator", "[xc-integrator]" ) { + auto pol = ExchCXX::Spin::Polarized; + auto unpol = ExchCXX::Spin::Unpolarized; + auto svwn5 = ExchCXX::Functional::SVWN5; + auto pbe0 = ExchCXX::Functional::PBE0; + auto blyp = ExchCXX::Functional::BLYP; // LDA Test SECTION( "Benzene / SVWN5 / cc-pVDZ" ) { + auto func = make_functional(svwn5, unpol); test_integrator(GAUXC_REF_DATA_PATH "/benzene_svwn5_cc-pvdz_ufg_ssf.hdf5", - ExchCXX::Functional::SVWN5, PruningScheme::Unpruned ); + func, PruningScheme::Unpruned ); } SECTION( "Benzene / SVWN5 / cc-pVDZ (Treutler)" ) { + auto func = make_functional(svwn5, unpol); test_integrator(GAUXC_REF_DATA_PATH "/benzene_svwn5_cc-pvdz_ufg_ssf_treutler_prune.hdf5", - ExchCXX::Functional::SVWN5, PruningScheme::Treutler ); + func, PruningScheme::Treutler ); } SECTION( "Benzene / SVWN5 / cc-pVDZ (Robust)" ) { + auto func = make_functional(svwn5, unpol); test_integrator(GAUXC_REF_DATA_PATH "/benzene_svwn5_cc-pvdz_ufg_ssf_robust_prune.hdf5", - ExchCXX::Functional::SVWN5, PruningScheme::Robust ); + func, PruningScheme::Robust ); } // GGA Test SECTION( "Benzene / PBE0 / cc-pVDZ" ) { + auto func = make_functional(pbe0, unpol); test_integrator(GAUXC_REF_DATA_PATH "/benzene_pbe0_cc-pvdz_ufg_ssf.hdf5", - ExchCXX::Functional::PBE0, PruningScheme::Unpruned ); + func, PruningScheme::Unpruned ); + } + + // MGGA Test (TAU Only) + SECTION( "Cytosine / SCAN / cc-pVDZ") { + functional_type func({ + {1.0, ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_X_SCAN"), unpol)}, + {1.0, ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_C_SCAN"), unpol)}, + }); + test_integrator(GAUXC_REF_DATA_PATH "/cytosine_scan_cc-pvdz_ufg_ssf_robust.hdf5", + func, PruningScheme::Robust ); + } + + // MGGA Test (TAU + LAPL) + SECTION( "Cytosine / R2SCANL / cc-pVDZ") { + functional_type func({ + {1.0, ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_X_R2SCANL"), unpol)}, + {1.0, ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_C_R2SCANL"), unpol)}, + }); + test_integrator(GAUXC_REF_DATA_PATH "/cytosine_r2scanl_cc-pvdz_ufg_ssf_robust.hdf5", + func, PruningScheme::Robust ); } //UKS LDA Test SECTION( "Li / SVWN5 / sto-3g" ) { + auto func = make_functional(svwn5, pol); test_integrator(GAUXC_REF_DATA_PATH "/li_svwn5_sto3g_uks.bin", - ExchCXX::Functional::SVWN5, PruningScheme::Unpruned ); + func, PruningScheme::Unpruned ); } //UKS GGA Test SECTION( "Li / BLYP / sto-3g" ) { + auto func = make_functional(blyp, pol); test_integrator(GAUXC_REF_DATA_PATH "/li_blyp_sto3g_uks.bin", - ExchCXX::Functional::BLYP, PruningScheme::Unpruned ); + func, PruningScheme::Unpruned ); + } + + // UKS MGGA Test (TAU Only) + SECTION( "Cytosine (doublet) / SCAN / cc-pVDZ") { + functional_type func({ + {1.0, ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_X_SCAN"), pol)}, + {1.0, ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_C_SCAN"), pol)}, + }); + test_integrator(GAUXC_REF_DATA_PATH "/cytosine_scan_cc-pvdz_ufg_ssf_robust_uks.hdf5", + func, PruningScheme::Robust ); + } + + // UKS MGGA Test (TAU + LAPL) + SECTION( "Cytosine (doublet) / R2SCANL / cc-pVDZ") { + functional_type func({ + {1.0, ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_X_R2SCANL"), pol)}, + {1.0, ExchCXX::XCKernel(ExchCXX::libxc_name_string("MGGA_C_R2SCANL"), pol)}, + }); + test_integrator(GAUXC_REF_DATA_PATH "/cytosine_r2scanl_cc-pvdz_ufg_ssf_robust_uks.hdf5", + func, PruningScheme::Robust ); } // sn-LinK Test SECTION( "Benzene / PBE0 / 6-31G(d)" ) { + auto func = make_functional(pbe0, unpol); test_integrator(GAUXC_REF_DATA_PATH "/benzene_631gd_pbe0_ufg.hdf5", - ExchCXX::Functional::PBE0, PruningScheme::Unpruned ); + func, PruningScheme::Unpruned ); } }