diff --git a/include/gauxc/xc_integrator.hpp b/include/gauxc/xc_integrator.hpp index 547d07f1..3cffcf7e 100644 --- a/include/gauxc/xc_integrator.hpp +++ b/include/gauxc/xc_integrator.hpp @@ -33,6 +33,7 @@ class XCIntegrator { using exc_vxc_type_rks = std::tuple< value_type, matrix_type >; using exc_vxc_type_uks = std::tuple< value_type, matrix_type, matrix_type >; + using exc_vxc_type_gks = std::tuple< value_type, matrix_type, matrix_type, matrix_type, matrix_type >; using exc_grad_type = std::vector< value_type >; using exx_type = matrix_type; @@ -53,8 +54,12 @@ class XCIntegrator { XCIntegrator( XCIntegrator&& ) noexcept; value_type integrate_den( const MatrixType& ); - exc_vxc_type_rks eval_exc_vxc ( const MatrixType& ); - exc_vxc_type_uks eval_exc_vxc ( const MatrixType&, const MatrixType& ); + exc_vxc_type_rks eval_exc_vxc ( const MatrixType&, + const IntegratorSettingsXC& = IntegratorSettingsXC{} ); + exc_vxc_type_uks eval_exc_vxc ( const MatrixType&, const MatrixType&, + const IntegratorSettingsXC& = IntegratorSettingsXC{} ); + exc_vxc_type_gks eval_exc_vxc ( const MatrixType&, const MatrixType&, const MatrixType&, const MatrixType&, + const IntegratorSettingsXC& = IntegratorSettingsXC{}); exc_grad_type eval_exc_grad( const MatrixType& ); exx_type eval_exx ( const MatrixType&, const IntegratorSettingsEXX& = IntegratorSettingsEXX{} ); diff --git a/include/gauxc/xc_integrator/impl.hpp b/include/gauxc/xc_integrator/impl.hpp index 7d839647..c7956b57 100644 --- a/include/gauxc/xc_integrator/impl.hpp +++ b/include/gauxc/xc_integrator/impl.hpp @@ -33,18 +33,26 @@ typename XCIntegrator::value_type template typename XCIntegrator::exc_vxc_type_rks - XCIntegrator::eval_exc_vxc( const MatrixType& P ) { + XCIntegrator::eval_exc_vxc( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) { if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); - return pimpl_->eval_exc_vxc(P); + return pimpl_->eval_exc_vxc(P, ks_settings); }; template typename XCIntegrator::exc_vxc_type_uks - XCIntegrator::eval_exc_vxc( const MatrixType& Pscalar, const MatrixType& Pz ) { + XCIntegrator::eval_exc_vxc( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) { if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); - return pimpl_->eval_exc_vxc(Pscalar, Pz); + return pimpl_->eval_exc_vxc(Ps, Pz, ks_settings); }; +template +typename XCIntegrator::exc_vxc_type_gks + XCIntegrator::eval_exc_vxc( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px, + const IntegratorSettingsXC& ks_settings ) { + if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); + return pimpl_->eval_exc_vxc(Ps, Pz, Py, Px, ks_settings); + }; + template typename XCIntegrator::exc_grad_type XCIntegrator::eval_exc_grad( const MatrixType& P ) { diff --git a/include/gauxc/xc_integrator/replicated/impl.hpp b/include/gauxc/xc_integrator/replicated/impl.hpp index 8d3d23b6..132faa09 100644 --- a/include/gauxc/xc_integrator/replicated/impl.hpp +++ b/include/gauxc/xc_integrator/replicated/impl.hpp @@ -63,14 +63,14 @@ typename ReplicatedXCIntegrator::value_type template typename ReplicatedXCIntegrator::exc_vxc_type_rks - ReplicatedXCIntegrator::eval_exc_vxc_( const MatrixType& P ) { + ReplicatedXCIntegrator::eval_exc_vxc_( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) { if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); matrix_type VXC( P.rows(), P.cols() ); value_type EXC; pimpl_->eval_exc_vxc( P.rows(), P.cols(), P.data(), P.rows(), - VXC.data(), VXC.rows(), &EXC ); + VXC.data(), VXC.rows(), &EXC, ks_settings ); return std::make_tuple( EXC, VXC ); @@ -78,19 +78,44 @@ typename ReplicatedXCIntegrator::exc_vxc_type_rks template typename ReplicatedXCIntegrator::exc_vxc_type_uks - ReplicatedXCIntegrator::eval_exc_vxc_( const MatrixType& Pscalar, const MatrixType& Pz ) { + ReplicatedXCIntegrator::eval_exc_vxc_( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) { if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); - matrix_type VXCscalar( Pscalar.rows(), Pscalar.cols() ); + matrix_type VXCs( Ps.rows(), Ps.cols() ); matrix_type VXCz( Pz.rows(), Pz.cols() ); value_type EXC; - pimpl_->eval_exc_vxc( Pscalar.rows(), Pscalar.cols(), Pscalar.data(), Pscalar.rows(), + pimpl_->eval_exc_vxc( Ps.rows(), Ps.cols(), Ps.data(), Ps.rows(), Pz.data(), Pz.rows(), - VXCscalar.data(), VXCscalar.rows(), - VXCz.data(), VXCz.rows(), &EXC ); + VXCs.data(), VXCs.rows(), + VXCz.data(), VXCz.rows(), &EXC, ks_settings ); - return std::make_tuple( EXC, VXCscalar, VXCz ); + return std::make_tuple( EXC, VXCs, VXCz ); + +} + +template +typename ReplicatedXCIntegrator::exc_vxc_type_gks + ReplicatedXCIntegrator::eval_exc_vxc_( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px, + const IntegratorSettingsXC& ks_settings) { + + if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); + matrix_type VXCs( Ps.rows(), Ps.cols() ); + matrix_type VXCz( Pz.rows(), Pz.cols() ); + matrix_type VXCy( Py.rows(), Py.cols() ); + matrix_type VXCx( Px.rows(), Px.cols() ); + value_type EXC; + + pimpl_->eval_exc_vxc( Ps.rows(), Ps.cols(), Ps.data(), Ps.rows(), + Pz.data(), Pz.rows(), + Py.data(), Py.rows(), + Px.data(), Px.rows(), + VXCs.data(), VXCs.rows(), + VXCz.data(), VXCz.rows(), + VXCy.data(), VXCy.rows(), + VXCx.data(), VXCx.rows(), &EXC, ks_settings ); + + return std::make_tuple( EXC, VXCs, VXCz, VXCy, VXCx); } diff --git a/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp b/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp index 4e0854fd..893db54c 100644 --- a/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp +++ b/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp @@ -40,14 +40,28 @@ class ReplicatedXCIntegratorImpl { int64_t ldp, value_type* N_EL ) = 0; virtual void eval_exc_vxc_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* VXC, int64_t ldvxc, - value_type* EXC ) = 0; - virtual void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Pscalar, - int64_t ldpscalar, + value_type* EXC, const IntegratorSettingsXC& ks_settings ) = 0; + virtual void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, const value_type* Pz, int64_t ldpz, - value_type* VXCscalar, int64_t ldvxcscalar, + value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, - value_type* EXC ) = 0; + value_type* EXC, const IntegratorSettingsXC& ks_settings ) = 0; + virtual void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, + const value_type* Pz, + int64_t ldpz, + const value_type* Py, + int64_t ldpy, + const value_type* Px, + int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, + value_type* EXC, const IntegratorSettingsXC& ks_settings ) = 0; + virtual void eval_exc_grad_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* EXC_GRAD ) = 0; virtual void eval_exx_( int64_t m, int64_t n, const value_type* P, @@ -69,15 +83,29 @@ class ReplicatedXCIntegratorImpl { void eval_exc_vxc( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* VXC, int64_t ldvxc, - value_type* EXC ); + value_type* EXC, const IntegratorSettingsXC& ks_settings ); - void eval_exc_vxc( int64_t m, int64_t n, const value_type* Pscalar, - int64_t ldpscalar, + void eval_exc_vxc( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, const value_type* Pz, int64_t ldpz, - value_type* VXCscalar, int64_t ldvxcscalar, + value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, - value_type* EXC ); + value_type* EXC, const IntegratorSettingsXC& ks_settings ); + void eval_exc_vxc( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, + const value_type* Pz, + int64_t ldpz, + const value_type* Py, + int64_t ldpy, + const value_type* Px, + int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, + value_type* EXC, const IntegratorSettingsXC& ks_settings ); + void eval_exc_grad( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* EXC_GRAD ); diff --git a/include/gauxc/xc_integrator/replicated_xc_integrator.hpp b/include/gauxc/xc_integrator/replicated_xc_integrator.hpp index a2a536f5..945f955b 100644 --- a/include/gauxc/xc_integrator/replicated_xc_integrator.hpp +++ b/include/gauxc/xc_integrator/replicated_xc_integrator.hpp @@ -30,6 +30,7 @@ class ReplicatedXCIntegrator : public XCIntegratorImpl { using value_type = typename XCIntegratorImpl::value_type; using exc_vxc_type_rks = typename XCIntegratorImpl::exc_vxc_type_rks; using exc_vxc_type_uks = typename XCIntegratorImpl::exc_vxc_type_uks; + using exc_vxc_type_gks = typename XCIntegratorImpl::exc_vxc_type_gks; using exc_grad_type = typename XCIntegratorImpl::exc_grad_type; using exx_type = typename XCIntegratorImpl::exx_type; @@ -39,8 +40,9 @@ class ReplicatedXCIntegrator : public XCIntegratorImpl { std::unique_ptr< pimpl_type > pimpl_; value_type integrate_den_( const MatrixType& ) override; - exc_vxc_type_rks eval_exc_vxc_ ( const MatrixType& ) override; - exc_vxc_type_uks eval_exc_vxc_ ( const MatrixType&, const MatrixType& ) override; + exc_vxc_type_rks eval_exc_vxc_ ( const MatrixType&, const IntegratorSettingsXC& ) override; + exc_vxc_type_uks eval_exc_vxc_ ( const MatrixType&, const MatrixType&, const IntegratorSettingsXC&) override; + exc_vxc_type_gks eval_exc_vxc_ ( const MatrixType&, const MatrixType&, const MatrixType&, const MatrixType&, const IntegratorSettingsXC& ) override; exc_grad_type eval_exc_grad_( const MatrixType& ) override; exx_type eval_exx_ ( const MatrixType&, const IntegratorSettingsEXX& ) override; const util::Timer& get_timings_() const override; diff --git a/include/gauxc/xc_integrator/xc_integrator_impl.hpp b/include/gauxc/xc_integrator/xc_integrator_impl.hpp index beb615a3..8a9ca618 100644 --- a/include/gauxc/xc_integrator/xc_integrator_impl.hpp +++ b/include/gauxc/xc_integrator/xc_integrator_impl.hpp @@ -22,14 +22,17 @@ class XCIntegratorImpl { using value_type = typename matrix_type::value_type; using exc_vxc_type_rks = typename XCIntegrator::exc_vxc_type_rks; using exc_vxc_type_uks = typename XCIntegrator::exc_vxc_type_uks; + using exc_vxc_type_gks = typename XCIntegrator::exc_vxc_type_gks; using exc_grad_type = typename XCIntegrator::exc_grad_type; using exx_type = typename XCIntegrator::exx_type; protected: virtual value_type integrate_den_( const MatrixType& P ) = 0; - virtual exc_vxc_type_rks eval_exc_vxc_ ( const MatrixType& P ) = 0; - virtual exc_vxc_type_uks eval_exc_vxc_ ( const MatrixType& Pscalar, const MatrixType& Pz ) = 0; + virtual exc_vxc_type_rks eval_exc_vxc_ ( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) = 0; + virtual exc_vxc_type_uks eval_exc_vxc_ ( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) = 0; + virtual exc_vxc_type_gks eval_exc_vxc_ ( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px, + const IntegratorSettingsXC& ks_settings ) = 0; virtual exc_grad_type eval_exc_grad_( const MatrixType& P ) = 0; virtual exx_type eval_exx_ ( const MatrixType& P, const IntegratorSettingsEXX& settings ) = 0; @@ -62,12 +65,16 @@ class XCIntegratorImpl { * @param[in] P The alpha density matrix * @returns EXC / VXC in a combined structure */ - exc_vxc_type_rks eval_exc_vxc( const MatrixType& P ) { - return eval_exc_vxc_(P); + exc_vxc_type_rks eval_exc_vxc( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) { + return eval_exc_vxc_(P, ks_settings); } - exc_vxc_type_uks eval_exc_vxc( const MatrixType& Pscalar, const MatrixType& Pz ) { - return eval_exc_vxc_(Pscalar, Pz); + exc_vxc_type_uks eval_exc_vxc( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) { + return eval_exc_vxc_(Ps, Pz, ks_settings); + } + + exc_vxc_type_gks eval_exc_vxc( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px, const IntegratorSettingsXC& ks_settings ) { + return eval_exc_vxc_(Ps, Pz, Py, Px, ks_settings); } /** Integrate EXC gradient for RKS diff --git a/include/gauxc/xc_integrator_settings.hpp b/include/gauxc/xc_integrator_settings.hpp index 7a8c5074..11c21609 100644 --- a/include/gauxc/xc_integrator_settings.hpp +++ b/include/gauxc/xc_integrator_settings.hpp @@ -16,4 +16,9 @@ struct IntegratorSettingsSNLinK : public IntegratorSettingsEXX { double k_tol = 1e-10; }; +struct IntegratorSettingsXC { virtual ~IntegratorSettingsXC() noexcept = default; }; +struct IntegratorSettingsKS : public IntegratorSettingsXC { + double gks_dtol = 1e-12; +}; + } diff --git a/src/xc_integrator/local_work_driver/device/local_device_work_driver.cxx b/src/xc_integrator/local_work_driver/device/local_device_work_driver.cxx index 81e21517..49551850 100644 --- a/src/xc_integrator/local_work_driver/device/local_device_work_driver.cxx +++ b/src/xc_integrator/local_work_driver/device/local_device_work_driver.cxx @@ -44,12 +44,18 @@ FWD_TO_PIMPL(eval_uvvar_gga_rks) // U/VVar GGA (density + grad, gamma FWD_TO_PIMPL(eval_uvvar_lda_uks) // U/VVar LDA (density) FWD_TO_PIMPL(eval_uvvar_gga_uks) // U/VVar GGA (density + grad, gamma) +FWD_TO_PIMPL(eval_uvvar_lda_gks) // U/VVar LDA (density) +FWD_TO_PIMPL(eval_uvvar_gga_gks) // U/VVar GGA (density + grad, gamma) + FWD_TO_PIMPL(eval_zmat_lda_vxc_rks) // Eval Z Matrix LDA VXC FWD_TO_PIMPL(eval_zmat_gga_vxc_rks) // Eval Z Matrix GGA VXC FWD_TO_PIMPL(eval_zmat_lda_vxc_uks) // Eval Z Matrix LDA VXC FWD_TO_PIMPL(eval_zmat_gga_vxc_uks) // Eval Z Matrix GGA VXC +FWD_TO_PIMPL(eval_zmat_lda_vxc_gks) // Eval Z Matrix LDA VXC +FWD_TO_PIMPL(eval_zmat_gga_vxc_gks) // Eval Z Matrix GGA VXC + FWD_TO_PIMPL(eval_exx_fmat) // Eval EXX F Matrix //FWD_TO_PIMPL(eval_exx_gmat) // Eval EXX G Matrix diff --git a/src/xc_integrator/local_work_driver/device/local_device_work_driver.hpp b/src/xc_integrator/local_work_driver/device/local_device_work_driver.hpp index 1d8c8336..a2b89e64 100644 --- a/src/xc_integrator/local_work_driver/device/local_device_work_driver.hpp +++ b/src/xc_integrator/local_work_driver/device/local_device_work_driver.hpp @@ -69,6 +69,9 @@ class LocalDeviceWorkDriver : public LocalWorkDriver { void eval_uvvar_lda_uks( XCDeviceData* ); void eval_uvvar_gga_uks( XCDeviceData* ); + void eval_uvvar_lda_gks( XCDeviceData* ); + void eval_uvvar_gga_gks( XCDeviceData* ); + void eval_kern_exc_vxc_lda( const functional_type&, XCDeviceData* ); void eval_kern_exc_vxc_gga( const functional_type&, XCDeviceData* ); @@ -78,6 +81,9 @@ class LocalDeviceWorkDriver : public LocalWorkDriver { void eval_zmat_lda_vxc_uks( XCDeviceData* ); void eval_zmat_gga_vxc_uks( XCDeviceData* ); + void eval_zmat_lda_vxc_gks( XCDeviceData* ); + void eval_zmat_gga_vxc_gks( XCDeviceData* ); + void eval_exx_fmat( XCDeviceData* ); void eval_exx_gmat( XCDeviceData*, const BasisSetMap& ); diff --git a/src/xc_integrator/local_work_driver/device/local_device_work_driver_pimpl.hpp b/src/xc_integrator/local_work_driver/device/local_device_work_driver_pimpl.hpp index f9cfe6ca..4dd5e126 100644 --- a/src/xc_integrator/local_work_driver/device/local_device_work_driver_pimpl.hpp +++ b/src/xc_integrator/local_work_driver/device/local_device_work_driver_pimpl.hpp @@ -41,12 +41,16 @@ struct LocalDeviceWorkDriverPIMPL { virtual void eval_uvvar_gga_rks( XCDeviceData* ) = 0; virtual void eval_uvvar_lda_uks( XCDeviceData* ) = 0; virtual void eval_uvvar_gga_uks( XCDeviceData* ) = 0; + virtual void eval_uvvar_lda_gks( XCDeviceData* ) = 0; + virtual void eval_uvvar_gga_gks( XCDeviceData* ) = 0; virtual void eval_kern_exc_vxc_lda( const functional_type&, XCDeviceData* ) = 0; virtual void eval_kern_exc_vxc_gga( const functional_type&, XCDeviceData* ) = 0; virtual void eval_zmat_lda_vxc_rks( XCDeviceData* ) = 0; virtual void eval_zmat_gga_vxc_rks( XCDeviceData* ) = 0; virtual void eval_zmat_lda_vxc_uks( XCDeviceData* ) = 0; virtual void eval_zmat_gga_vxc_uks( XCDeviceData* ) = 0; + virtual void eval_zmat_lda_vxc_gks( XCDeviceData* ) = 0; + virtual void eval_zmat_gga_vxc_gks( XCDeviceData* ) = 0; virtual void inc_exc( XCDeviceData* ) = 0; virtual void inc_nel( XCDeviceData* ) = 0; virtual void inc_vxc( XCDeviceData* ) = 0; diff --git a/src/xc_integrator/local_work_driver/device/scheme1_base.cxx b/src/xc_integrator/local_work_driver/device/scheme1_base.cxx index 4c5fdcd5..4d072627 100644 --- a/src/xc_integrator/local_work_driver/device/scheme1_base.cxx +++ b/src/xc_integrator/local_work_driver/device/scheme1_base.cxx @@ -288,6 +288,18 @@ void AoSScheme1Base::eval_zmat_gga_vxc_uks( XCDeviceData* ){ } +void AoSScheme1Base::eval_zmat_lda_vxc_gks( XCDeviceData* ){ + + GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE"); + +} + +void AoSScheme1Base::eval_zmat_gga_vxc_gks( XCDeviceData* ){ + + GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE"); + +} + void AoSScheme1Base::eval_collocation( XCDeviceData* _data ) { auto* data = dynamic_cast(_data); @@ -489,6 +501,18 @@ void AoSScheme1Base::eval_uvvar_gga_uks( XCDeviceData* ){ } +void AoSScheme1Base::eval_uvvar_lda_gks( XCDeviceData* ){ + + GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE"); + +} + +void AoSScheme1Base::eval_uvvar_gga_gks( XCDeviceData* ){ + + GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE"); + +} + void AoSScheme1Base::eval_kern_exc_vxc_lda( const functional_type& func, XCDeviceData* _data ) { diff --git a/src/xc_integrator/local_work_driver/device/scheme1_base.hpp b/src/xc_integrator/local_work_driver/device/scheme1_base.hpp index 377272e3..e4a29af7 100644 --- a/src/xc_integrator/local_work_driver/device/scheme1_base.hpp +++ b/src/xc_integrator/local_work_driver/device/scheme1_base.hpp @@ -27,6 +27,11 @@ struct AoSScheme1Base : public detail::LocalDeviceWorkDriverPIMPL { void eval_zmat_lda_vxc_uks( XCDeviceData* ) override final; void eval_zmat_gga_vxc_uks( XCDeviceData* ) override final; + void eval_uvvar_lda_gks( XCDeviceData* ) override final; + void eval_uvvar_gga_gks( XCDeviceData* ) override final; + void eval_zmat_lda_vxc_gks( XCDeviceData* ) override final; + void eval_zmat_gga_vxc_gks( XCDeviceData* ) override final; + void eval_kern_exc_vxc_lda( const functional_type&, XCDeviceData* ) override final; void eval_kern_exc_vxc_gga( const functional_type&, XCDeviceData* ) override final; void inc_exc( XCDeviceData* ) override final; diff --git a/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx b/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx index fd58656b..bed0ab95 100644 --- a/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx +++ b/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx @@ -167,6 +167,16 @@ void LocalHostWorkDriver::eval_uvvar_lda_uks( size_t npts, size_t nbe, } +void LocalHostWorkDriver::eval_uvvar_lda_gks( size_t npts, size_t nbe, + const double* basis_eval, const double* Xs, size_t ldxs, const double* Xz, + size_t ldxz, const double* Xx, size_t ldxx, const double* Xy, size_t ldxy, + double* den_eval, double* K, const double dtol) { + + throw_if_invalid_pimpl(pimpl_); + pimpl_->eval_uvvar_lda_gks(npts, nbe, basis_eval, Xs, ldxs, Xz, ldxz, Xx, ldxx, Xy, ldxy, den_eval, K, dtol); + +} + // U/VVar GGA (density + grad, gamma) void LocalHostWorkDriver::eval_uvvar_gga_rks( size_t npts, size_t nbe, @@ -196,6 +206,21 @@ void LocalHostWorkDriver::eval_uvvar_gga_uks( size_t npts, size_t nbe, } +void LocalHostWorkDriver::eval_uvvar_gga_gks( size_t npts, size_t nbe, + const double* basis_eval, const double* dbasis_x_eval, + const double *dbasis_y_eval, const double* dbasis_z_eval, const double* Xs, + size_t ldxs, const double* Xz, size_t ldxz, const double* Xx, size_t ldxx, + const double* Xy, size_t ldxy, double* den_eval, double* dden_x_eval, double* dden_y_eval, + double* dden_z_eval, double* gamma, double* K, double* H, const double dtol ) { + + throw_if_invalid_pimpl(pimpl_); + pimpl_->eval_uvvar_gga_gks(npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, + dbasis_z_eval, Xs, ldxs, Xz, ldxz, Xx, ldxx, Xy, ldxy, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, + gamma, K, H, dtol); + +} + + // U/VVar MGGA(density, grad, gamma, tau, lapl) void LocalHostWorkDriver::eval_uvvar_mgga_rks( size_t npts, size_t nbe, const double* basis_eval, const double* dbasis_x_eval, @@ -250,6 +275,18 @@ void LocalHostWorkDriver::eval_zmat_lda_vxc_uks( size_t npts, size_t nbe, } +void LocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nbe, + const double* vrho, const double* basis_eval, double* Zs, size_t ldzs, + double* Zz, size_t ldzz,double* Zx, size_t ldzx, double* Zy, size_t ldzy, double* K ) { + + throw_if_invalid_pimpl(pimpl_); + pimpl_->eval_zmat_lda_vxc_gks(npts, nbe, vrho, basis_eval, Zs, ldzs, + Zz, ldzz, Zx, ldzx, Zy, ldzy, K); + + +} + + // Eval Z Matrix GGA VXC void LocalHostWorkDriver::eval_zmat_gga_vxc_rks( size_t npts, size_t nbe, const double* vrho, const double* vgamma, const double* basis_eval, @@ -278,6 +315,19 @@ void LocalHostWorkDriver::eval_zmat_gga_vxc_uks( size_t npts, size_t nbe, } +void LocalHostWorkDriver::eval_zmat_gga_vxc_gks( size_t npts, size_t nbe, + const double* vrho, const double* vgamma, const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, const double* dden_x_eval, + const double* dden_y_eval, const double* dden_z_eval, double* Zs, size_t ldzs, + double* Zz, size_t ldzz, double* Zx, size_t ldzx,double* Zy, size_t ldzy, double* K, double* H ) { + + throw_if_invalid_pimpl(pimpl_); + pimpl_->eval_zmat_gga_vxc_gks(npts, nbe, vrho, vgamma, basis_eval, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, dden_x_eval, dden_y_eval, dden_z_eval, + Zs, ldzs, Zz, ldzz, Zx, ldzx, Zy, ldzy, K, H); + +} // Eval Z Matrix MGGA VXC void LocalHostWorkDriver::eval_zmat_mgga_vxc_rks( size_t npts, size_t nbe, diff --git a/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp b/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp index c5be4a87..5f6ee6de 100644 --- a/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp +++ b/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp @@ -247,6 +247,11 @@ class LocalHostWorkDriver : public LocalWorkDriver { const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, double* den_eval); + void eval_uvvar_lda_gks( size_t npts, size_t nbe, const double* basis_eval, + const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, + const double* Xx, size_t ldxx, const double* Xy, size_t ldxy, double* den_eval, double* K, const double dtol); + + /** Evaluate the U and V variavles for RKS GGA * * U = rho + gradient @@ -278,6 +283,13 @@ class LocalHostWorkDriver : public LocalWorkDriver { const double* Xz, size_t ldxz, double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma ); + void eval_uvvar_gga_gks( size_t npts, size_t nbe, const double* basis_eval, + const double* dbasis_x_eavl, const double *dbasis_y_eval, + const double* dbasis_z_eval, const double* Xs, size_t ldxs, + const double* Xz, size_t ldxz, const double* Xx, size_t ldxx, + const double* Xy, size_t ldxy, double* den_eval, + double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma, double* K, double* H, const double dtol ); + /** Evaluate the U and V variavles for RKS MGGA * * U = rho + gradient + tau + lapl @@ -342,6 +354,10 @@ class LocalHostWorkDriver : public LocalWorkDriver { const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ); + void eval_zmat_lda_vxc_gks( size_t npts, size_t nbe, const double* vrho, + const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz, + double* Zx, size_t ldzx,double* Zy, size_t ldzy, double *K ); + /** Evaluate the VXC Z Matrix for RKS LDA * * Z(mu,i) = 0.5 * vrho(i) * B(mu, i) + @@ -376,6 +392,12 @@ class LocalHostWorkDriver : public LocalWorkDriver { const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ); + void eval_zmat_gga_vxc_gks( size_t npts, size_t nbe, const double* vrho, + const double* vgamma, const double* basis_eval, const double* dbasis_x_eval, + const double* dbasis_y_eval, const double* dbasis_z_eval, + const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, + double* Zs, size_t ldzs, double* Zz, size_t ldzz, double* Zx, size_t ldzx, + double* Zy, size_t ldzy, double* K, double* H ); /** Evaluate the VXC Z Matrix for RKS MGGA * diff --git a/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp b/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp index e44bd562..8cd57456 100644 --- a/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp +++ b/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp @@ -80,6 +80,9 @@ struct LocalHostWorkDriverPIMPL { virtual void eval_uvvar_lda_uks( size_t npts, size_t nbe, const double* basis_eval, const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, double* den_eval) = 0; + virtual void eval_uvvar_lda_gks( size_t npts, size_t nbe, const double* basis_eval, + const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, + const double* Xx, size_t ldxx, const double* Xy, size_t ldxy, double* den_eval, double* K, const double dtol) = 0; virtual void eval_uvvar_gga_rks( size_t npts, size_t nbe, const double* basis_eval, const double* dbasis_x_eval, const double *dbasis_y_eval, @@ -92,6 +95,12 @@ struct LocalHostWorkDriverPIMPL { const double* Xz, size_t ldxz, double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma ) = 0; + virtual void eval_uvvar_gga_gks( size_t npts, size_t nbe, const double* basis_eval, + const double* dbasis_x_eavl, const double *dbasis_y_eval, + const double* dbasis_z_eval, const double* Xs, size_t ldxs, + const double* Xz, size_t ldxz, const double* Xx, size_t ldxx, + const double* Xy, size_t ldxy, double* den_eval, + double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma, double* K, double* H, const double dtol) = 0; virtual void eval_uvvar_mgga_rks( size_t npts, size_t nbe, const double* basis_eval, const double* dbasis_x_eval, const double* dbasis_y_eval, @@ -115,6 +124,9 @@ struct LocalHostWorkDriverPIMPL { const double* basis_eval, double* Z, size_t ldz ) = 0; virtual void eval_zmat_lda_vxc_uks( size_t npts, size_t nbe, const double* vrho, const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ) = 0; + virtual void eval_zmat_lda_vxc_gks( size_t npts, size_t nbe, const double* vrho, + const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz, + double* Zx, size_t ldzx,double* Zy, size_t ldzy, double *K ) = 0; virtual void eval_zmat_gga_vxc_rks( size_t npts, size_t nbe, const double* vrho, const double* vgamma, const double* basis_eval, const double* dbasis_x_eval, @@ -126,6 +138,12 @@ struct LocalHostWorkDriverPIMPL { const double* dbasis_y_eval, const double* dbasis_z_eval, const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ) = 0; + virtual void eval_zmat_gga_vxc_gks( size_t npts, size_t nbe, const double* vrho, + const double* vgamma, const double* basis_eval, const double* dbasis_x_eval, + const double* dbasis_y_eval, const double* dbasis_z_eval, + const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, + double* Zs, size_t ldzs, double* Zz, size_t ldzz, double* Zx, size_t ldzx, + double* Zy, size_t ldzy, double* K, double* H ) = 0; virtual void eval_zmat_mgga_vxc_rks( size_t npts, size_t nbe, const double* vrho, const double* vgamma, const double* vlapl, const double* basis_eval, diff --git a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx index 4f57e7fa..5a184ba9 100644 --- a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx +++ b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx @@ -159,7 +159,57 @@ namespace GauXC { } - + void ReferenceLocalHostWorkDriver::eval_uvvar_lda_gks( size_t npts, size_t nbe, const double* basis_eval, + const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, + const double* Xx, size_t ldxx, const double* Xy, size_t ldxy, double* den_eval, double* K, const double dtol) { + + + auto *KZ = K; // KZ // store K in the Z matrix + auto *KY = KZ + npts; + auto *KX = KY + npts; + + double dtolsq = dtol*dtol; + + for( int32_t i = 0; i < (int32_t)npts; ++i ) { + + const size_t ioffs = size_t(i) * ldxs; + const size_t ioffz = size_t(i) * ldxz; + const size_t ioffx = size_t(i) * ldxx; + const size_t ioffy = size_t(i) * ldxy; + + const auto* Xs_i = Xs + ioffs; + const auto* Xz_i = Xz + ioffz; + const auto* Xx_i = Xx + ioffx; + const auto* Xy_i = Xy + ioffy; + + const double rhos = blas::dot( nbe, basis_eval + ioffs, 1, Xs_i, 1 ); + const double rhoz = blas::dot( nbe, basis_eval + ioffz, 1, Xz_i, 1 ); + const double rhox = blas::dot( nbe, basis_eval + ioffx, 1, Xx_i, 1 ); + const double rhoy = blas::dot( nbe, basis_eval + ioffy, 1, Xy_i, 1 ); + + double mtemp = rhoz * rhoz + rhox * rhox + rhoy * rhoy; + double mnorm = 0; + + if (mtemp > dtolsq) { + mnorm = sqrt(mtemp); + KZ[i] = rhoz / mnorm; + KY[i] = rhoy / mnorm; + KX[i] = rhox / mnorm; + } else { + mnorm = (1. / 3.) * (rhox + rhoy + rhoz); + KZ[i] = 1. / 3.; + KY[i] = 1. / 3.; + KX[i] = 1. / 3.; + } + + den_eval[2*i] = 0.5*(rhos + mnorm); // rho_+ + den_eval[2*i+1] = 0.5*(rhos - mnorm); // rho_- + + } + + } + + void ReferenceLocalHostWorkDriver::eval_uvvar_gga_rks( size_t npts, size_t nbe, const double* basis_eval, const double* dbasis_x_eval, const double *dbasis_y_eval, const double* dbasis_z_eval, const double* X, @@ -244,6 +294,7 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_gga_uks( size_t npts, size_t nbe, } + void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_rks( size_t npts, size_t nbe, const double* basis_eval, const double* dbasis_x_eval, const double *dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, @@ -347,17 +398,151 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, tau[2*i+1] = 0.5*(taus - tauz); if (lapl != nullptr) { - auto lapls = 2. * blas::dot( nbe, lbasis_eval + ioffs, 1, Xs_i, 1) + 4. * taus; - auto laplz = 2. * blas::dot( nbe, lbasis_eval + ioffz, 1, Xz_i, 1) + 4. * tauz; + auto lapls = 2. * blas::dot( nbe, lbasis_eval + ioffs, 1, Xs_i, 1) + 4. * taus; + auto laplz = 2. * blas::dot( nbe, lbasis_eval + ioffz, 1, Xz_i, 1) + 4. * tauz; - lapl[2*i] = 0.5*(lapls + laplz); - lapl[2*i+1] = 0.5*(lapls - laplz); + lapl[2*i] = 0.5*(lapls + laplz); + lapl[2*i+1] = 0.5*(lapls - laplz); } } } + +void ReferenceLocalHostWorkDriver::eval_uvvar_gga_gks( size_t npts, size_t nbe, const double* basis_eval, + const double* dbasis_x_eval, const double *dbasis_y_eval, + const double* dbasis_z_eval, const double* Xs, size_t ldxs, + const double* Xz, size_t ldxz, const double* Xx, size_t ldxx, + const double* Xy, size_t ldxy, double* den_eval, + double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma, double* K, double* H, const double dtol) { + + auto *KZ = K; // KZ // store K in the Z matrix + auto *KY = KZ + npts; + auto *KX = KY + npts; + + auto *HZ = H; // KZ // store K in the Z matrix + auto *HY = HZ + npts; + auto *HX = HY + npts; + + double dtolsq = dtol*dtol; + + for( int32_t i = 0; i < (int32_t)npts; ++i ) { + + const size_t ioffs = size_t(i) * ldxs; + const size_t ioffz = size_t(i) * ldxz; + const size_t ioffx = size_t(i) * ldxx; + const size_t ioffy = size_t(i) * ldxy; + + const auto* Xs_i = Xs + ioffs; + const auto* Xz_i = Xz + ioffz; + const auto* Xx_i = Xx + ioffx; + const auto* Xy_i = Xy + ioffy; + + const double rhos = blas::dot( nbe, basis_eval + ioffs, 1, Xs_i, 1 ); + const double rhoz = blas::dot( nbe, basis_eval + ioffz, 1, Xz_i, 1 ); + const double rhox = blas::dot( nbe, basis_eval + ioffx, 1, Xx_i, 1 ); + const double rhoy = blas::dot( nbe, basis_eval + ioffy, 1, Xy_i, 1 ); + + const auto dndx = + 2. * blas::dot( nbe, dbasis_x_eval + ioffs, 1, Xs_i, 1 ); + const auto dndy = + 2. * blas::dot( nbe, dbasis_y_eval + ioffs, 1, Xs_i, 1 ); + const auto dndz = + 2. * blas::dot( nbe, dbasis_z_eval + ioffs, 1, Xs_i, 1 ); + + const auto dMzdx = + 2. * blas::dot( nbe, dbasis_x_eval + ioffz, 1, Xz_i, 1 ); + const auto dMzdy = + 2. * blas::dot( nbe, dbasis_y_eval + ioffz, 1, Xz_i, 1 ); + const auto dMzdz = + 2. * blas::dot( nbe, dbasis_z_eval + ioffz, 1, Xz_i, 1 ); + + const auto dMxdx = + 2. * blas::dot( nbe, dbasis_x_eval + ioffx, 1, Xx_i, 1 ); + const auto dMxdy = + 2. * blas::dot( nbe, dbasis_y_eval + ioffx, 1, Xx_i, 1 ); + const auto dMxdz = + 2. * blas::dot( nbe, dbasis_z_eval + ioffx, 1, Xx_i, 1 ); + + const auto dMydx = + 2. * blas::dot( nbe, dbasis_x_eval + ioffy, 1, Xy_i, 1 ); + const auto dMydy = + 2. * blas::dot( nbe, dbasis_y_eval + ioffy, 1, Xy_i, 1 ); + const auto dMydz = + 2. * blas::dot( nbe, dbasis_z_eval + ioffy, 1, Xy_i, 1 ); + + + dden_x_eval[4 * i] = dndx; + dden_y_eval[4 * i] = dndy; + dden_z_eval[4 * i] = dndz; + + dden_x_eval[4 * i + 1] = dMzdx; + dden_y_eval[4 * i + 1] = dMzdy; + dden_z_eval[4 * i + 1] = dMzdz; + + dden_x_eval[4 * i + 2] = dMydx; + dden_y_eval[4 * i + 2] = dMydy; + dden_z_eval[4 * i + 2] = dMydz; + + dden_x_eval[4 * i + 3] = dMxdx; + dden_y_eval[4 * i + 3] = dMxdy; + dden_z_eval[4 * i + 3] = dMxdz; + + double mtemp = rhoz * rhoz + rhox * rhox + rhoy * rhoy; + double mnorm = 0; + + auto dels_dot_dels = dndx * dndx + dndy * dndy + dndz * dndz; + auto delz_dot_delz = dMzdx * dMzdx + dMzdy * dMzdy + dMzdz * dMzdz; + auto delx_dot_delx = dMxdx * dMxdx + dMxdy * dMxdy + dMxdz * dMxdz; + auto dely_dot_dely = dMydx * dMydx + dMydy * dMydy + dMydz * dMydz; + + auto dels_dot_delz = dndx * dMzdx + dndy * dMzdy + dndz * dMzdz; + auto dels_dot_delx = dndx * dMxdx + dndy * dMxdy + dndz * dMxdz; + auto dels_dot_dely = dndx * dMydx + dndy * dMydy + dndz * dMydz; + + auto sum = delz_dot_delz + delx_dot_delx + dely_dot_dely; + auto s_sum = + dels_dot_delz * rhoz + dels_dot_delx * rhox + dels_dot_dely * rhoy; + + auto sqsum2 = + sqrt(dels_dot_delz * dels_dot_delz + dels_dot_delx * dels_dot_delx + + dels_dot_dely * dels_dot_dely); + + double sign = 1.; + if (std::signbit(s_sum)) + sign = -1.; + + if (mtemp > dtolsq) { + mnorm = sqrt(mtemp); + KZ[i] = rhoz / mnorm; + KY[i] = rhoy / mnorm; + KX[i] = rhox / mnorm; + HZ[i] = sign * dels_dot_delz / sqsum2; + HY[i] = sign * dels_dot_dely / sqsum2; + HX[i] = sign * dels_dot_delx / sqsum2; + } else { + mnorm = (1. / 3.) * (rhox + rhoy + rhoz); + KZ[i] = 1. / 3.; + KY[i] = 1. / 3.; + KX[i] = 1. / 3.; + + HZ[i] = sign / 3.; + HY[i] = sign / 3.; + HX[i] = sign / 3.; + } + + den_eval[2 * i] = 0.5 * (rhos + mnorm); + den_eval[2 * i + 1] = 0.5 * (rhos - mnorm); + + gamma[3 * i] = 0.25 * (dels_dot_dels + sum) + 0.5 * sign * sqsum2; + gamma[3 * i + 1] = 0.25 * (dels_dot_dels - sum); + gamma[3 * i + 2] = 0.25 * (dels_dot_dels + sum) - 0.5 * sign * sqsum2; + + + } + +} // Eval Z Matrix LDA VXC void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_rks( size_t npts, size_t nbf, const double* vrho, const double* basis_eval, double* Z, size_t ldz ) { @@ -402,6 +587,40 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, } +void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nbe, const double* vrho, + const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz, + double* Zx, size_t ldzx,double* Zy, size_t ldzy, double *K ) { + + auto *KZ = K; // KZ // store K in the Z matrix + auto *KY = KZ + npts; + auto *KX = KY + npts; + + blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zs, ldzs); + blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zz, ldzz); + blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zx, ldzx); + blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zy, ldzy); + + for( int32_t i = 0; i < (int32_t)npts; ++i ) { + + auto* zs_col = Zs + i*ldzs; + auto* zz_col = Zz + i*ldzz; + auto* zx_col = Zx + i*ldzx; + auto* zy_col = Zy + i*ldzy; + + const double factp = 0.5 * vrho[2*i]; + const double factm = 0.5 * vrho[2*i+1]; + const double factor = 0.5 * (factp - factm); + + //eq. 56 https://doi.org/10.1140/epjb/e2018-90170-1 + GauXC::blas::scal( nbe, 0.5*(factp + factm), zs_col, 1 ); + GauXC::blas::scal( nbe, KZ[i] * factor, zz_col, 1 ); + GauXC::blas::scal( nbe, KX[i] * factor, zx_col, 1 ); + GauXC::blas::scal( nbe, KY[i] * factor, zy_col, 1 ); + + } + +} + // Eval Z Matrix GGA VXC void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_rks( size_t npts, size_t nbf, const double* vrho, const double* vgamma, const double* basis_eval, @@ -493,15 +712,14 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, } } - // Eval Z Matrix MGGA VXC - void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_rks( size_t npts, size_t nbf, - const double* vrho, const double* vgamma, const double* vlapl, - const double* basis_eval, - const double* dbasis_x_eval, const double* dbasis_y_eval, - const double* dbasis_z_eval, const double* lbasis_eval, - const double* dden_x_eval, - const double* dden_y_eval, const double* dden_z_eval, double* Z, size_t ldz ) { + void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_rks( size_t npts, size_t nbf, + const double* vrho, const double* vgamma, const double* vlapl, + const double* basis_eval, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, const double* lbasis_eval, + const double* dden_x_eval, + const double* dden_y_eval, const double* dden_z_eval, double* Z, size_t ldz ) { if( ldz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Z, nbf ); @@ -511,14 +729,14 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, const int32_t ioff = i * nbf; auto* z_col = Z + ioff; - auto* bf_x_col = dbasis_x_eval + ioff; - auto* bf_y_col = dbasis_y_eval + ioff; - auto* bf_z_col = dbasis_z_eval + ioff; + auto* bf_x_col = dbasis_x_eval + ioff; + auto* bf_y_col = dbasis_y_eval + ioff; + auto* bf_z_col = dbasis_z_eval + ioff; const auto lda_fact = 0.5 * vrho[i]; blas::scal( nbf, lda_fact, z_col, 1 ); - const auto gga_fact = 2. * vgamma[i]; + const auto gga_fact = 2. * vgamma[i]; const auto x_fact = gga_fact * dden_x_eval[i]; const auto y_fact = gga_fact * dden_y_eval[i]; const auto z_fact = gga_fact * dden_z_eval[i]; @@ -528,7 +746,7 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, blas::axpy( nbf, z_fact, bf_z_col, 1, z_col, 1 ); if ( vlapl != nullptr ) { - auto* lbf_col = lbasis_eval + ioff; + auto* lbf_col = lbasis_eval + ioff; const auto lapl_fact = vlapl[i]; blas::axpy( nbf, lapl_fact, lbf_col, 1, z_col, 1 ); } @@ -537,13 +755,12 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, } - - void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks( size_t npts, size_t nbf, +void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks( size_t npts, size_t nbf, const double* vrho, const double* vgamma, const double* vlapl, - const double* basis_eval, + const double* basis_eval, const double* dbasis_x_eval, const double* dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval, - const double* dden_x_eval, + const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ) { @@ -606,10 +823,10 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, } void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_rks(size_t npts, size_t nbf, - const double* vtau, const double* vlapl, - const double* dbasis_x_eval, const double* dbasis_y_eval, - const double* dbasis_z_eval, - double* mmat_x, double* mmat_y, double* mmat_z, size_t ldm ) { + const double* vtau, const double* vlapl, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, + double* mmat_x, double* mmat_y, double* mmat_z, size_t ldm ) { if( ldm != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); @@ -642,12 +859,12 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, } } - void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks(size_t npts, size_t nbf, - const double* vtau, const double* vlapl, - const double* dbasis_x_eval, const double* dbasis_y_eval, - const double* dbasis_z_eval, - double* mmat_xs, double* mmat_ys, double* mmat_zs, size_t ldms, - double* mmat_xz, double* mmat_yz, double* mmat_zz, size_t ldmz) { +void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks(size_t npts, size_t nbf, + const double* vtau, const double* vlapl, + const double* dbasis_x_eval, const double* dbasis_y_eval, + const double* dbasis_z_eval, + double* mmat_xs, double* mmat_ys, double* mmat_zs, size_t ldms, + double* mmat_xz, double* mmat_yz, double* mmat_zz, size_t ldmz) { if( ldms != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); if( ldmz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); @@ -687,8 +904,8 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, if ( vlapl != nullptr ) { const auto lfactp = vlapl[2*i]; const auto lfactm = vlapl[2*i+1]; - const auto lfacts = 0.5*(lfactp + lfactm); - const auto lfactz = 0.5*(lfactp - lfactm); + const auto lfacts = 0.5*(lfactp + lfactm); + const auto lfactz = 0.5*(lfactp - lfactm); blas::axpy( nbf, lfacts, bf_x_col, 1, xs_col, 1); blas::axpy( nbf, lfacts, bf_y_col, 1, ys_col, 1); blas::axpy( nbf, lfacts, bf_z_col, 1, zs_col, 1); @@ -701,6 +918,116 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe, } +void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_gks( size_t npts, size_t nbf, const double* vrho, + const double* vgamma, const double* basis_eval, const double* dbasis_x_eval, + const double* dbasis_y_eval, const double* dbasis_z_eval, + const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, + double* Zs, size_t ldzs, double* Zz, size_t ldzz, double* Zx, size_t ldzx, + double* Zy, size_t ldzy, double* K, double* H ) { + + auto *KZ = K; // KZ // store K in the Z matrix + auto *KY = KZ + npts; + auto *KX = KY + npts; + + auto *HZ = H; // KZ // store K in the Z matrix + auto *HY = HZ + npts; + auto *HX = HY + npts; + + if( ldzs != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); + if( ldzz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); + if( ldzx != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); + if( ldzy != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS")); + + blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zs, ldzs); + blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zz, ldzz); + blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zx, ldzx); + blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zy, ldzy); + + for( int32_t i = 0; i < (int32_t)npts; ++i ) { + + const int32_t ioff = i * nbf; + + auto* zs_col = Zs + ioff; + auto* zz_col = Zz + ioff; + auto* zx_col = Zx + ioff; + auto* zy_col = Zy + ioff; + + auto* bf_x_col = dbasis_x_eval + ioff; + auto* bf_y_col = dbasis_y_eval + ioff; + auto* bf_z_col = dbasis_z_eval + ioff; + + const double factp = 0.5 * vrho[2*i]; + const double factm = 0.5 * vrho[2*i+1]; + const double factor = 0.5 * (factp - factm); + + GauXC::blas::scal( nbf, 0.5*(factp + factm), zs_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. " + GauXC::blas::scal( nbf, KZ[i]*factor, zz_col, 1 ); + GauXC::blas::scal( nbf, KX[i]*factor, zx_col, 1 ); + GauXC::blas::scal( nbf, KY[i]*factor, zy_col, 1 ); + + const auto gga_fact_pp = vgamma[3 * i]; + const auto gga_fact_pm = vgamma[3 * i + 1]; + const auto gga_fact_mm = vgamma[3 * i + 2]; + + const auto gga_fact_1 = 0.5 * (gga_fact_pp + gga_fact_pm + gga_fact_mm); + const auto gga_fact_2 = 0.5 * (gga_fact_pp - gga_fact_mm); + const auto gga_fact_3 = 0.5 * (gga_fact_pp - gga_fact_pm + gga_fact_mm); + + const auto x_fact_s = gga_fact_1 * dden_x_eval[4 * i] + + gga_fact_2 * (HZ[i] * dden_x_eval[4 * i + 1] + + HY[i] * dden_x_eval[4 * i + 2] + + HX[i] * dden_x_eval[4 * i + 3]); + const auto y_fact_s = gga_fact_1 * dden_y_eval[4 * i] + + gga_fact_2 * (HZ[i] * dden_y_eval[4 * i + 1] + + HY[i] * dden_y_eval[4 * i + 2] + + HX[i] * dden_y_eval[4 * i + 3]); + const auto z_fact_s = gga_fact_1 * dden_z_eval[4 * i] + + gga_fact_2 * (HZ[i] * dden_z_eval[4 * i + 1] + + HY[i] * dden_z_eval[4 * i + 2] + + HX[i] * dden_z_eval[4 * i + 3]); + + const auto x_fact_z = gga_fact_3 * dden_x_eval[4 * i + 1] + + gga_fact_2 * HZ[i] * dden_x_eval[4 * i]; + const auto y_fact_z = gga_fact_3 * dden_y_eval[4 * i + 1] + + gga_fact_2 * HZ[i] * dden_y_eval[4 * i]; + const auto z_fact_z = gga_fact_3 * dden_z_eval[4 * i + 1] + + gga_fact_2 * HZ[i] * dden_z_eval[4 * i]; + + const auto x_fact_x = gga_fact_3 * dden_x_eval[4 * i + 3] + + gga_fact_2 * HX[i] * dden_x_eval[4 * i]; + const auto y_fact_x = gga_fact_3 * dden_y_eval[4 * i + 3] + + gga_fact_2 * HX[i] * dden_y_eval[4 * i]; + const auto z_fact_x = gga_fact_3 * dden_z_eval[4 * i + 3] + + gga_fact_2 * HX[i] * dden_z_eval[4 * i]; + + const auto x_fact_y = gga_fact_3 * dden_x_eval[4 * i + 2] + + gga_fact_2 * HY[i] * dden_x_eval[4 * i]; + const auto y_fact_y = gga_fact_3 * dden_y_eval[4 * i + 2] + + gga_fact_2 * HY[i] * dden_y_eval[4 * i]; + const auto z_fact_y = gga_fact_3 * dden_z_eval[4 * i + 2] + + gga_fact_2 * HY[i] * dden_z_eval[4 * i]; + + + blas::axpy(nbf, x_fact_s, bf_x_col, 1, zs_col, 1); + blas::axpy(nbf, y_fact_s, bf_y_col, 1, zs_col, 1); + blas::axpy(nbf, z_fact_s, bf_z_col, 1, zs_col, 1); + + blas::axpy(nbf, x_fact_z, bf_x_col, 1, zz_col, 1); + blas::axpy(nbf, y_fact_z, bf_y_col, 1, zz_col, 1); + blas::axpy(nbf, z_fact_z, bf_z_col, 1, zz_col, 1); + + blas::axpy(nbf, x_fact_x, bf_x_col, 1, zx_col, 1); + blas::axpy(nbf, y_fact_x, bf_y_col, 1, zx_col, 1); + blas::axpy(nbf, z_fact_x, bf_z_col, 1, zx_col, 1); + + blas::axpy(nbf, x_fact_y, bf_x_col, 1, zy_col, 1); + blas::axpy(nbf, y_fact_y, bf_y_col, 1, zy_col, 1); + blas::axpy(nbf, z_fact_y, bf_z_col, 1, zy_col, 1); + + } + +} + // Increment VXC by Z void ReferenceLocalHostWorkDriver::inc_vxc( size_t npts, size_t nbf, size_t nbe, const double* basis_eval, const submat_map_t& submat_map, const double* Z, diff --git a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp index fcaa84cd..fece15a8 100644 --- a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp +++ b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp @@ -82,6 +82,10 @@ struct ReferenceLocalHostWorkDriver : public detail::LocalHostWorkDriverPIMPL { void eval_uvvar_lda_uks( size_t npts, size_t nbe, const double* basis_eval, const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, double* den_eval) override; + void eval_uvvar_lda_gks( size_t npts, size_t nbe, const double* basis_eval, + const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, + const double* Xx, size_t ldxx, const double* Xy, size_t ldxy, + double* den_eval, double* K, const double dtol) override; void eval_uvvar_gga_rks( size_t npts, size_t nbe, const double* basis_eval, const double* dbasis_x_eval, const double *dbasis_y_eval, @@ -94,7 +98,13 @@ struct ReferenceLocalHostWorkDriver : public detail::LocalHostWorkDriverPIMPL { const double* Xz, size_t ldxz, double* den_eval, double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma ) override; - + void eval_uvvar_gga_gks( size_t npts, size_t nbe, const double* basis_eval, + const double* dbasis_x_eavl, const double *dbasis_y_eval, + const double* dbasis_z_eval, const double* Xs, size_t ldxs, + const double* Xz, size_t ldxz, const double* Xx, size_t ldxx, + const double* Xy, size_t ldxy, double* den_eval, + double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma, + double* K, double* H, const double dtol ) override; void eval_uvvar_mgga_rks( size_t npts, size_t nbe, const double* basis_eval, const double* dbasis_x_eval, const double* dbasis_y_eval, @@ -118,7 +128,9 @@ struct ReferenceLocalHostWorkDriver : public detail::LocalHostWorkDriverPIMPL { const double* basis_eval, double* Z, size_t ldz ) override; void eval_zmat_lda_vxc_uks( size_t npts, size_t nbe, const double* vrho, const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ) override; - + void eval_zmat_lda_vxc_gks( size_t npts, size_t nbe, const double* vrho, + const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz, + double* Zx, size_t ldzx,double* Zy, size_t ldzy, double *K ) override; void eval_zmat_gga_vxc_rks( size_t npts, size_t nbe, const double* vrho, const double* vgamma, const double* basis_eval, const double* dbasis_x_eval, @@ -130,6 +142,12 @@ struct ReferenceLocalHostWorkDriver : public detail::LocalHostWorkDriverPIMPL { const double* dbasis_y_eval, const double* dbasis_z_eval, const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ) override; + void eval_zmat_gga_vxc_gks( size_t npts, size_t nbe, const double* vrho, + const double* vgamma, const double* basis_eval, const double* dbasis_x_eval, + const double* dbasis_y_eval, const double* dbasis_z_eval, + const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval, + double* Zs, size_t ldzs, double* Zz, size_t ldzz, double* Zx, size_t ldzx, + double* Zy, size_t ldzy, double* K, double* H ) override; void eval_zmat_mgga_vxc_rks( size_t npts, size_t nbe, const double* vrho, diff --git a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp index cedb12df..f2439ad4 100644 --- a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp +++ b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp @@ -33,15 +33,29 @@ class IncoreReplicatedXCDeviceIntegrator : void eval_exc_vxc_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* VXC, int64_t ldvxc, - value_type* EXC ) override; + value_type* EXC, const IntegratorSettingsXC& settings) override; - void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Pscalar, - int64_t ldpscalar, + void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, const value_type* Pz, int64_t ldpz, - value_type* VXCscalar, int64_t ldvxcscalar, + value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, - value_type* EXC ) override; + value_type* EXC, const IntegratorSettingsXC& settings ) override; + + void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, + const value_type* Pz, + int64_t ldpz, + const value_type* Py, + int64_t ldpy, + const value_type* Px, + int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, + value_type* EXC, const IntegratorSettingsXC& settings ) override; void eval_exc_grad_( int64_t m, int64_t n, const value_type* P, @@ -66,18 +80,36 @@ class IncoreReplicatedXCDeviceIntegrator : host_task_iterator task_begin, host_task_iterator task_end, XCDeviceData& device_data ); - void exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar, + void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, const value_type* Pz, int64_t ldpz, host_task_iterator task_begin, host_task_iterator task_end, XCDeviceData& device_data ); - void exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar, + void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, const value_type* Pz, int64_t ldpz, value_type* VXC, int64_t ldvxc, value_type* VXCz, int64_t ldvxcz, value_type* EXC, value_type *N_EL, host_task_iterator task_begin, host_task_iterator task_end, XCDeviceData& device_data ); + void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, + const value_type* Pz, int64_t ldpz, + const value_type* Py, int64_t ldpy, + const value_type* Px, int64_t ldpx, + host_task_iterator task_begin, host_task_iterator task_end, + XCDeviceData& device_data ); + + void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, + const value_type* Pz, int64_t ldpz, + const value_type* Py, int64_t ldpy, + const value_type* Px, int64_t ldpx, + value_type* VXC, int64_t ldvxc, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, value_type* EXC, value_type *N_EL, + host_task_iterator task_begin, host_task_iterator task_end, + XCDeviceData& device_data ); + void eval_exc_grad_local_work_( const basis_type& basis, const value_type* P, int64_t ldp, host_task_iterator task_begin, host_task_iterator task_end, XCDeviceData& device_data ); diff --git a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp index 16de808b..3345bf08 100644 --- a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp +++ b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp @@ -19,7 +19,7 @@ template void IncoreReplicatedXCDeviceIntegrator:: eval_exc_vxc_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* VXC, int64_t ldvxc, - value_type* EXC ) { + value_type* EXC, const IntegratorSettingsXC& settings ) { const auto& basis = this->load_balancer_->basis(); @@ -109,17 +109,37 @@ void IncoreReplicatedXCDeviceIntegrator:: template void IncoreReplicatedXCDeviceIntegrator:: - eval_exc_vxc_( int64_t m, int64_t n, const value_type* Pscalar, - int64_t ldpscalar, + eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, const value_type* Pz, int64_t ldpz, - value_type* VXCscalar, int64_t ldvxcscalar, + value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, - value_type* EXC ) { - GauXC::util::unused(m,n,Pscalar,ldpscalar,Pz,ldpz,VXCscalar,ldvxcscalar,VXCz,ldvxcz,EXC); + value_type* EXC, const IntegratorSettingsXC& settings ) { + GauXC::util::unused(m,n,Ps,ldps,Pz,ldpz,VXCs,ldvxcs,VXCz,ldvxcz,EXC,settings); GAUXC_GENERIC_EXCEPTION("UKS NOT YET IMPLEMENTED FOR DEVICE"); } +template +void IncoreReplicatedXCDeviceIntegrator:: + eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, + const value_type* Pz, + int64_t ldpz, + const value_type* Py, + int64_t ldpy, + const value_type* Px, + int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, + value_type* EXC, const IntegratorSettingsXC& settings ) { + GauXC::util::unused(m,n,Ps,ldps,Pz,ldpz,Py,ldpy,Px,ldpx,VXCs,ldvxcs,VXCz,ldvxcz,VXCy,ldvxcy,VXCx,ldvxcx,EXC,settings); + GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE"); +} + + template void IncoreReplicatedXCDeviceIntegrator:: exc_vxc_local_work_( const basis_type& basis, const value_type* P, int64_t ldp, @@ -263,27 +283,57 @@ void IncoreReplicatedXCDeviceIntegrator:: template void IncoreReplicatedXCDeviceIntegrator:: - exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar, + exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, const value_type* Pz, int64_t ldpz, host_task_iterator task_begin, host_task_iterator task_end, XCDeviceData& device_data ) { - GauXC::util::unused(basis,Pscalar,ldpscalar,Pz,ldpz,task_begin,task_end,device_data); + GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,task_begin,task_end,device_data); GAUXC_GENERIC_EXCEPTION("UKS NOT YET IMPLEMENTED FOR DEVICE"); } template void IncoreReplicatedXCDeviceIntegrator:: - exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar, + exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, const value_type* Pz, int64_t ldpz, - value_type* VXCscalar, int64_t ldvxcscalar, + value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, value_type* EXC, value_type *N_EL, host_task_iterator task_begin, host_task_iterator task_end, XCDeviceData& device_data ) { - GauXC::util::unused(basis,Pscalar,ldpscalar,Pz,ldpz,VXCscalar,ldvxcscalar,VXCz,ldvxcz,EXC,N_EL,task_begin,task_end,device_data); + GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,VXCs,ldvxcs,VXCz,ldvxcz,EXC,N_EL,task_begin,task_end,device_data); GAUXC_GENERIC_EXCEPTION("UKS NOT YET IMPLEMENTED FOR DEVICE"); } +template +void IncoreReplicatedXCDeviceIntegrator:: + exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, + const value_type* Pz, int64_t ldpz, + const value_type* Py, int64_t ldpy, + const value_type* Px, int64_t ldpx, + host_task_iterator task_begin, host_task_iterator task_end, + XCDeviceData& device_data ) { + GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,Py,ldpy,Px,ldpx,task_begin,task_end,device_data); + GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE"); +} + +template +void IncoreReplicatedXCDeviceIntegrator:: + exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, + const value_type* Pz, int64_t ldpz, + const value_type* Py, int64_t ldpy, + const value_type* Px, int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, value_type* EXC, value_type *N_EL, + host_task_iterator task_begin, host_task_iterator task_end, + XCDeviceData& device_data ) { + + GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,Py,ldpy,Px,ldpx,VXCs,ldvxcs,VXCz,ldvxcz,VXCy,ldvxcy,VXCx,ldvxcx,EXC,N_EL,task_begin,task_end,device_data); + GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE"); +} + + } } diff --git a/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator.hpp b/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator.hpp index d41e5fcd..e446df35 100644 --- a/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator.hpp +++ b/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator.hpp @@ -44,15 +44,29 @@ class ShellBatchedReplicatedXCDeviceIntegrator : void eval_exc_vxc_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* VXC, int64_t ldvxc, - value_type* EXC ) override; + value_type* EXC, const IntegratorSettingsXC& settings ) override; - void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Pscalar, - int64_t ldpscalar, + void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, const value_type* Pz, int64_t ldpz, - value_type* VXCscalar, int64_t ldvxcscalar, + value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, - value_type* EXC ) override; + value_type* EXC, const IntegratorSettingsXC& settings ) override; + + void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, + const value_type* Pz, + int64_t ldpz, + const value_type* Py, + int64_t ldpy, + const value_type* Px, + int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, + value_type* EXC, const IntegratorSettingsXC& settings ) override; void eval_exc_grad_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* EXC_GRAD ) override; @@ -67,13 +81,24 @@ class ShellBatchedReplicatedXCDeviceIntegrator : incore_integrator_type& incore_integrator, XCDeviceData& device_data ); - void exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar, + void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, const value_type* Pz, int64_t ldpz, - value_type* VXCscalar, int64_t ldvxcscalar, + value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, value_type* EXC, value_type *N_EL, host_task_iterator task_begin, host_task_iterator task_end, XCDeviceData& device_data ); + void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, + const value_type* Pz, int64_t ldpz, + const value_type* Py, int64_t ldpy, + const value_type* Px, int64_t ldpx, + value_type* VXC, int64_t ldvxc, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, value_type* EXC, value_type *N_EL, + host_task_iterator task_begin, host_task_iterator task_end, + XCDeviceData& device_data ); + void eval_exc_grad_local_work_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* EXC_GRAD, host_task_iterator task_begin, host_task_iterator task_end, diff --git a/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator_exc_vxc.hpp b/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator_exc_vxc.hpp index 87093e2b..64f161d8 100644 --- a/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator_exc_vxc.hpp +++ b/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator_exc_vxc.hpp @@ -28,7 +28,7 @@ template void ShellBatchedReplicatedXCDeviceIntegrator:: eval_exc_vxc_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* VXC, int64_t ldvxc, - value_type* EXC ) { + value_type* EXC, const IntegratorSettingsXC& settings ) { const auto& basis = this->load_balancer_->basis(); @@ -87,18 +87,39 @@ void ShellBatchedReplicatedXCDeviceIntegrator:: template void ShellBatchedReplicatedXCDeviceIntegrator:: - eval_exc_vxc_( int64_t m, int64_t n, const value_type* Pscalar, - int64_t ldpscalar, + eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, const value_type* Pz, int64_t ldpz, - value_type* VXCscalar, int64_t ldvxcscalar, + value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, - value_type* EXC ) { + value_type* EXC, const IntegratorSettingsXC& settings ) { - GauXC::util::unused(m,n,Pscalar,ldpscalar,Pz,ldpz,VXCscalar,ldvxcscalar,VXCz,ldvxcz,EXC); + GauXC::util::unused(m,n,Ps,ldps,Pz,ldpz,VXCs,ldvxcs,VXCz,ldvxcz,EXC,settings); GAUXC_GENERIC_EXCEPTION("UKS NOT YET IMPLEMENTED FOR DEVICE"); } + +template +void ShellBatchedReplicatedXCDeviceIntegrator:: + eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, + const value_type* Pz, + int64_t ldpz, + const value_type* Py, + int64_t ldpy, + const value_type* Px, + int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, + value_type* EXC, const IntegratorSettingsXC& settings ) { + GauXC::util::unused(m,n,Ps,ldps,Pz,ldpz,Py,ldpy,Px,ldpx,VXCs,ldvxcs,VXCz,ldvxcz,VXCy,ldvxcy,VXCx,ldvxcx,EXC,settings); + GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE"); +} + + template void ShellBatchedReplicatedXCDeviceIntegrator:: exc_vxc_local_work_( const basis_type& basis, const value_type* P, int64_t ldp, @@ -195,17 +216,35 @@ void ShellBatchedReplicatedXCDeviceIntegrator:: template void ShellBatchedReplicatedXCDeviceIntegrator:: - exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar, + exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, const value_type* Pz, int64_t ldpz, - value_type* VXCscalar, int64_t ldvxcscalar, + value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, value_type* EXC, value_type *N_EL, host_task_iterator task_begin, host_task_iterator task_end, XCDeviceData& device_data ) { - GauXC::util::unused(basis,Pscalar,ldpscalar,Pz,ldpz,VXCscalar,ldvxcscalar,VXCz,ldvxcz,EXC,N_EL,task_begin,task_end,device_data); + GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,VXCs,ldvxcs,VXCz,ldvxcz,EXC,N_EL,task_begin,task_end,device_data); GAUXC_GENERIC_EXCEPTION("UKS NOT YET IMPLEMENTED FOR DEVICE"); } +template +void ShellBatchedReplicatedXCDeviceIntegrator:: + exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, + const value_type* Pz, int64_t ldpz, + const value_type* Py, int64_t ldpy, + const value_type* Px, int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, value_type* EXC, value_type *N_EL, + host_task_iterator task_begin, host_task_iterator task_end, + XCDeviceData& device_data ) { + + GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,Py,ldpy,Px,ldpx,VXCs,ldvxcs,VXCz,ldvxcz,VXCy,ldvxcy,VXCx,ldvxcx,EXC,N_EL,task_begin,task_end,device_data); + GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE"); +} + + template typename ShellBatchedReplicatedXCDeviceIntegrator::incore_device_task ShellBatchedReplicatedXCDeviceIntegrator:: diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp index 19ad3ccf..cabeece2 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp @@ -30,7 +30,7 @@ class ReferenceReplicatedXCHostIntegrator : void eval_exc_vxc_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* VXC, int64_t ldvxc, - value_type* EXC ) override; + value_type* EXC, const IntegratorSettingsXC& ks_settings ) override; void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, int64_t ldps, @@ -38,7 +38,22 @@ class ReferenceReplicatedXCHostIntegrator : int64_t ldpz, value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, - value_type* EXC ) override; + value_type* EXC, const IntegratorSettingsXC& ks_settings ) override; + + void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, + const value_type* Pz, + int64_t ldpz, + const value_type* Py, + int64_t ldpy, + const value_type* Px, + int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, + value_type* EXC, const IntegratorSettingsXC& ks_settings ) override; + void eval_exc_grad_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* EXC_GRAD ) override; @@ -56,6 +71,16 @@ class ReferenceReplicatedXCHostIntegrator : value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, value_type* EXC, value_type *N_EL ); + + void exc_vxc_local_work_( const value_type* Ps, int64_t ldps, + const value_type* Pz, int64_t ldpz, + const value_type* Py, int64_t ldpy, + const value_type* Px, int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, + value_type* EXC, value_type *N_EL, const IntegratorSettingsXC& ks_settings ); void exc_grad_local_work_( const value_type* P, int64_t ldp, value_type* EXC_GRAD ); void exx_local_work_( const value_type* P, int64_t ldp, value_type* K, int64_t ldk, diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp index ca13b9b2..e8ee7383 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp @@ -20,7 +20,7 @@ template void ReferenceReplicatedXCHostIntegrator:: eval_exc_vxc_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* VXC, int64_t ldvxc, - value_type* EXC ) { + value_type* EXC, const IntegratorSettingsXC& ks_settings ) { const auto& basis = this->load_balancer_->basis(); @@ -45,7 +45,8 @@ void ReferenceReplicatedXCHostIntegrator:: // Compute Local contributions to EXC / VXC this->timer_.time_op("XCIntegrator.LocalWork", [&](){ //exc_vxc_local_work_( P, ldp, VXC, ldvxc, EXC, &N_EL ); - exc_vxc_local_work_( P, ldp, nullptr, 0, VXC, ldvxc, nullptr, 0, EXC, &N_EL ); + exc_vxc_local_work_( P, ldp, nullptr, 0, nullptr, 0, nullptr, 0, + VXC, ldvxc, nullptr, 0, nullptr, 0, nullptr, 0, EXC, &N_EL, ks_settings ); }); @@ -71,7 +72,7 @@ void ReferenceReplicatedXCHostIntegrator:: int64_t ldpz, value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, - value_type* EXC ) { + value_type* EXC, const IntegratorSettingsXC& ks_settings) { const auto& basis = this->load_balancer_->basis(); @@ -98,7 +99,8 @@ void ReferenceReplicatedXCHostIntegrator:: // Compute Local contributions to EXC / VXC this->timer_.time_op("XCIntegrator.LocalWork", [&](){ - exc_vxc_local_work_( Ps, ldps, Pz, ldpz, VXCs, ldvxcs, VXCz, ldvxcz, EXC, &N_EL ); + exc_vxc_local_work_( Ps, ldps, Pz, ldpz, nullptr, 0,nullptr, 0, + VXCs, ldvxcs, VXCz, ldvxcz, nullptr, 0, nullptr, 0, EXC, &N_EL, ks_settings ); }); @@ -118,18 +120,108 @@ void ReferenceReplicatedXCHostIntegrator:: } +template +void ReferenceReplicatedXCHostIntegrator:: + eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, + const value_type* Pz, + int64_t ldpz, + const value_type* Py, + int64_t ldpy, + const value_type* Px, + int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, + value_type* EXC, const IntegratorSettingsXC& ks_settings ) { + + const auto& basis = this->load_balancer_->basis(); + + // Check that P / VXC are sane + const int64_t nbf = basis.nbf(); + if( m != n ) + GAUXC_GENERIC_EXCEPTION("P/VXC Must Be Square"); + if( m != nbf ) + GAUXC_GENERIC_EXCEPTION("P/VXC Must Have Same Dimension as Basis"); + if( ldps < nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDPSCALAR"); + if( ldpz < nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDPZ"); + if( ldpy < nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDPX"); + if( ldpx < nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDPY"); + if( ldvxcs < nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDVXCSCALAR"); + if( ldvxcz < nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDVXCZ"); + if( ldvxcy < nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDVXCX"); + if( ldvxcx < nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDVXCY"); + + // Get Tasks + this->load_balancer_->get_tasks(); + + // Temporary electron count to judge integrator accuracy + value_type N_EL; + + // Compute Local contributions to EXC / VXC + this->timer_.time_op("XCIntegrator.LocalWork", [&](){ + exc_vxc_local_work_( Ps, ldps, Pz, ldpz, Py, ldpy, Px, ldpx, + VXCs, ldvxcs, VXCz, ldvxcz, + VXCy, ldvxcy, VXCx, ldvxcx, EXC, &N_EL, ks_settings ); + }); + + + // Reduce Results + this->timer_.time_op("XCIntegrator.Allreduce", [&](){ + + if( not this->reduction_driver_->takes_host_memory() ) + GAUXC_GENERIC_EXCEPTION("This Module Only Works With Host Reductions"); + + this->reduction_driver_->allreduce_inplace( VXCs, nbf*nbf, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( VXCz, nbf*nbf, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( VXCy, nbf*nbf, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( VXCx, nbf*nbf, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( EXC, 1 , ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( &N_EL, 1 , ReductionOp::Sum ); + + }); + + +} + + template void ReferenceReplicatedXCHostIntegrator:: exc_vxc_local_work_( const value_type* Ps, int64_t ldps, const value_type* Pz, int64_t ldpz, + const value_type* Py, int64_t ldpy, + const value_type* Px, int64_t ldpx, value_type* VXCs, int64_t ldvxcs, - value_type* VXCz, int64_t ldvxcz, - value_type* EXC, value_type *N_EL ) { + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, + value_type* EXC, value_type *N_EL, const IntegratorSettingsXC& settings) { + + const bool is_gks = (Pz != nullptr) and (VXCz != nullptr) and (VXCy != nullptr) and (VXCx != nullptr); + const bool is_uks = (Pz != nullptr) and (VXCz != nullptr) and (VXCy == nullptr) and (VXCx == nullptr); + const bool is_rks = not is_uks and not is_gks; + if (not is_rks and not is_uks and not is_gks) { + GAUXC_GENERIC_EXCEPTION("MUST BE EITHER RKS, UKS, or GKS!"); + } + + // Misc KS settings + IntegratorSettingsKS ks_settings; + if( auto* tmp = dynamic_cast(&settings) ) { + ks_settings = *tmp; + } - const bool is_uks = (Pz != nullptr) and (VXCz != nullptr); - const bool is_rks = not is_uks; // TODO: GKS + const double gks_dtol = ks_settings.gks_dtol; // Cast LWD to LocalHostWorkDriver auto* lwd = dynamic_cast(this->local_work_driver_.get()); @@ -141,6 +233,10 @@ void ReferenceReplicatedXCHostIntegrator:: const bool needs_laplacian = func.is_mgga() ? true : false; // TODO: Needs Laplacian Check + if (func.is_mgga() and is_gks) { + GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED WITH mGGA FUNCTIONALS!"); + } + // Get basis map BasisSetMap basis_map(basis,mol); @@ -168,13 +264,21 @@ void ReferenceReplicatedXCHostIntegrator:: VXCs[i + j*ldvxcs] = 0.; } } - if(is_uks) { + if(not is_rks) { for( auto j = 0; j < nbf; ++j ) { for( auto i = 0; i < nbf; ++i ) { VXCz[i + j*ldvxcz] = 0.; } } } + if(is_gks) { + for( auto j = 0; j < nbf; ++j ) { + for( auto i = 0; i < nbf; ++i ) { + VXCy[i + j*ldvxcy] = 0.; + VXCx[i + j*ldvxcx] = 0.; + } + } + } *EXC = 0.; @@ -188,7 +292,7 @@ void ReferenceReplicatedXCHostIntegrator:: #pragma omp for schedule(dynamic) for( size_t iT = 0; iT < ntasks; ++iT ) { - + //std::cout << iT << "/" << ntasks << std::endl; // Alias current task const auto& task = tasks[iT]; @@ -203,13 +307,15 @@ void ReferenceReplicatedXCHostIntegrator:: const int32_t* shell_list = task.bfn_screening.shell_list.data(); // Allocate enough memory for batch - - const size_t spin_dim_scal = is_rks ? 1 : 2; + + const size_t spin_dim_scal = is_rks ? 1 : is_uks ? 2 : 4; // last case is_gks + const size_t sds = is_rks ? 1 : 2; + const size_t gks_mod_KH = is_gks ? 6*npts : 0; // used to store H and H const size_t mgga_dim_scal = func.is_mgga() ? 4 : 1; // basis + d1basis // Things that every calc needs host_data.nbe_scr .resize(nbe * nbe); - host_data.zmat .resize(npts * nbe * spin_dim_scal * mgga_dim_scal); + host_data.zmat .resize(npts * nbe * spin_dim_scal * mgga_dim_scal + gks_mod_KH); host_data.eps .resize(npts); host_data.vrho .resize(npts * spin_dim_scal); @@ -218,7 +324,7 @@ void ReferenceReplicatedXCHostIntegrator:: host_data.basis_eval .resize( npts * nbe ); host_data.den_scr .resize( npts * spin_dim_scal); } - + // GGA data requirements const size_t gga_dim_scal = is_rks ? 1 : 3; if( func.is_gga() ){ @@ -232,8 +338,8 @@ void ReferenceReplicatedXCHostIntegrator:: // TODO: Add check for Laplacian-dependent functionals if ( needs_laplacian ) { host_data.basis_eval .resize( 11 * npts * nbe ); // basis + grad (3) + hess (6) + lapl - host_data.lapl .resize( spin_dim_scal * npts ); - host_data.vlapl .resize( spin_dim_scal * npts ); + host_data.lapl .resize( spin_dim_scal * npts ); + host_data.vlapl .resize( spin_dim_scal * npts ); } else { host_data.basis_eval .resize( 4 * npts * nbe ); // basis + grad (3) } @@ -252,10 +358,16 @@ void ReferenceReplicatedXCHostIntegrator:: auto* zmat = host_data.zmat.data(); decltype(zmat) zmat_z = nullptr; + decltype(zmat) zmat_x = nullptr; + decltype(zmat) zmat_y = nullptr; if(!is_rks) { zmat_z = zmat + mgga_dim_scal * nbe * npts; } - + if(is_gks) { + zmat_x = zmat_z + nbe * npts; + zmat_y = zmat_x + nbe * npts; + } + auto* eps = host_data.eps.data(); auto* gamma = host_data.gamma.data(); auto* tau = host_data.tau.data(); @@ -279,6 +391,9 @@ void ReferenceReplicatedXCHostIntegrator:: value_type* dden_x_eval = nullptr; value_type* dden_y_eval = nullptr; value_type* dden_z_eval = nullptr; + value_type* K = nullptr; + value_type* H = nullptr; + if (is_gks) { K = zmat + npts * nbe * 4; } value_type* mmat_x = nullptr; value_type* mmat_y = nullptr; value_type* mmat_z = nullptr; @@ -293,6 +408,7 @@ void ReferenceReplicatedXCHostIntegrator:: dden_x_eval = den_eval + spin_dim_scal * npts; dden_y_eval = dden_x_eval + spin_dim_scal * npts; dden_z_eval = dden_y_eval + spin_dim_scal * npts; + if (is_gks) { H = K + 3*npts;} } if ( func.is_mgga() ) { @@ -306,18 +422,18 @@ void ReferenceReplicatedXCHostIntegrator:: mmat_y = mmat_x + npts * nbe; mmat_z = mmat_y + npts * nbe; if ( needs_laplacian ) { - d2basis_xx_eval = dbasis_z_eval + npts * nbe; - d2basis_xy_eval = d2basis_xx_eval + npts * nbe; - d2basis_xz_eval = d2basis_xy_eval + npts * nbe; - d2basis_yy_eval = d2basis_xz_eval + npts * nbe; - d2basis_yz_eval = d2basis_yy_eval + npts * nbe; - d2basis_zz_eval = d2basis_yz_eval + npts * nbe; - lbasis_eval = d2basis_zz_eval + npts * nbe; + d2basis_xx_eval = dbasis_z_eval + npts * nbe; + d2basis_xy_eval = d2basis_xx_eval + npts * nbe; + d2basis_xz_eval = d2basis_xy_eval + npts * nbe; + d2basis_yy_eval = d2basis_xz_eval + npts * nbe; + d2basis_yz_eval = d2basis_yy_eval + npts * nbe; + d2basis_zz_eval = d2basis_yz_eval + npts * nbe; + lbasis_eval = d2basis_zz_eval + npts * nbe; } if(is_uks) { - mmat_x_z = zmat_z + npts * nbe; - mmat_y_z = mmat_x_z + npts * nbe; - mmat_z_z = mmat_y_z + npts * nbe; + mmat_x_z = zmat_z + npts * nbe; + mmat_y_z = mmat_x_z + npts * nbe; + mmat_z_z = mmat_y_z + npts * nbe; } } @@ -330,11 +446,11 @@ void ReferenceReplicatedXCHostIntegrator:: // Evaluate Collocation (+ Grad and Hessian) if( func.is_mgga() ) { if ( needs_laplacian ) { - // TODO: Modify gau2grid to compute Laplacian instead of full hessian + // TODO: Modify gau2grid to compute Laplacian instead of full hessian lwd->eval_collocation_hessian( npts, nshells, nbe, points, basis, shell_list, basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, d2basis_xx_eval, - d2basis_xy_eval, d2basis_xz_eval, d2basis_yy_eval, d2basis_yz_eval, - d2basis_zz_eval); + d2basis_xy_eval, d2basis_xz_eval, d2basis_yy_eval, d2basis_yz_eval, + d2basis_zz_eval); blas::lacpy( 'A', nbe, npts, d2basis_xx_eval, nbe, lbasis_eval, nbe ); blas::axpy( nbe * npts, 1., d2basis_yy_eval, 1, lbasis_eval, 1); blas::axpy( nbe * npts, 1., d2basis_zz_eval, 1, lbasis_eval, 1); @@ -351,7 +467,7 @@ void ReferenceReplicatedXCHostIntegrator:: lwd->eval_collocation( npts, nshells, nbe, points, basis, shell_list, basis_eval ); - + // Evaluate X matrix (fac * P * B) -> store in Z const auto xmat_fac = is_rks ? 2.0 : 1.0; // TODO Fix for spinor RKS input lwd->eval_xmat( mgga_dim_scal * npts, nbf, nbe, submat_map, xmat_fac, Ps, ldps, basis_eval, nbe, @@ -362,19 +478,25 @@ void ReferenceReplicatedXCHostIntegrator:: lwd->eval_xmat( mgga_dim_scal * npts, nbf, nbe, submat_map, 1.0, Pz, ldpz, basis_eval, nbe, zmat_z, nbe, nbe_scr); } - - + + if(is_gks) { + lwd->eval_xmat( npts, nbf, nbe, submat_map, 1.0, Py, ldpy, basis_eval, nbe, + zmat_x, nbe, nbe_scr); + lwd->eval_xmat( npts, nbf, nbe, submat_map, 1.0, Px, ldpx, basis_eval, nbe, + zmat_y, nbe, nbe_scr); + } + // Evaluate U and V variables if( func.is_mgga() ) { if (is_rks) { lwd->eval_uvvar_mgga_rks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, - dbasis_z_eval, lbasis_eval, zmat, nbe, mmat_x, mmat_y, mmat_z, - nbe, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, gamma, tau, lapl); + dbasis_z_eval, lbasis_eval, zmat, nbe, mmat_x, mmat_y, mmat_z, + nbe, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, gamma, tau, lapl); } else if (is_uks) { lwd->eval_uvvar_mgga_uks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, - dbasis_z_eval, lbasis_eval, zmat, nbe, zmat_z, nbe, - mmat_x, mmat_y, mmat_z, nbe, mmat_x_z, mmat_y_z, mmat_z_z, nbe, - den_eval, dden_x_eval, dden_y_eval, dden_z_eval, gamma, tau, lapl); + dbasis_z_eval, lbasis_eval, zmat, nbe, zmat_z, nbe, + mmat_x, mmat_y, mmat_z, nbe, mmat_x_z, mmat_y_z, mmat_z_z, nbe, + den_eval, dden_x_eval, dden_y_eval, dden_z_eval, gamma, tau, lapl); } } else if ( func.is_gga() ) { if(is_rks) { @@ -385,13 +507,21 @@ void ReferenceReplicatedXCHostIntegrator:: lwd->eval_uvvar_gga_uks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, zmat, nbe, zmat_z, nbe, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, gamma ); + } else if(is_gks) { + lwd->eval_uvvar_gga_gks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, + dbasis_z_eval, zmat, nbe, zmat_z, nbe, zmat_x, nbe, zmat_y, nbe, den_eval, dden_x_eval, + dden_y_eval, dden_z_eval, gamma, K, H, gks_dtol ); } + } else { if(is_rks) { lwd->eval_uvvar_lda_rks( npts, nbe, basis_eval, zmat, nbe, den_eval ); } else if(is_uks) { lwd->eval_uvvar_lda_uks( npts, nbe, basis_eval, zmat, nbe, zmat_z, nbe, den_eval ); + } else if(is_gks) { + lwd->eval_uvvar_lda_gks( npts, nbe, basis_eval, zmat, nbe, zmat_z, nbe, + zmat_x, nbe, zmat_y, nbe, den_eval, K, gks_dtol ); } } @@ -406,10 +536,10 @@ void ReferenceReplicatedXCHostIntegrator:: // Factor weights into XC results for( int32_t i = 0; i < npts; ++i ) { eps[i] *= weights[i]; - vrho[spin_dim_scal*i] *= weights[i]; - if(not is_rks) vrho[spin_dim_scal*i+1] *= weights[i]; + vrho[sds*i] *= weights[i]; + if(not is_rks) vrho[sds*i+1] *= weights[i]; } - + if( func.is_gga() ){ for( int32_t i = 0; i < npts; ++i ) { vgamma[gga_dim_scal*i] *= weights[i]; @@ -422,21 +552,21 @@ void ReferenceReplicatedXCHostIntegrator:: if( func.is_mgga() ){ for( int32_t i = 0; i < npts; ++i) { - vtau[spin_dim_scal*i] *= weights[i]; - vgamma[gga_dim_scal*i] *= weights[i]; - if(not is_rks) { - vgamma[gga_dim_scal*i+1] *= weights[i]; - vgamma[gga_dim_scal*i+2] *= weights[i]; - vtau[spin_dim_scal*i+1] *= weights[i]; - } - - // TODO: Add checks for Lapacian-dependent functionals - if( needs_laplacian ) { + vtau[spin_dim_scal*i] *= weights[i]; + vgamma[gga_dim_scal*i] *= weights[i]; + if(not is_rks) { + vgamma[gga_dim_scal*i+1] *= weights[i]; + vgamma[gga_dim_scal*i+2] *= weights[i]; + vtau[spin_dim_scal*i+1] *= weights[i]; + } + + // TODO: Add checks for Lapacian-dependent functionals + if( needs_laplacian ) { vlapl[spin_dim_scal*i] *= weights[i]; if(not is_rks) { vlapl[spin_dim_scal*i+1] *= weights[i]; - } - } + } + } } } @@ -445,17 +575,17 @@ void ReferenceReplicatedXCHostIntegrator:: // Evaluate Z matrix for VXC if( func.is_mgga() ) { if(is_rks) { - lwd->eval_zmat_mgga_vxc_rks( npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval, - dbasis_y_eval, dbasis_z_eval, lbasis_eval, - dden_x_eval, dden_y_eval, dden_z_eval, zmat, nbe); - lwd->eval_mmat_mgga_vxc_rks( npts, nbe, vtau, vlapl, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, - mmat_x, mmat_y, mmat_z, nbe); + lwd->eval_zmat_mgga_vxc_rks( npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, lbasis_eval, + dden_x_eval, dden_y_eval, dden_z_eval, zmat, nbe); + lwd->eval_mmat_mgga_vxc_rks( npts, nbe, vtau, vlapl, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, + mmat_x, mmat_y, mmat_z, nbe); } else if (is_uks) { - lwd->eval_zmat_mgga_vxc_uks( npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval, - dbasis_y_eval, dbasis_z_eval, lbasis_eval, - dden_x_eval, dden_y_eval, dden_z_eval, zmat, nbe, zmat_z, nbe); - lwd->eval_mmat_mgga_vxc_uks( npts, nbe, vtau, vlapl, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, - mmat_x, mmat_y, mmat_z, nbe, mmat_x_z, mmat_y_z, mmat_z_z, nbe); + lwd->eval_zmat_mgga_vxc_uks( npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, lbasis_eval, + dden_x_eval, dden_y_eval, dden_z_eval, zmat, nbe, zmat_z, nbe); + lwd->eval_mmat_mgga_vxc_uks( npts, nbe, vtau, vlapl, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, + mmat_x, mmat_y, mmat_z, nbe, mmat_x_z, mmat_y_z, mmat_z_z, nbe); } } else if( func.is_gga() ) { @@ -467,16 +597,25 @@ void ReferenceReplicatedXCHostIntegrator:: lwd->eval_zmat_gga_vxc_uks( npts, nbe, vrho, vgamma, basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, dden_x_eval, dden_y_eval, dden_z_eval, zmat, nbe, zmat_z, nbe); + } else if(is_gks) { + lwd->eval_zmat_gga_vxc_gks( npts, nbe, vrho, vgamma, basis_eval, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, dden_x_eval, dden_y_eval, + dden_z_eval, zmat, nbe, zmat_z, nbe, zmat_x, nbe, zmat_y, nbe, + K, H); } + } else { if(is_rks) { lwd->eval_zmat_lda_vxc_rks( npts, nbe, vrho, basis_eval, zmat, nbe ); } else if(is_uks) { lwd->eval_zmat_lda_vxc_uks( npts, nbe, vrho, basis_eval, zmat, nbe, zmat_z, nbe ); + } else if(is_gks) { + lwd->eval_zmat_lda_vxc_gks( npts, nbe, vrho, basis_eval, zmat, nbe, zmat_z, nbe, + zmat_x, nbe, zmat_y, nbe, K); } } - + // Incremeta LT of VXC #pragma omp critical { @@ -492,7 +631,14 @@ void ReferenceReplicatedXCHostIntegrator:: if(not is_rks) { lwd->inc_vxc( mgga_dim_scal * npts, nbf, nbe, basis_eval, submat_map, zmat_z, nbe,VXCz, ldvxcz, nbe_scr); } - + if(is_gks) { + lwd->inc_vxc( npts, nbf, nbe, basis_eval, submat_map, zmat_x, nbe, VXCy, ldvxcy, + nbe_scr); + lwd->inc_vxc( npts, nbf, nbe, basis_eval, submat_map, zmat_y, nbe, VXCx, ldvxcx, + nbe_scr); + } + + } } // Loop over tasks @@ -512,6 +658,14 @@ void ReferenceReplicatedXCHostIntegrator:: } } } + if( is_gks) { + for( int32_t j = 0; j < nbf; ++j ) { + for( int32_t i = j+1; i < nbf; ++i ) { + VXCy[ j + i*ldvxcy ] = VXCy[ i + j*ldvxcy ]; + VXCx[ j + i*ldvxcx ] = VXCx[ i + j*ldvxcx ]; + } + } + } } diff --git a/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx b/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx index 6833e2f5..92c5bcf3 100644 --- a/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx +++ b/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx @@ -36,26 +36,53 @@ template void ReplicatedXCIntegratorImpl:: eval_exc_vxc( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* VXC, int64_t ldvxc, - value_type* EXC ) { + value_type* EXC, const IntegratorSettingsXC& ks_settings ) { - eval_exc_vxc_(m,n,P,ldp,VXC,ldvxc,EXC); + eval_exc_vxc_(m,n,P,ldp,VXC,ldvxc,EXC,ks_settings); } template void ReplicatedXCIntegratorImpl:: - eval_exc_vxc( int64_t m, int64_t n, const value_type* Pscalar, - int64_t ldpscalar, + eval_exc_vxc( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, const value_type* Pz, int64_t ldpz, - value_type* VXCscalar, int64_t ldvxcscalar, + value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, - value_type* EXC ) { + value_type* EXC, const IntegratorSettingsXC& ks_settings) { - eval_exc_vxc_(m,n,Pscalar,ldpscalar, + eval_exc_vxc_(m,n,Ps,ldps, Pz,ldpz, - VXCscalar,ldvxcscalar, - VXCz,ldvxcz,EXC); + VXCs,ldvxcs, + VXCz,ldvxcz,EXC, ks_settings); + +} + +template +void ReplicatedXCIntegratorImpl:: + eval_exc_vxc( int64_t m, int64_t n, const value_type* Ps, + int64_t ldps, + const value_type* Pz, + int64_t ldpz, + const value_type* Py, + int64_t ldpy, + const value_type* Px, + int64_t ldpx, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* VXCy, int64_t ldvxcy, + value_type* VXCx, int64_t ldvxcx, + value_type* EXC, const IntegratorSettingsXC& ks_settings ) { + + eval_exc_vxc_(m,n,Ps,ldps, + Pz,ldpz, + Py,ldpy, + Px,ldpx, + VXCs,ldvxcs, + VXCz,ldvxcz, + VXCy,ldvxcy, + VXCx,ldvxcx,EXC, ks_settings); } diff --git a/tests/ref_data/h3_blyp_cc-pvdz_ssf_gks.bin b/tests/ref_data/h3_blyp_cc-pvdz_ssf_gks.bin new file mode 100644 index 00000000..a9e4bacc Binary files /dev/null and b/tests/ref_data/h3_blyp_cc-pvdz_ssf_gks.bin differ diff --git a/tests/xc_integrator.cxx b/tests/xc_integrator.cxx index 5dbc90be..f10501c2 100644 --- a/tests/xc_integrator.cxx +++ b/tests/xc_integrator.cxx @@ -36,10 +36,10 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, using matrix_type = Eigen::MatrixXd; Molecule mol; BasisSet basis; - matrix_type P, Pz, VXC_ref, VXCz_ref, K_ref; + matrix_type P, Pz, Py, Px, VXC_ref, VXCz_ref, VXCy_ref, VXCx_ref, K_ref; double EXC_ref; std::vector EXC_GRAD_ref; - bool has_k = false, has_exc_grad = false, uks = false; + bool has_k = false, has_exc_grad = false, rks = true, uks = false, gks = false; { read_hdf5_record( mol, reference_file, "/MOLECULE" ); read_hdf5_record( basis, reference_file, "/BASIS" ); @@ -48,36 +48,66 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, std::string den="/DENSITY"; std::string den2="/DENSITY_Z"; + std::string den3="/DENSITY_Y"; + std::string den4="/DENSITY_X"; std::string vxc="/VXC"; std::string vxc2="VXC_Z"; - - if (file.exist("/DENSITY_Z")) { + std::string vxc3="VXC_Y"; + std::string vxc4="VXC_X"; + + if (file.exist("/DENSITY_Z")) { rks = false; } + + if (file.exist("/DENSITY_Z") and not file.exist("/DENSITY_Y") and not file.exist("/DENSITY_X")) { den="/DENSITY_SCALAR"; vxc="/VXC_SCALAR"; uks=true; } - + + if (file.exist("/DENSITY_X") and file.exist("/DENSITY_Y") and file.exist("/DENSITY_Z")) { + den="/DENSITY_SCALAR"; + vxc="/VXC_SCALAR"; + gks=true; + } + auto dset = file.getDataSet(den); auto dims = dset.getDimensions(); P = matrix_type( dims[0], dims[1] ); VXC_ref = matrix_type( dims[0], dims[1] ); - if (uks) { + if (not rks) { Pz = matrix_type( dims[0], dims[1] ); VXCz_ref = matrix_type( dims[0], dims[1] ); + } + if (gks) { + Py = matrix_type( dims[0], dims[1] ); + VXCy_ref = matrix_type( dims[0], dims[1] ); + Px = matrix_type( dims[0], dims[1] ); + VXCx_ref = matrix_type( dims[0], dims[1] ); } + dset.read( P.data() ); dset = file.getDataSet(vxc); dset.read( VXC_ref.data() ); - if (uks) { + if (not rks) { dset = file.getDataSet(den2); dset.read( Pz.data() ); dset = file.getDataSet(vxc2); dset.read( VXCz_ref.data() ); } - + + if (gks) { + dset = file.getDataSet(den3); + dset.read( Py.data() ); + dset = file.getDataSet(vxc3); + dset.read( VXCy_ref.data() ); + dset = file.getDataSet(den4); + dset.read( Px.data() ); + dset = file.getDataSet(vxc4); + dset.read( VXCx_ref.data() ); + } + dset = file.getDataSet("/EXC"); dset.read( &EXC_ref ); @@ -97,6 +127,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, } if(uks and ex == ExecutionSpace::Device) return; + if(gks and ex == ExecutionSpace::Device) return; if(func.is_mgga() and ex == ExecutionSpace::Device) return; @@ -127,7 +158,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, auto integrator = integrator_factory.get_instance( func, lb ); // Integrate Density - if( check_integrate_den and not uks ) { + if( check_integrate_den and rks) { auto N_EL_ref = std::accumulate( mol.begin(), mol.end(), 0ul, [](const auto& a, const auto &b) { return a + b.Z.get(); }); auto N_EL = integrator.integrate_den( P ); @@ -136,7 +167,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, } // Integrate EXC/VXC - if ( not uks ) { + if ( rks ) { auto [ EXC, VXC ] = integrator.eval_exc_vxc( P ); // Check EXC/VXC @@ -150,7 +181,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, auto VXC1_diff_nrm = ( VXC1 - VXC_ref ).norm(); CHECK( VXC1_diff_nrm / basis.nbf() < 1e-10 ); } - } else { + } else if (uks) { auto [ EXC, VXC, VXCz ] = integrator.eval_exc_vxc( P, Pz ); // Check EXC/VXC @@ -169,9 +200,40 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, CHECK( VXCz1_diff_nrm / basis.nbf() < 1e-10 ); } + } else if (gks) { + auto [ EXC, VXC, VXCz, VXCy, VXCx ] = integrator.eval_exc_vxc( P, Pz, Py, Px ); + + // Check EXC/VXC + auto VXC_diff_nrm = ( VXC - VXC_ref ).norm(); + auto VXCz_diff_nrm = ( VXCz - VXCz_ref ).norm(); + auto VXCy_diff_nrm = ( VXCy - VXCy_ref ).norm(); + auto VXCx_diff_nrm = ( VXCx - VXCx_ref ).norm(); + + CHECK( EXC == Approx( EXC_ref ) ); + CHECK( VXC_diff_nrm / basis.nbf() < 1e-10 ); + CHECK( VXCz_diff_nrm / basis.nbf() < 1e-10 ); + CHECK( VXCy_diff_nrm / basis.nbf() < 1e-10 ); + CHECK( VXCx_diff_nrm / basis.nbf() < 1e-10 ); + // Check if the integrator propagates state correctly + { + auto [ EXC1, VXC1, VXCz1, VXCy1, VXCx1] = integrator.eval_exc_vxc( P, Pz, Py, Px ); + CHECK( EXC1 == Approx( EXC_ref ) ); + auto VXC1_diff_nrm = ( VXC1 - VXC_ref ).norm(); + auto VXCz1_diff_nrm = ( VXCz1 - VXCz_ref ).norm(); + auto VXCy1_diff_nrm = ( VXCy1 - VXCy_ref ).norm(); + auto VXCx1_diff_nrm = ( VXCx1 - VXCx_ref ).norm(); + CHECK( VXC1_diff_nrm / basis.nbf() < 1e-10 ); + CHECK( VXCz1_diff_nrm / basis.nbf() < 1e-10 ); + CHECK( VXCy1_diff_nrm / basis.nbf() < 1e-10 ); + CHECK( VXCx1_diff_nrm / basis.nbf() < 1e-10 ); + } + } + + + // Check EXC Grad - if( check_grad and has_exc_grad and not uks) { + if( check_grad and has_exc_grad and rks) { auto EXC_GRAD = integrator.eval_exc_grad( P ); using map_type = Eigen::Map; map_type EXC_GRAD_ref_map( EXC_GRAD_ref.data(), mol.size(), 3 ); @@ -181,7 +243,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, } // Check K - if( has_k and check_k and not uks ) { + if( has_k and check_k and rks ) { auto K = integrator.eval_exx( P ); CHECK((K - K.transpose()).norm() < std::numeric_limits::epsilon()); // Symmetric CHECK( (K - K_ref).norm() / basis.nbf() < 1e-7 ); @@ -346,6 +408,12 @@ TEST_CASE( "XC Integrator", "[xc-integrator]" ) { func, PruningScheme::Robust ); } + //GKS GGA Test + SECTION( "H3 / BLYP / cc-pvdz" ) { + auto func = make_functional(blyp, pol); + test_integrator(GAUXC_REF_DATA_PATH "/h3_blyp_cc-pvdz_ssf_gks.bin", + func, PruningScheme::Unpruned ); + } // sn-LinK Test SECTION( "Benzene / PBE0 / 6-31G(d)" ) {