diff --git a/include/gauxc/xc_integrator.hpp b/include/gauxc/xc_integrator.hpp
index 547d07f1..3cffcf7e 100644
--- a/include/gauxc/xc_integrator.hpp
+++ b/include/gauxc/xc_integrator.hpp
@@ -33,6 +33,7 @@ class XCIntegrator {
 
   using exc_vxc_type_rks  = std::tuple< value_type, matrix_type >;
   using exc_vxc_type_uks  = std::tuple< value_type, matrix_type, matrix_type >;  
+  using exc_vxc_type_gks  = std::tuple< value_type, matrix_type, matrix_type, matrix_type, matrix_type >;
   using exc_grad_type = std::vector< value_type >;
   using exx_type      = matrix_type;
 
@@ -53,8 +54,12 @@ class XCIntegrator {
   XCIntegrator( XCIntegrator&& ) noexcept;
 
   value_type    integrate_den( const MatrixType& );
-  exc_vxc_type_rks  eval_exc_vxc ( const MatrixType& );
-  exc_vxc_type_uks  eval_exc_vxc ( const MatrixType&, const MatrixType& );
+  exc_vxc_type_rks  eval_exc_vxc ( const MatrixType&, 
+                                   const IntegratorSettingsXC& = IntegratorSettingsXC{} );
+  exc_vxc_type_uks  eval_exc_vxc ( const MatrixType&, const MatrixType&,
+                                   const IntegratorSettingsXC& = IntegratorSettingsXC{} );
+  exc_vxc_type_gks  eval_exc_vxc ( const MatrixType&, const MatrixType&, const MatrixType&, const MatrixType&,
+                                   const IntegratorSettingsXC& = IntegratorSettingsXC{});
   exc_grad_type eval_exc_grad( const MatrixType& );
   exx_type      eval_exx     ( const MatrixType&, 
                                const IntegratorSettingsEXX& = IntegratorSettingsEXX{} );
diff --git a/include/gauxc/xc_integrator/impl.hpp b/include/gauxc/xc_integrator/impl.hpp
index 7d839647..c7956b57 100644
--- a/include/gauxc/xc_integrator/impl.hpp
+++ b/include/gauxc/xc_integrator/impl.hpp
@@ -33,18 +33,26 @@ typename XCIntegrator<MatrixType>::value_type
 
 template <typename MatrixType>
 typename XCIntegrator<MatrixType>::exc_vxc_type_rks
-  XCIntegrator<MatrixType>::eval_exc_vxc( const MatrixType& P ) {
+  XCIntegrator<MatrixType>::eval_exc_vxc( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) {
   if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED();
-  return pimpl_->eval_exc_vxc(P);
+  return pimpl_->eval_exc_vxc(P, ks_settings);
 };
 
 template <typename MatrixType>
 typename XCIntegrator<MatrixType>::exc_vxc_type_uks
-  XCIntegrator<MatrixType>::eval_exc_vxc( const MatrixType& Pscalar, const MatrixType& Pz ) {
+  XCIntegrator<MatrixType>::eval_exc_vxc( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) {
   if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED();
-  return pimpl_->eval_exc_vxc(Pscalar, Pz);
+  return pimpl_->eval_exc_vxc(Ps, Pz, ks_settings);
 };
 
+template <typename MatrixType>
+typename XCIntegrator<MatrixType>::exc_vxc_type_gks
+  XCIntegrator<MatrixType>::eval_exc_vxc( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px, 
+                                          const IntegratorSettingsXC& ks_settings ) {
+      if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED();
+        return pimpl_->eval_exc_vxc(Ps, Pz, Py, Px, ks_settings);
+  };
+
 template <typename MatrixType>
 typename XCIntegrator<MatrixType>::exc_grad_type
   XCIntegrator<MatrixType>::eval_exc_grad( const MatrixType& P ) {
diff --git a/include/gauxc/xc_integrator/replicated/impl.hpp b/include/gauxc/xc_integrator/replicated/impl.hpp
index 8d3d23b6..132faa09 100644
--- a/include/gauxc/xc_integrator/replicated/impl.hpp
+++ b/include/gauxc/xc_integrator/replicated/impl.hpp
@@ -63,14 +63,14 @@ typename ReplicatedXCIntegrator<MatrixType>::value_type
 
 template <typename MatrixType>
 typename ReplicatedXCIntegrator<MatrixType>::exc_vxc_type_rks 
-  ReplicatedXCIntegrator<MatrixType>::eval_exc_vxc_( const MatrixType& P ) {
+  ReplicatedXCIntegrator<MatrixType>::eval_exc_vxc_( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) {
 
   if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED();
   matrix_type VXC( P.rows(), P.cols() );
   value_type  EXC;
 
   pimpl_->eval_exc_vxc( P.rows(), P.cols(), P.data(), P.rows(),
-                        VXC.data(), VXC.rows(), &EXC );
+                        VXC.data(), VXC.rows(), &EXC, ks_settings );
 
   return std::make_tuple( EXC, VXC );
 
@@ -78,19 +78,44 @@ typename ReplicatedXCIntegrator<MatrixType>::exc_vxc_type_rks
 
 template <typename MatrixType>
 typename ReplicatedXCIntegrator<MatrixType>::exc_vxc_type_uks
-  ReplicatedXCIntegrator<MatrixType>::eval_exc_vxc_( const MatrixType& Pscalar, const MatrixType& Pz ) {
+  ReplicatedXCIntegrator<MatrixType>::eval_exc_vxc_( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) {
 
   if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED();
-  matrix_type VXCscalar( Pscalar.rows(), Pscalar.cols() );
+  matrix_type VXCs( Ps.rows(), Ps.cols() );
   matrix_type VXCz( Pz.rows(), Pz.cols() );
   value_type  EXC;
 
-  pimpl_->eval_exc_vxc( Pscalar.rows(), Pscalar.cols(), Pscalar.data(), Pscalar.rows(),
+  pimpl_->eval_exc_vxc( Ps.rows(), Ps.cols(), Ps.data(), Ps.rows(),
                         Pz.data(), Pz.rows(),
-                        VXCscalar.data(), VXCscalar.rows(),
-                        VXCz.data(), VXCz.rows(), &EXC );
+                        VXCs.data(), VXCs.rows(),
+                        VXCz.data(), VXCz.rows(), &EXC, ks_settings );
 
-  return std::make_tuple( EXC, VXCscalar, VXCz );
+  return std::make_tuple( EXC, VXCs, VXCz );
+
+}
+
+template <typename MatrixType>
+typename ReplicatedXCIntegrator<MatrixType>::exc_vxc_type_gks
+  ReplicatedXCIntegrator<MatrixType>::eval_exc_vxc_( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px,
+                                                     const IntegratorSettingsXC& ks_settings) {
+
+  if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED();
+  matrix_type VXCs( Ps.rows(), Ps.cols() );
+  matrix_type VXCz( Pz.rows(), Pz.cols() );
+  matrix_type VXCy( Py.rows(), Py.cols() );
+  matrix_type VXCx( Px.rows(), Px.cols() );
+  value_type  EXC;
+
+  pimpl_->eval_exc_vxc( Ps.rows(), Ps.cols(), Ps.data(), Ps.rows(),
+                        Pz.data(), Pz.rows(),
+                        Py.data(), Py.rows(),
+                        Px.data(), Px.rows(),
+                        VXCs.data(), VXCs.rows(),
+                        VXCz.data(), VXCz.rows(),
+                        VXCy.data(), VXCy.rows(),
+                        VXCx.data(), VXCx.rows(), &EXC, ks_settings );
+
+  return std::make_tuple( EXC, VXCs, VXCz, VXCy, VXCx);
 
 }
 
diff --git a/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp b/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp
index 4e0854fd..893db54c 100644
--- a/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp
+++ b/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp
@@ -40,14 +40,28 @@ class ReplicatedXCIntegratorImpl {
                                int64_t ldp, value_type* N_EL ) = 0;
   virtual void eval_exc_vxc_( int64_t m, int64_t n, const value_type* P,
                               int64_t ldp, value_type* VXC, int64_t ldvxc,
-                              value_type* EXC ) = 0;
-  virtual void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Pscalar,
-                              int64_t ldpscalar,
+                              value_type* EXC, const IntegratorSettingsXC& ks_settings ) = 0;
+  virtual void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                              int64_t ldps,
                               const value_type* Pz,
                               int64_t ldpz,
-                              value_type* VXCscalar, int64_t ldvxcscalar,
+                              value_type* VXCs, int64_t ldvxcs,
                               value_type* VXCz, int64_t ldvxcz,
-                              value_type* EXC ) = 0;
+                              value_type* EXC, const IntegratorSettingsXC& ks_settings ) = 0;
+  virtual void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                              int64_t ldps,
+                              const value_type* Pz,
+                              int64_t ldpz,
+                              const value_type* Py,
+                              int64_t ldpy,
+                              const value_type* Px,
+                              int64_t ldpx,
+                              value_type* VXCs, int64_t ldvxcs,
+                              value_type* VXCz, int64_t ldvxcz,
+                              value_type* VXCy, int64_t ldvxcy,
+                              value_type* VXCx, int64_t ldvxcx,
+                              value_type* EXC, const IntegratorSettingsXC& ks_settings ) = 0;
+
   virtual void eval_exc_grad_( int64_t m, int64_t n, const value_type* P,
                                int64_t ldp, value_type* EXC_GRAD ) = 0;
   virtual void eval_exx_( int64_t m, int64_t n, const value_type* P,
@@ -69,15 +83,29 @@ class ReplicatedXCIntegratorImpl {
 
   void eval_exc_vxc( int64_t m, int64_t n, const value_type* P,
                      int64_t ldp, value_type* VXC, int64_t ldvxc,
-                     value_type* EXC ); 
+                     value_type* EXC, const IntegratorSettingsXC& ks_settings ); 
 
-  void eval_exc_vxc( int64_t m, int64_t n, const value_type* Pscalar,
-                     int64_t ldpscalar,
+  void eval_exc_vxc( int64_t m, int64_t n, const value_type* Ps,
+                     int64_t ldps,
                      const value_type* Pz,
                      int64_t ldpz,
-                     value_type* VXCscalar, int64_t ldvxcscalar,
+                     value_type* VXCs, int64_t ldvxcs,
                      value_type* VXCz, int64_t ldvxcz,
-                     value_type* EXC );
+                     value_type* EXC, const IntegratorSettingsXC& ks_settings );
+  void eval_exc_vxc( int64_t m, int64_t n, const value_type* Ps,
+                     int64_t ldps,
+                     const value_type* Pz,
+                     int64_t ldpz,
+                     const value_type* Py,
+                     int64_t ldpy,
+                     const value_type* Px,
+                     int64_t ldpx,
+                     value_type* VXCs, int64_t ldvxcs,
+                     value_type* VXCz, int64_t ldvxcz,
+                     value_type* VXCy, int64_t ldvxcy,
+                     value_type* VXCx, int64_t ldvxcx,
+                     value_type* EXC, const IntegratorSettingsXC& ks_settings );
+
 
   void eval_exc_grad( int64_t m, int64_t n, const value_type* P,
                       int64_t ldp, value_type* EXC_GRAD );
diff --git a/include/gauxc/xc_integrator/replicated_xc_integrator.hpp b/include/gauxc/xc_integrator/replicated_xc_integrator.hpp
index a2a536f5..945f955b 100644
--- a/include/gauxc/xc_integrator/replicated_xc_integrator.hpp
+++ b/include/gauxc/xc_integrator/replicated_xc_integrator.hpp
@@ -30,6 +30,7 @@ class ReplicatedXCIntegrator : public XCIntegratorImpl<MatrixType> {
   using value_type     = typename XCIntegratorImpl<MatrixType>::value_type;
   using exc_vxc_type_rks   = typename XCIntegratorImpl<MatrixType>::exc_vxc_type_rks;
   using exc_vxc_type_uks   = typename XCIntegratorImpl<MatrixType>::exc_vxc_type_uks;
+  using exc_vxc_type_gks   = typename XCIntegratorImpl<MatrixType>::exc_vxc_type_gks;
   using exc_grad_type  = typename XCIntegratorImpl<MatrixType>::exc_grad_type;
   using exx_type       = typename XCIntegratorImpl<MatrixType>::exx_type;
 
@@ -39,8 +40,9 @@ class ReplicatedXCIntegrator : public XCIntegratorImpl<MatrixType> {
   std::unique_ptr< pimpl_type > pimpl_;
 
   value_type    integrate_den_( const MatrixType& ) override;
-  exc_vxc_type_rks  eval_exc_vxc_ ( const MatrixType& ) override;
-  exc_vxc_type_uks  eval_exc_vxc_ ( const MatrixType&, const MatrixType& ) override;
+  exc_vxc_type_rks  eval_exc_vxc_ ( const MatrixType&, const IntegratorSettingsXC& ) override;
+  exc_vxc_type_uks  eval_exc_vxc_ ( const MatrixType&, const MatrixType&, const IntegratorSettingsXC&) override;
+  exc_vxc_type_gks  eval_exc_vxc_ ( const MatrixType&, const MatrixType&, const MatrixType&, const MatrixType&, const IntegratorSettingsXC& ) override;
   exc_grad_type eval_exc_grad_( const MatrixType& ) override;
   exx_type      eval_exx_     ( const MatrixType&, const IntegratorSettingsEXX& ) override;
   const util::Timer& get_timings_() const override;
diff --git a/include/gauxc/xc_integrator/xc_integrator_impl.hpp b/include/gauxc/xc_integrator/xc_integrator_impl.hpp
index beb615a3..8a9ca618 100644
--- a/include/gauxc/xc_integrator/xc_integrator_impl.hpp
+++ b/include/gauxc/xc_integrator/xc_integrator_impl.hpp
@@ -22,14 +22,17 @@ class XCIntegratorImpl {
   using value_type     = typename matrix_type::value_type;
   using exc_vxc_type_rks   = typename XCIntegrator<MatrixType>::exc_vxc_type_rks;
   using exc_vxc_type_uks   = typename XCIntegrator<MatrixType>::exc_vxc_type_uks;
+  using exc_vxc_type_gks   = typename XCIntegrator<MatrixType>::exc_vxc_type_gks;
   using exc_grad_type  = typename XCIntegrator<MatrixType>::exc_grad_type;
   using exx_type       = typename XCIntegrator<MatrixType>::exx_type;
 
 protected:
 
   virtual value_type    integrate_den_( const MatrixType& P ) = 0;
-  virtual exc_vxc_type_rks  eval_exc_vxc_ ( const MatrixType& P ) = 0;
-  virtual exc_vxc_type_uks  eval_exc_vxc_ ( const MatrixType& Pscalar, const MatrixType& Pz ) = 0;
+  virtual exc_vxc_type_rks  eval_exc_vxc_ ( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) = 0;
+  virtual exc_vxc_type_uks  eval_exc_vxc_ ( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) = 0;
+  virtual exc_vxc_type_gks  eval_exc_vxc_ ( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px, 
+                                            const IntegratorSettingsXC& ks_settings ) = 0;
   virtual exc_grad_type eval_exc_grad_( const MatrixType& P ) = 0;
   virtual exx_type      eval_exx_     ( const MatrixType&     P, 
                                         const IntegratorSettingsEXX& settings ) = 0;
@@ -62,12 +65,16 @@ class XCIntegratorImpl {
    *  @param[in] P The alpha density matrix
    *  @returns EXC / VXC in a combined structure
    */
-  exc_vxc_type_rks eval_exc_vxc( const MatrixType& P ) {
-    return eval_exc_vxc_(P);
+  exc_vxc_type_rks eval_exc_vxc( const MatrixType& P, const IntegratorSettingsXC& ks_settings ) {
+    return eval_exc_vxc_(P, ks_settings);
   }
 
-  exc_vxc_type_uks eval_exc_vxc( const MatrixType& Pscalar, const MatrixType& Pz ) {
-    return eval_exc_vxc_(Pscalar, Pz);
+  exc_vxc_type_uks eval_exc_vxc( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) {
+    return eval_exc_vxc_(Ps, Pz, ks_settings);
+  }
+
+  exc_vxc_type_gks eval_exc_vxc( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px, const IntegratorSettingsXC& ks_settings ) {
+    return eval_exc_vxc_(Ps, Pz, Py, Px, ks_settings);
   }
 
   /** Integrate EXC gradient for RKS
diff --git a/include/gauxc/xc_integrator_settings.hpp b/include/gauxc/xc_integrator_settings.hpp
index 7a8c5074..11c21609 100644
--- a/include/gauxc/xc_integrator_settings.hpp
+++ b/include/gauxc/xc_integrator_settings.hpp
@@ -16,4 +16,9 @@ struct IntegratorSettingsSNLinK : public IntegratorSettingsEXX {
   double k_tol      = 1e-10;
 };
 
+struct IntegratorSettingsXC { virtual ~IntegratorSettingsXC() noexcept = default; };
+struct IntegratorSettingsKS : public IntegratorSettingsXC {
+  double gks_dtol = 1e-12;
+};
+
 }
diff --git a/src/xc_integrator/local_work_driver/device/local_device_work_driver.cxx b/src/xc_integrator/local_work_driver/device/local_device_work_driver.cxx
index 81e21517..49551850 100644
--- a/src/xc_integrator/local_work_driver/device/local_device_work_driver.cxx
+++ b/src/xc_integrator/local_work_driver/device/local_device_work_driver.cxx
@@ -44,12 +44,18 @@ FWD_TO_PIMPL(eval_uvvar_gga_rks)            // U/VVar GGA (density + grad, gamma
 FWD_TO_PIMPL(eval_uvvar_lda_uks)            // U/VVar LDA (density)
 FWD_TO_PIMPL(eval_uvvar_gga_uks)            // U/VVar GGA (density + grad, gamma)
 
+FWD_TO_PIMPL(eval_uvvar_lda_gks)            // U/VVar LDA (density)
+FWD_TO_PIMPL(eval_uvvar_gga_gks)            // U/VVar GGA (density + grad, gamma)
+
 FWD_TO_PIMPL(eval_zmat_lda_vxc_rks)         // Eval Z Matrix LDA VXC
 FWD_TO_PIMPL(eval_zmat_gga_vxc_rks)         // Eval Z Matrix GGA VXC
 
 FWD_TO_PIMPL(eval_zmat_lda_vxc_uks)         // Eval Z Matrix LDA VXC
 FWD_TO_PIMPL(eval_zmat_gga_vxc_uks)         // Eval Z Matrix GGA VXC
 
+FWD_TO_PIMPL(eval_zmat_lda_vxc_gks)         // Eval Z Matrix LDA VXC
+FWD_TO_PIMPL(eval_zmat_gga_vxc_gks)         // Eval Z Matrix GGA VXC
+
 FWD_TO_PIMPL(eval_exx_fmat)             // Eval EXX F Matrix
 //FWD_TO_PIMPL(eval_exx_gmat)             // Eval EXX G Matrix
 
diff --git a/src/xc_integrator/local_work_driver/device/local_device_work_driver.hpp b/src/xc_integrator/local_work_driver/device/local_device_work_driver.hpp
index 1d8c8336..a2b89e64 100644
--- a/src/xc_integrator/local_work_driver/device/local_device_work_driver.hpp
+++ b/src/xc_integrator/local_work_driver/device/local_device_work_driver.hpp
@@ -69,6 +69,9 @@ class LocalDeviceWorkDriver : public LocalWorkDriver {
   void eval_uvvar_lda_uks( XCDeviceData* );
   void eval_uvvar_gga_uks( XCDeviceData* );
 
+  void eval_uvvar_lda_gks( XCDeviceData* );
+  void eval_uvvar_gga_gks( XCDeviceData* );
+
   void eval_kern_exc_vxc_lda( const functional_type&, XCDeviceData* );
   void eval_kern_exc_vxc_gga( const functional_type&, XCDeviceData* );
 
@@ -78,6 +81,9 @@ class LocalDeviceWorkDriver : public LocalWorkDriver {
   void eval_zmat_lda_vxc_uks( XCDeviceData* );
   void eval_zmat_gga_vxc_uks( XCDeviceData* );
 
+  void eval_zmat_lda_vxc_gks( XCDeviceData* );
+  void eval_zmat_gga_vxc_gks( XCDeviceData* );
+
   void eval_exx_fmat( XCDeviceData* );
   void eval_exx_gmat( XCDeviceData*, const BasisSetMap& );
 
diff --git a/src/xc_integrator/local_work_driver/device/local_device_work_driver_pimpl.hpp b/src/xc_integrator/local_work_driver/device/local_device_work_driver_pimpl.hpp
index f9cfe6ca..4dd5e126 100644
--- a/src/xc_integrator/local_work_driver/device/local_device_work_driver_pimpl.hpp
+++ b/src/xc_integrator/local_work_driver/device/local_device_work_driver_pimpl.hpp
@@ -41,12 +41,16 @@ struct LocalDeviceWorkDriverPIMPL {
   virtual void eval_uvvar_gga_rks( XCDeviceData* ) = 0;
   virtual void eval_uvvar_lda_uks( XCDeviceData* ) = 0;
   virtual void eval_uvvar_gga_uks( XCDeviceData* ) = 0;
+  virtual void eval_uvvar_lda_gks( XCDeviceData* ) = 0;  
+  virtual void eval_uvvar_gga_gks( XCDeviceData* ) = 0;
   virtual void eval_kern_exc_vxc_lda( const functional_type&, XCDeviceData* ) = 0;
   virtual void eval_kern_exc_vxc_gga( const functional_type&, XCDeviceData* ) = 0;
   virtual void eval_zmat_lda_vxc_rks( XCDeviceData* ) = 0;
   virtual void eval_zmat_gga_vxc_rks( XCDeviceData* ) = 0;
   virtual void eval_zmat_lda_vxc_uks( XCDeviceData* ) = 0;
   virtual void eval_zmat_gga_vxc_uks( XCDeviceData* ) = 0;
+  virtual void eval_zmat_lda_vxc_gks( XCDeviceData* ) = 0;
+  virtual void eval_zmat_gga_vxc_gks( XCDeviceData* ) = 0;
   virtual void inc_exc( XCDeviceData* ) = 0;
   virtual void inc_nel( XCDeviceData* ) = 0;
   virtual void inc_vxc( XCDeviceData* ) = 0;
diff --git a/src/xc_integrator/local_work_driver/device/scheme1_base.cxx b/src/xc_integrator/local_work_driver/device/scheme1_base.cxx
index 4c5fdcd5..4d072627 100644
--- a/src/xc_integrator/local_work_driver/device/scheme1_base.cxx
+++ b/src/xc_integrator/local_work_driver/device/scheme1_base.cxx
@@ -288,6 +288,18 @@ void AoSScheme1Base::eval_zmat_gga_vxc_uks( XCDeviceData* ){
 
 }
 
+void AoSScheme1Base::eval_zmat_lda_vxc_gks( XCDeviceData* ){
+
+  GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE");
+
+}
+
+void AoSScheme1Base::eval_zmat_gga_vxc_gks( XCDeviceData* ){
+
+  GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE");
+
+}
+
 void AoSScheme1Base::eval_collocation( XCDeviceData* _data ) {
 
   auto* data = dynamic_cast<Data*>(_data);
@@ -489,6 +501,18 @@ void AoSScheme1Base::eval_uvvar_gga_uks( XCDeviceData* ){
 }
 
 
+void AoSScheme1Base::eval_uvvar_lda_gks( XCDeviceData* ){
+
+    GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE");
+
+}
+
+void AoSScheme1Base::eval_uvvar_gga_gks( XCDeviceData* ){
+
+    GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE");
+
+}
+
 void AoSScheme1Base::eval_kern_exc_vxc_lda( const functional_type& func, 
   XCDeviceData* _data ) {
 
diff --git a/src/xc_integrator/local_work_driver/device/scheme1_base.hpp b/src/xc_integrator/local_work_driver/device/scheme1_base.hpp
index 377272e3..e4a29af7 100644
--- a/src/xc_integrator/local_work_driver/device/scheme1_base.hpp
+++ b/src/xc_integrator/local_work_driver/device/scheme1_base.hpp
@@ -27,6 +27,11 @@ struct AoSScheme1Base : public detail::LocalDeviceWorkDriverPIMPL {
   void eval_zmat_lda_vxc_uks( XCDeviceData* ) override final;
   void eval_zmat_gga_vxc_uks( XCDeviceData* ) override final;
 
+  void eval_uvvar_lda_gks( XCDeviceData* ) override final;
+  void eval_uvvar_gga_gks( XCDeviceData* ) override final;
+  void eval_zmat_lda_vxc_gks( XCDeviceData* ) override final;
+  void eval_zmat_gga_vxc_gks( XCDeviceData* ) override final;
+
   void eval_kern_exc_vxc_lda( const functional_type&, XCDeviceData* ) override final;
   void eval_kern_exc_vxc_gga( const functional_type&, XCDeviceData* ) override final;
   void inc_exc( XCDeviceData* ) override final;
diff --git a/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx b/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx
index fd58656b..bed0ab95 100644
--- a/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx
+++ b/src/xc_integrator/local_work_driver/host/local_host_work_driver.cxx
@@ -167,6 +167,16 @@ void LocalHostWorkDriver::eval_uvvar_lda_uks( size_t npts, size_t nbe,
 
 }
 
+void LocalHostWorkDriver::eval_uvvar_lda_gks( size_t npts, size_t nbe,
+ const double* basis_eval, const double* Xs, size_t ldxs, const double* Xz,
+ size_t ldxz, const double* Xx, size_t ldxx, const double* Xy, size_t ldxy,
+ double* den_eval, double* K, const double dtol) {
+
+  throw_if_invalid_pimpl(pimpl_);
+  pimpl_->eval_uvvar_lda_gks(npts, nbe, basis_eval, Xs, ldxs, Xz, ldxz, Xx, ldxx, Xy, ldxy, den_eval, K, dtol);
+
+}
+
 
 // U/VVar GGA (density + grad, gamma)
 void LocalHostWorkDriver::eval_uvvar_gga_rks( size_t npts, size_t nbe, 
@@ -196,6 +206,21 @@ void LocalHostWorkDriver::eval_uvvar_gga_uks( size_t npts, size_t nbe,
 
 }
 
+void LocalHostWorkDriver::eval_uvvar_gga_gks( size_t npts, size_t nbe,
+  const double* basis_eval, const double* dbasis_x_eval,
+  const double *dbasis_y_eval, const double* dbasis_z_eval, const double* Xs,
+  size_t ldxs, const double* Xz, size_t ldxz, const double* Xx, size_t ldxx,
+  const double* Xy, size_t ldxy, double* den_eval, double* dden_x_eval, double* dden_y_eval,
+  double* dden_z_eval, double* gamma, double* K, double* H, const double dtol ) {
+
+  throw_if_invalid_pimpl(pimpl_);
+  pimpl_->eval_uvvar_gga_gks(npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval,
+    dbasis_z_eval, Xs, ldxs, Xz, ldxz, Xx, ldxx, Xy, ldxy, den_eval, dden_x_eval, dden_y_eval, dden_z_eval,
+    gamma, K, H, dtol);
+
+}
+
+
 // U/VVar MGGA(density, grad, gamma, tau, lapl)
 void LocalHostWorkDriver::eval_uvvar_mgga_rks( size_t npts, size_t nbe,
   const double* basis_eval, const double* dbasis_x_eval, 
@@ -250,6 +275,18 @@ void LocalHostWorkDriver::eval_zmat_lda_vxc_uks( size_t npts, size_t nbe,
 
 }
 
+void LocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nbe,
+  const double* vrho, const double* basis_eval, double* Zs, size_t ldzs,
+  double* Zz, size_t ldzz,double* Zx, size_t ldzx, double* Zy, size_t ldzy, double* K ) {
+
+  throw_if_invalid_pimpl(pimpl_);
+  pimpl_->eval_zmat_lda_vxc_gks(npts, nbe, vrho, basis_eval, Zs, ldzs,
+    Zz, ldzz, Zx, ldzx, Zy, ldzy, K);
+
+
+}
+
+
 // Eval Z Matrix GGA VXC
 void LocalHostWorkDriver::eval_zmat_gga_vxc_rks( size_t npts, size_t nbe, 
   const double* vrho, const double* vgamma, const double* basis_eval, 
@@ -278,6 +315,19 @@ void LocalHostWorkDriver::eval_zmat_gga_vxc_uks( size_t npts, size_t nbe,
 
 }
 
+void LocalHostWorkDriver::eval_zmat_gga_vxc_gks( size_t npts, size_t nbe,
+  const double* vrho, const double* vgamma, const double* basis_eval,
+  const double* dbasis_x_eval, const double* dbasis_y_eval,
+  const double* dbasis_z_eval, const double* dden_x_eval,
+  const double* dden_y_eval, const double* dden_z_eval, double* Zs, size_t ldzs,
+  double* Zz, size_t ldzz, double* Zx, size_t ldzx,double* Zy, size_t ldzy, double* K, double* H ) {
+
+  throw_if_invalid_pimpl(pimpl_);
+  pimpl_->eval_zmat_gga_vxc_gks(npts, nbe, vrho, vgamma, basis_eval, dbasis_x_eval,
+    dbasis_y_eval, dbasis_z_eval, dden_x_eval, dden_y_eval, dden_z_eval,
+    Zs, ldzs, Zz, ldzz, Zx, ldzx, Zy, ldzy, K, H);
+
+}
 
 // Eval Z Matrix MGGA VXC
 void LocalHostWorkDriver::eval_zmat_mgga_vxc_rks( size_t npts, size_t nbe, 
diff --git a/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp b/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp
index c5be4a87..5f6ee6de 100644
--- a/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp
+++ b/src/xc_integrator/local_work_driver/host/local_host_work_driver.hpp
@@ -247,6 +247,11 @@ class LocalHostWorkDriver : public LocalWorkDriver {
     const double* Xs, size_t ldxs, const double* Xz, size_t ldxz,
     double* den_eval);
 
+  void eval_uvvar_lda_gks( size_t npts, size_t nbe, const double* basis_eval,
+    const double* Xs, size_t ldxs, const double* Xz, size_t ldxz,
+    const double* Xx, size_t ldxx, const double* Xy, size_t ldxy, double* den_eval, double* K, const double dtol);
+
+
   /** Evaluate the U and V variavles for RKS GGA
    *
    *  U = rho + gradient
@@ -278,6 +283,13 @@ class LocalHostWorkDriver : public LocalWorkDriver {
     const double* Xz, size_t ldxz, double* den_eval,
     double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma );
 
+  void eval_uvvar_gga_gks( size_t npts, size_t nbe, const double* basis_eval,
+    const double* dbasis_x_eavl, const double *dbasis_y_eval,
+    const double* dbasis_z_eval, const double* Xs, size_t ldxs,
+    const double* Xz, size_t ldxz, const double* Xx, size_t ldxx,
+    const double* Xy, size_t ldxy, double* den_eval,
+    double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma, double* K, double* H, const double dtol );
+  
   /** Evaluate the U and V variavles for RKS MGGA
    *
    *  U = rho + gradient + tau + lapl
@@ -342,6 +354,10 @@ class LocalHostWorkDriver : public LocalWorkDriver {
     const double* basis_eval, double* Zs, size_t ldzs, double* Zz,
     size_t ldzz );
 
+  void eval_zmat_lda_vxc_gks( size_t npts, size_t nbe, const double* vrho,
+    const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz,
+    double* Zx, size_t ldzx,double* Zy, size_t ldzy, double *K );
+
   /** Evaluate the VXC Z Matrix for RKS LDA
    *
    *  Z(mu,i) = 0.5 * vrho(i)   * B(mu, i) +
@@ -376,6 +392,12 @@ class LocalHostWorkDriver : public LocalWorkDriver {
     const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval,
     double* Zs, size_t ldzs, double* Zz, size_t ldzz );
 
+  void eval_zmat_gga_vxc_gks( size_t npts, size_t nbe, const double* vrho,
+    const double* vgamma, const double* basis_eval, const double* dbasis_x_eval,
+    const double* dbasis_y_eval, const double* dbasis_z_eval,
+    const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval,
+    double* Zs, size_t ldzs, double* Zz, size_t ldzz, double* Zx, size_t ldzx, 
+    double* Zy, size_t ldzy, double* K, double* H );
 
   /** Evaluate the VXC Z Matrix for RKS MGGA
    *
diff --git a/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp b/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp
index e44bd562..8cd57456 100644
--- a/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp
+++ b/src/xc_integrator/local_work_driver/host/local_host_work_driver_pimpl.hpp
@@ -80,6 +80,9 @@ struct LocalHostWorkDriverPIMPL {
   virtual void eval_uvvar_lda_uks( size_t npts, size_t nbe, const double* basis_eval,
     const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, 
     double* den_eval) = 0;
+  virtual void eval_uvvar_lda_gks( size_t npts, size_t nbe, const double* basis_eval,
+    const double* Xs, size_t ldxs, const double* Xz, size_t ldxz,
+    const double* Xx, size_t ldxx, const double* Xy, size_t ldxy, double* den_eval, double* K, const double dtol) = 0;
 
   virtual void eval_uvvar_gga_rks( size_t npts, size_t nbe, const double* basis_eval,
     const double* dbasis_x_eval, const double *dbasis_y_eval, 
@@ -92,6 +95,12 @@ struct LocalHostWorkDriverPIMPL {
     const double* Xz, size_t ldxz, double* den_eval,
     double* dden_x_eval, double* dden_y_eval, double* dden_z_eval,
     double* gamma ) = 0;
+  virtual void eval_uvvar_gga_gks( size_t npts, size_t nbe, const double* basis_eval,
+    const double* dbasis_x_eavl, const double *dbasis_y_eval,
+    const double* dbasis_z_eval, const double* Xs, size_t ldxs,
+    const double* Xz, size_t ldxz, const double* Xx, size_t ldxx,
+    const double* Xy, size_t ldxy, double* den_eval,
+    double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma, double* K, double* H, const double dtol) = 0; 
 
   virtual void eval_uvvar_mgga_rks( size_t npts, size_t nbe, const double* basis_eval,
       const double* dbasis_x_eval, const double* dbasis_y_eval,
@@ -115,6 +124,9 @@ struct LocalHostWorkDriverPIMPL {
     const double* basis_eval, double* Z, size_t ldz ) = 0;
   virtual void eval_zmat_lda_vxc_uks( size_t npts, size_t nbe, const double* vrho,
     const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ) = 0;
+  virtual void eval_zmat_lda_vxc_gks( size_t npts, size_t nbe, const double* vrho,
+    const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz,
+    double* Zx, size_t ldzx,double* Zy, size_t ldzy, double *K ) = 0;
 
   virtual void eval_zmat_gga_vxc_rks( size_t npts, size_t nbe, const double* vrho, 
     const double* vgamma, const double* basis_eval, const double* dbasis_x_eval,
@@ -126,6 +138,12 @@ struct LocalHostWorkDriverPIMPL {
     const double* dbasis_y_eval, const double* dbasis_z_eval,
     const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval,
     double* Zs, size_t ldzs, double* Zz, size_t ldzz ) = 0;
+  virtual void eval_zmat_gga_vxc_gks( size_t npts, size_t nbe, const double* vrho,
+    const double* vgamma, const double* basis_eval, const double* dbasis_x_eval,
+    const double* dbasis_y_eval, const double* dbasis_z_eval,
+    const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval,
+    double* Zs, size_t ldzs, double* Zz, size_t ldzz, double* Zx, size_t ldzx,
+    double* Zy, size_t ldzy, double* K, double* H ) = 0;
 
   virtual void eval_zmat_mgga_vxc_rks( size_t npts, size_t nbe, const double* vrho,
       const double* vgamma, const double* vlapl, const double* basis_eval,
diff --git a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx
index 4f57e7fa..5a184ba9 100644
--- a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx
+++ b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.cxx
@@ -159,7 +159,57 @@ namespace GauXC {
  
   }
   
-  
+  void ReferenceLocalHostWorkDriver::eval_uvvar_lda_gks( size_t npts, size_t nbe, const double* basis_eval,
+    const double* Xs, size_t ldxs, const double* Xz, size_t ldxz,
+    const double* Xx, size_t ldxx, const double* Xy, size_t ldxy, double* den_eval, double* K, const double dtol) {
+
+
+    auto *KZ = K; // KZ // store K in the Z matrix
+    auto *KY = KZ + npts;
+    auto *KX = KY + npts;
+
+    double dtolsq = dtol*dtol;
+ 
+    for( int32_t i = 0; i < (int32_t)npts; ++i ) {
+
+      const size_t ioffs = size_t(i) * ldxs;
+      const size_t ioffz = size_t(i) * ldxz;
+      const size_t ioffx = size_t(i) * ldxx;
+      const size_t ioffy = size_t(i) * ldxy;
+
+      const auto*   Xs_i = Xs + ioffs;
+      const auto*   Xz_i = Xz + ioffz;
+      const auto*   Xx_i = Xx + ioffx;
+      const auto*   Xy_i = Xy + ioffy;
+
+      const double rhos = blas::dot( nbe, basis_eval + ioffs, 1, Xs_i, 1 );
+      const double rhoz = blas::dot( nbe, basis_eval + ioffz, 1, Xz_i, 1 );
+      const double rhox = blas::dot( nbe, basis_eval + ioffx, 1, Xx_i, 1 );
+      const double rhoy = blas::dot( nbe, basis_eval + ioffy, 1, Xy_i, 1 );
+ 
+      double mtemp = rhoz * rhoz + rhox * rhox + rhoy * rhoy;
+      double mnorm = 0;
+
+      if (mtemp > dtolsq) {
+        mnorm = sqrt(mtemp);
+        KZ[i] = rhoz / mnorm;
+        KY[i] = rhoy / mnorm;
+        KX[i] = rhox / mnorm;
+      } else {
+        mnorm = (1. / 3.) * (rhox + rhoy + rhoz);
+        KZ[i] = 1. / 3.;
+        KY[i] = 1. / 3.;
+        KX[i] = 1. / 3.;
+      }
+
+      den_eval[2*i]   = 0.5*(rhos + mnorm); // rho_+
+      den_eval[2*i+1] = 0.5*(rhos - mnorm); // rho_-
+
+    }
+
+  }
+
+
   void ReferenceLocalHostWorkDriver::eval_uvvar_gga_rks( size_t npts, size_t nbe, 
 						     const double* basis_eval, const double* dbasis_x_eval, 
 						     const double *dbasis_y_eval, const double* dbasis_z_eval, const double* X, 
@@ -244,6 +294,7 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_gga_uks( size_t npts, size_t nbe,
 
 }
 
+
 void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_rks( size_t npts, size_t nbe,
   const double* basis_eval, const double* dbasis_x_eval,
   const double *dbasis_y_eval, const double* dbasis_z_eval, const double* lbasis_eval,
@@ -347,17 +398,151 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe,
       tau[2*i+1] = 0.5*(taus - tauz);
 
       if (lapl != nullptr) {
-	auto lapls = 2. * blas::dot( nbe, lbasis_eval + ioffs, 1, Xs_i, 1) + 4. * taus;
-	auto laplz = 2. * blas::dot( nbe, lbasis_eval + ioffz, 1, Xz_i, 1) + 4. * tauz;
+        auto lapls = 2. * blas::dot( nbe, lbasis_eval + ioffs, 1, Xs_i, 1) + 4. * taus;
+        auto laplz = 2. * blas::dot( nbe, lbasis_eval + ioffz, 1, Xz_i, 1) + 4. * tauz;
 
-	lapl[2*i]   = 0.5*(lapls + laplz);
-	lapl[2*i+1] = 0.5*(lapls - laplz);
+        lapl[2*i]   = 0.5*(lapls + laplz);
+        lapl[2*i+1] = 0.5*(lapls - laplz);
       }
 
    }
 }
 
 
+
+void ReferenceLocalHostWorkDriver::eval_uvvar_gga_gks( size_t npts, size_t nbe, const double* basis_eval,
+    const double* dbasis_x_eval, const double *dbasis_y_eval,
+    const double* dbasis_z_eval, const double* Xs, size_t ldxs,
+    const double* Xz, size_t ldxz, const double* Xx, size_t ldxx,
+    const double* Xy, size_t ldxy, double* den_eval,
+    double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma, double* K, double* H, const double dtol) {
+
+   auto *KZ = K; // KZ // store K in the Z matrix
+   auto *KY = KZ + npts;
+   auto *KX = KY + npts;
+
+   auto *HZ = H; // KZ // store K in the Z matrix
+   auto *HY = HZ + npts;
+   auto *HX = HY + npts;
+
+   double dtolsq = dtol*dtol;
+
+   for( int32_t i = 0; i < (int32_t)npts; ++i ) {
+
+      const size_t ioffs = size_t(i) * ldxs;
+      const size_t ioffz = size_t(i) * ldxz;
+      const size_t ioffx = size_t(i) * ldxx;
+      const size_t ioffy = size_t(i) * ldxy;
+
+      const auto*   Xs_i = Xs + ioffs;
+      const auto*   Xz_i = Xz + ioffz;
+      const auto*   Xx_i = Xx + ioffx;
+      const auto*   Xy_i = Xy + ioffy;
+
+      const double rhos = blas::dot( nbe, basis_eval + ioffs, 1, Xs_i, 1 );
+      const double rhoz = blas::dot( nbe, basis_eval + ioffz, 1, Xz_i, 1 );
+      const double rhox = blas::dot( nbe, basis_eval + ioffx, 1, Xx_i, 1 );
+      const double rhoy = blas::dot( nbe, basis_eval + ioffy, 1, Xy_i, 1 );
+
+      const auto dndx =
+        2. * blas::dot( nbe, dbasis_x_eval + ioffs, 1, Xs_i, 1 );
+      const auto dndy =
+        2. * blas::dot( nbe, dbasis_y_eval + ioffs, 1, Xs_i, 1 );
+      const auto dndz =
+        2. * blas::dot( nbe, dbasis_z_eval + ioffs, 1, Xs_i, 1 );
+
+      const auto dMzdx =
+        2. * blas::dot( nbe, dbasis_x_eval + ioffz, 1, Xz_i, 1 );
+      const auto dMzdy =
+        2. * blas::dot( nbe, dbasis_y_eval + ioffz, 1, Xz_i, 1 );
+      const auto dMzdz =
+        2. * blas::dot( nbe, dbasis_z_eval + ioffz, 1, Xz_i, 1 );
+
+      const auto dMxdx =
+        2. * blas::dot( nbe, dbasis_x_eval + ioffx, 1, Xx_i, 1 );
+      const auto dMxdy =
+        2. * blas::dot( nbe, dbasis_y_eval + ioffx, 1, Xx_i, 1 );
+      const auto dMxdz =
+        2. * blas::dot( nbe, dbasis_z_eval + ioffx, 1, Xx_i, 1 );
+
+      const auto dMydx =
+        2. * blas::dot( nbe, dbasis_x_eval + ioffy, 1, Xy_i, 1 );
+      const auto dMydy =
+        2. * blas::dot( nbe, dbasis_y_eval + ioffy, 1, Xy_i, 1 );
+      const auto dMydz =
+        2. * blas::dot( nbe, dbasis_z_eval + ioffy, 1, Xy_i, 1 );
+
+
+      dden_x_eval[4 * i] = dndx;
+      dden_y_eval[4 * i] = dndy;
+      dden_z_eval[4 * i] = dndz;
+
+      dden_x_eval[4 * i + 1] = dMzdx;
+      dden_y_eval[4 * i + 1] = dMzdy;
+      dden_z_eval[4 * i + 1] = dMzdz;
+
+      dden_x_eval[4 * i + 2] = dMydx;
+      dden_y_eval[4 * i + 2] = dMydy;
+      dden_z_eval[4 * i + 2] = dMydz;
+
+      dden_x_eval[4 * i + 3] = dMxdx;
+      dden_y_eval[4 * i + 3] = dMxdy;
+      dden_z_eval[4 * i + 3] = dMxdz;
+
+      double mtemp = rhoz * rhoz + rhox * rhox + rhoy * rhoy;
+      double mnorm = 0;
+
+      auto dels_dot_dels = dndx * dndx + dndy * dndy + dndz * dndz;
+      auto delz_dot_delz = dMzdx * dMzdx + dMzdy * dMzdy + dMzdz * dMzdz;
+      auto delx_dot_delx = dMxdx * dMxdx + dMxdy * dMxdy + dMxdz * dMxdz;
+      auto dely_dot_dely = dMydx * dMydx + dMydy * dMydy + dMydz * dMydz;
+
+      auto dels_dot_delz = dndx * dMzdx + dndy * dMzdy + dndz * dMzdz;
+      auto dels_dot_delx = dndx * dMxdx + dndy * dMxdy + dndz * dMxdz;
+      auto dels_dot_dely = dndx * dMydx + dndy * dMydy + dndz * dMydz;
+
+      auto sum = delz_dot_delz + delx_dot_delx + dely_dot_dely;
+      auto s_sum =
+          dels_dot_delz * rhoz + dels_dot_delx * rhox + dels_dot_dely * rhoy;
+
+      auto sqsum2 =
+          sqrt(dels_dot_delz * dels_dot_delz + dels_dot_delx * dels_dot_delx +
+               dels_dot_dely * dels_dot_dely);
+
+      double sign = 1.;
+      if (std::signbit(s_sum))
+        sign = -1.;
+
+      if (mtemp > dtolsq) {
+        mnorm = sqrt(mtemp);
+        KZ[i] = rhoz / mnorm;
+        KY[i] = rhoy / mnorm;
+        KX[i] = rhox / mnorm;
+        HZ[i] = sign * dels_dot_delz / sqsum2;
+        HY[i] = sign * dels_dot_dely / sqsum2;
+        HX[i] = sign * dels_dot_delx / sqsum2;
+      } else {
+        mnorm = (1. / 3.) * (rhox + rhoy + rhoz);
+        KZ[i] = 1. / 3.;
+        KY[i] = 1. / 3.;
+        KX[i] = 1. / 3.;
+
+        HZ[i] = sign / 3.;
+        HY[i] = sign / 3.;
+        HX[i] = sign / 3.;
+      }
+      
+      den_eval[2 * i] = 0.5 * (rhos + mnorm);
+      den_eval[2 * i + 1] = 0.5 * (rhos - mnorm);
+      
+      gamma[3 * i] = 0.25 * (dels_dot_dels + sum) + 0.5 * sign * sqsum2;
+      gamma[3 * i + 1] = 0.25 * (dels_dot_dels - sum);
+      gamma[3 * i + 2] = 0.25 * (dels_dot_dels + sum) - 0.5 * sign * sqsum2;
+
+
+    }
+
+}
   // Eval Z Matrix LDA VXC
   void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_rks( size_t npts, size_t nbf, 
 							const double* vrho, const double* basis_eval, double* Z, size_t ldz ) {
@@ -402,6 +587,40 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe,
 
   }
 
+void ReferenceLocalHostWorkDriver::eval_zmat_lda_vxc_gks( size_t npts, size_t nbe, const double* vrho,
+    const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz,
+    double* Zx, size_t ldzx,double* Zy, size_t ldzy, double *K ) {
+
+  auto *KZ = K; // KZ // store K in the Z matrix
+  auto *KY = KZ + npts;
+  auto *KX = KY + npts;
+
+    blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zs, ldzs);
+    blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zz, ldzz);
+    blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zx, ldzx);
+    blas::lacpy( 'A', nbe, npts, basis_eval, nbe, Zy, ldzy);
+
+    for( int32_t i = 0; i < (int32_t)npts; ++i ) {
+
+      auto* zs_col = Zs + i*ldzs;
+      auto* zz_col = Zz + i*ldzz;
+      auto* zx_col = Zx + i*ldzx;
+      auto* zy_col = Zy + i*ldzy;
+
+      const double factp = 0.5 * vrho[2*i];
+      const double factm = 0.5 * vrho[2*i+1];
+      const double factor = 0.5 * (factp - factm);
+
+      //eq. 56 https://doi.org/10.1140/epjb/e2018-90170-1
+      GauXC::blas::scal( nbe, 0.5*(factp + factm), zs_col, 1 );
+      GauXC::blas::scal( nbe, KZ[i] * factor, zz_col, 1 );
+      GauXC::blas::scal( nbe, KX[i] * factor, zx_col, 1 );
+      GauXC::blas::scal( nbe, KY[i] * factor, zy_col, 1 );
+   
+    }
+
+}
+
   // Eval Z Matrix GGA VXC
   void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_rks( size_t npts, size_t nbf, 
 							const double* vrho, const double* vgamma, const double* basis_eval, 
@@ -493,15 +712,14 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe,
     }
   }
 
-
   // Eval Z Matrix MGGA VXC
-  void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_rks( size_t npts, size_t nbf, 
-							const double* vrho, const double* vgamma, const double* vlapl, 
-							const double* basis_eval, 
-							const double* dbasis_x_eval, const double* dbasis_y_eval, 
-							const double* dbasis_z_eval, const double* lbasis_eval,
-							const double* dden_x_eval, 
-							const double* dden_y_eval, const double* dden_z_eval, double* Z, size_t ldz ) {
+  void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_rks( size_t npts, size_t nbf,
+              const double* vrho, const double* vgamma, const double* vlapl,
+              const double* basis_eval,
+              const double* dbasis_x_eval, const double* dbasis_y_eval,
+              const double* dbasis_z_eval, const double* lbasis_eval,
+              const double* dden_x_eval,
+              const double* dden_y_eval, const double* dden_z_eval, double* Z, size_t ldz ) {
 
     if( ldz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS"));
     blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Z, nbf );
@@ -511,14 +729,14 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe,
       const int32_t ioff = i * nbf;
 
       auto* z_col    = Z + ioff;
-      auto* bf_x_col = dbasis_x_eval + ioff; 
-      auto* bf_y_col = dbasis_y_eval + ioff; 
-      auto* bf_z_col = dbasis_z_eval + ioff; 
+      auto* bf_x_col = dbasis_x_eval + ioff;
+      auto* bf_y_col = dbasis_y_eval + ioff;
+      auto* bf_z_col = dbasis_z_eval + ioff;
 
       const auto lda_fact = 0.5 * vrho[i];
       blas::scal( nbf, lda_fact, z_col, 1 );
 
-      const auto gga_fact = 2. * vgamma[i]; 
+      const auto gga_fact = 2. * vgamma[i];
       const auto x_fact = gga_fact * dden_x_eval[i];
       const auto y_fact = gga_fact * dden_y_eval[i];
       const auto z_fact = gga_fact * dden_z_eval[i];
@@ -528,7 +746,7 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe,
       blas::axpy( nbf, z_fact, bf_z_col, 1, z_col, 1 );
 
       if ( vlapl != nullptr ) {
-	auto* lbf_col = lbasis_eval + ioff;
+  auto* lbf_col = lbasis_eval + ioff;
         const auto lapl_fact = vlapl[i];
         blas::axpy( nbf, lapl_fact, lbf_col, 1, z_col, 1 );
       }
@@ -537,13 +755,12 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe,
 
   }
 
-
-  void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks( size_t npts, size_t nbf,
+void ReferenceLocalHostWorkDriver::eval_zmat_mgga_vxc_uks( size_t npts, size_t nbf,
               const double* vrho, const double* vgamma, const double* vlapl, 
-	      const double* basis_eval,
+        const double* basis_eval,
               const double* dbasis_x_eval, const double* dbasis_y_eval,
               const double* dbasis_z_eval, const double* lbasis_eval,
-	      const double* dden_x_eval,
+        const double* dden_x_eval,
               const double* dden_y_eval, const double* dden_z_eval, double* Zs, 
               size_t ldzs, double* Zz, size_t ldzz ) {
 
@@ -606,10 +823,10 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe,
   }
 
   void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_rks(size_t npts, size_t nbf, 
-							const double* vtau, const double* vlapl, 
-							const double* dbasis_x_eval, const double* dbasis_y_eval, 
-							const double* dbasis_z_eval,
-							double* mmat_x, double* mmat_y, double* mmat_z, size_t ldm ) {
+              const double* vtau, const double* vlapl, 
+              const double* dbasis_x_eval, const double* dbasis_y_eval, 
+              const double* dbasis_z_eval,
+              double* mmat_x, double* mmat_y, double* mmat_z, size_t ldm ) {
 
     if( ldm != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS"));
     
@@ -642,12 +859,12 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe,
     }
   }
 
-  void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks(size_t npts, size_t nbf, 
-							const double* vtau, const double* vlapl, 
-							const double* dbasis_x_eval, const double* dbasis_y_eval, 
-							const double* dbasis_z_eval,
-							double* mmat_xs, double* mmat_ys, double* mmat_zs, size_t ldms,
-							double* mmat_xz, double* mmat_yz, double* mmat_zz, size_t ldmz) {
+void ReferenceLocalHostWorkDriver::eval_mmat_mgga_vxc_uks(size_t npts, size_t nbf, 
+              const double* vtau, const double* vlapl, 
+              const double* dbasis_x_eval, const double* dbasis_y_eval, 
+              const double* dbasis_z_eval,
+              double* mmat_xs, double* mmat_ys, double* mmat_zs, size_t ldms,
+              double* mmat_xz, double* mmat_yz, double* mmat_zz, size_t ldmz) {
 
     if( ldms != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS"));
     if( ldmz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS"));
@@ -687,8 +904,8 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe,
       if ( vlapl != nullptr ) {
         const auto lfactp = vlapl[2*i];
         const auto lfactm = vlapl[2*i+1];
-	const auto lfacts = 0.5*(lfactp + lfactm);
-	const auto lfactz = 0.5*(lfactp - lfactm);
+  const auto lfacts = 0.5*(lfactp + lfactm);
+  const auto lfactz = 0.5*(lfactp - lfactm);
         blas::axpy( nbf, lfacts, bf_x_col, 1, xs_col, 1);
         blas::axpy( nbf, lfacts, bf_y_col, 1, ys_col, 1);
         blas::axpy( nbf, lfacts, bf_z_col, 1, zs_col, 1);
@@ -701,6 +918,116 @@ void ReferenceLocalHostWorkDriver::eval_uvvar_mgga_uks( size_t npts, size_t nbe,
   }
 
 
+void ReferenceLocalHostWorkDriver::eval_zmat_gga_vxc_gks( size_t npts, size_t nbf, const double* vrho,
+    const double* vgamma, const double* basis_eval, const double* dbasis_x_eval,
+    const double* dbasis_y_eval, const double* dbasis_z_eval,
+    const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval,
+    double* Zs, size_t ldzs, double* Zz, size_t ldzz, double* Zx, size_t ldzx,
+    double* Zy, size_t ldzy, double* K, double* H ) {
+
+    auto *KZ = K; // KZ // store K in the Z matrix
+    auto *KY = KZ + npts;
+    auto *KX = KY + npts;
+
+    auto *HZ = H; // KZ // store K in the Z matrix
+    auto *HY = HZ + npts;
+    auto *HX = HY + npts;
+
+    if( ldzs != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS"));
+    if( ldzz != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS"));
+    if( ldzx != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS"));
+    if( ldzy != nbf ) GAUXC_GENERIC_EXCEPTION(std::string("INVALID DIMS"));
+
+    blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zs, ldzs);
+    blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zz, ldzz);
+    blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zx, ldzx);
+    blas::lacpy( 'A', nbf, npts, basis_eval, nbf, Zy, ldzy);   
+
+    for( int32_t i = 0; i < (int32_t)npts; ++i ) {
+
+      const int32_t ioff = i * nbf;
+
+      auto* zs_col = Zs + ioff;
+      auto* zz_col = Zz + ioff;
+      auto* zx_col = Zx + ioff;
+      auto* zy_col = Zy + ioff;
+
+      auto* bf_x_col = dbasis_x_eval + ioff;
+      auto* bf_y_col = dbasis_y_eval + ioff;
+      auto* bf_z_col = dbasis_z_eval + ioff;
+
+      const double factp = 0.5 * vrho[2*i];
+      const double factm = 0.5 * vrho[2*i+1];
+      const double factor = 0.5 * (factp - factm);
+
+      GauXC::blas::scal( nbf, 0.5*(factp + factm), zs_col, 1 ); //additional 0.5 is from eq 56 in petrone 2018 eur phys journal b "an efficent implementation of .. "
+      GauXC::blas::scal( nbf, KZ[i]*factor, zz_col, 1 );
+      GauXC::blas::scal( nbf, KX[i]*factor, zx_col, 1 );
+      GauXC::blas::scal( nbf, KY[i]*factor, zy_col, 1 );
+
+      const auto gga_fact_pp = vgamma[3 * i];
+      const auto gga_fact_pm = vgamma[3 * i + 1];
+      const auto gga_fact_mm = vgamma[3 * i + 2];
+
+      const auto gga_fact_1 = 0.5 * (gga_fact_pp + gga_fact_pm + gga_fact_mm);
+      const auto gga_fact_2 = 0.5 * (gga_fact_pp - gga_fact_mm);
+      const auto gga_fact_3 = 0.5 * (gga_fact_pp - gga_fact_pm + gga_fact_mm);
+
+      const auto x_fact_s = gga_fact_1 * dden_x_eval[4 * i] +
+                            gga_fact_2 * (HZ[i] * dden_x_eval[4 * i + 1] +
+                                          HY[i] * dden_x_eval[4 * i + 2] +
+                                          HX[i] * dden_x_eval[4 * i + 3]);
+      const auto y_fact_s = gga_fact_1 * dden_y_eval[4 * i] +
+                            gga_fact_2 * (HZ[i] * dden_y_eval[4 * i + 1] +
+                                          HY[i] * dden_y_eval[4 * i + 2] +
+                                          HX[i] * dden_y_eval[4 * i + 3]);
+      const auto z_fact_s = gga_fact_1 * dden_z_eval[4 * i] +
+                            gga_fact_2 * (HZ[i] * dden_z_eval[4 * i + 1] +
+                                          HY[i] * dden_z_eval[4 * i + 2] +
+                                          HX[i] * dden_z_eval[4 * i + 3]);
+
+      const auto x_fact_z = gga_fact_3 * dden_x_eval[4 * i + 1] +
+                            gga_fact_2 * HZ[i] * dden_x_eval[4 * i];
+      const auto y_fact_z = gga_fact_3 * dden_y_eval[4 * i + 1] +
+                            gga_fact_2 * HZ[i] * dden_y_eval[4 * i];
+      const auto z_fact_z = gga_fact_3 * dden_z_eval[4 * i + 1] +
+                            gga_fact_2 * HZ[i] * dden_z_eval[4 * i];
+
+      const auto x_fact_x = gga_fact_3 * dden_x_eval[4 * i + 3] +
+                            gga_fact_2 * HX[i] * dden_x_eval[4 * i];
+      const auto y_fact_x = gga_fact_3 * dden_y_eval[4 * i + 3] +
+                            gga_fact_2 * HX[i] * dden_y_eval[4 * i];
+      const auto z_fact_x = gga_fact_3 * dden_z_eval[4 * i + 3] +
+                            gga_fact_2 * HX[i] * dden_z_eval[4 * i];
+
+      const auto x_fact_y = gga_fact_3 * dden_x_eval[4 * i + 2] +
+                            gga_fact_2 * HY[i] * dden_x_eval[4 * i];
+      const auto y_fact_y = gga_fact_3 * dden_y_eval[4 * i + 2] +
+                            gga_fact_2 * HY[i] * dden_y_eval[4 * i];
+      const auto z_fact_y = gga_fact_3 * dden_z_eval[4 * i + 2] +
+                            gga_fact_2 * HY[i] * dden_z_eval[4 * i];
+
+
+      blas::axpy(nbf, x_fact_s, bf_x_col, 1, zs_col, 1);
+      blas::axpy(nbf, y_fact_s, bf_y_col, 1, zs_col, 1);
+      blas::axpy(nbf, z_fact_s, bf_z_col, 1, zs_col, 1);
+
+      blas::axpy(nbf, x_fact_z, bf_x_col, 1, zz_col, 1);
+      blas::axpy(nbf, y_fact_z, bf_y_col, 1, zz_col, 1);
+      blas::axpy(nbf, z_fact_z, bf_z_col, 1, zz_col, 1);
+
+      blas::axpy(nbf, x_fact_x, bf_x_col, 1, zx_col, 1);
+      blas::axpy(nbf, y_fact_x, bf_y_col, 1, zx_col, 1);
+      blas::axpy(nbf, z_fact_x, bf_z_col, 1, zx_col, 1);
+
+      blas::axpy(nbf, x_fact_y, bf_x_col, 1, zy_col, 1);
+      blas::axpy(nbf, y_fact_y, bf_y_col, 1, zy_col, 1);
+      blas::axpy(nbf, z_fact_y, bf_z_col, 1, zy_col, 1);
+
+    }
+
+}
+
   // Increment VXC by Z
   void ReferenceLocalHostWorkDriver::inc_vxc( size_t npts, size_t nbf, size_t nbe, 
 					      const double* basis_eval, const submat_map_t& submat_map, const double* Z,
diff --git a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp
index fcaa84cd..fece15a8 100644
--- a/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp
+++ b/src/xc_integrator/local_work_driver/host/reference_local_host_work_driver.hpp
@@ -82,6 +82,10 @@ struct ReferenceLocalHostWorkDriver : public detail::LocalHostWorkDriverPIMPL {
   void eval_uvvar_lda_uks( size_t npts, size_t nbe, const double* basis_eval,
     const double* Xs, size_t ldxs, const double* Xz, size_t ldxz, 
     double* den_eval) override;
+  void eval_uvvar_lda_gks( size_t npts, size_t nbe, const double* basis_eval,
+    const double* Xs, size_t ldxs, const double* Xz, size_t ldxz,
+    const double* Xx, size_t ldxx, const double* Xy, size_t ldxy,
+    double* den_eval, double* K, const double dtol) override;
 
   void eval_uvvar_gga_rks( size_t npts, size_t nbe, const double* basis_eval,
     const double* dbasis_x_eval, const double *dbasis_y_eval, 
@@ -94,7 +98,13 @@ struct ReferenceLocalHostWorkDriver : public detail::LocalHostWorkDriverPIMPL {
     const double* Xz, size_t ldxz, double* den_eval,
     double* dden_x_eval, double* dden_y_eval, double* dden_z_eval,
     double* gamma ) override;
-
+  void eval_uvvar_gga_gks( size_t npts, size_t nbe, const double* basis_eval,
+    const double* dbasis_x_eavl, const double *dbasis_y_eval,
+    const double* dbasis_z_eval, const double* Xs, size_t ldxs,
+    const double* Xz, size_t ldxz, const double* Xx, size_t ldxx,
+    const double* Xy, size_t ldxy, double* den_eval,
+    double* dden_x_eval, double* dden_y_eval, double* dden_z_eval, double* gamma,
+    double* K, double* H, const double dtol ) override;
 
   void eval_uvvar_mgga_rks( size_t npts, size_t nbe, const double* basis_eval,
     const double* dbasis_x_eval, const double* dbasis_y_eval, 
@@ -118,7 +128,9 @@ struct ReferenceLocalHostWorkDriver : public detail::LocalHostWorkDriverPIMPL {
     const double* basis_eval, double* Z, size_t ldz ) override;
   void eval_zmat_lda_vxc_uks( size_t npts, size_t nbe, const double* vrho,
     const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz ) override;
-
+  void eval_zmat_lda_vxc_gks( size_t npts, size_t nbe, const double* vrho,
+    const double* basis_eval, double* Zs, size_t ldzs, double* Zz, size_t ldzz,
+    double* Zx, size_t ldzx,double* Zy, size_t ldzy, double *K ) override;
 
   void eval_zmat_gga_vxc_rks( size_t npts, size_t nbe, const double* vrho, 
     const double* vgamma, const double* basis_eval, const double* dbasis_x_eval,
@@ -130,6 +142,12 @@ struct ReferenceLocalHostWorkDriver : public detail::LocalHostWorkDriverPIMPL {
     const double* dbasis_y_eval, const double* dbasis_z_eval,
     const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval,
     double* Zs, size_t ldzs, double* Zz, size_t ldzz ) override;
+  void eval_zmat_gga_vxc_gks( size_t npts, size_t nbe, const double* vrho,
+    const double* vgamma, const double* basis_eval, const double* dbasis_x_eval,
+    const double* dbasis_y_eval, const double* dbasis_z_eval,
+    const double* dden_x_eval, const double* dden_y_eval, const double* dden_z_eval,
+    double* Zs, size_t ldzs, double* Zz, size_t ldzz, double* Zx, size_t ldzx,
+    double* Zy, size_t ldzy, double* K, double* H ) override;
 
 
   void eval_zmat_mgga_vxc_rks( size_t npts, size_t nbe, const double* vrho, 
diff --git a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp
index cedb12df..f2439ad4 100644
--- a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp
+++ b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp
@@ -33,15 +33,29 @@ class IncoreReplicatedXCDeviceIntegrator :
 
   void eval_exc_vxc_( int64_t m, int64_t n, const value_type* P,
                       int64_t ldp, value_type* VXC, int64_t ldvxc,
-                      value_type* EXC ) override;
+                      value_type* EXC, const IntegratorSettingsXC& settings) override;
 
-  void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Pscalar,
-                      int64_t ldpscalar,
+  void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
                       const value_type* Pz,
                       int64_t ldpz,
-                      value_type* VXCscalar, int64_t ldvxcscalar,
+                      value_type* VXCs, int64_t ldvxcs,
                       value_type* VXCz, int64_t ldvxcz,
-                      value_type* EXC ) override;
+                      value_type* EXC, const IntegratorSettingsXC& settings ) override;
+
+  void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
+                      const value_type* Pz,
+                      int64_t ldpz,
+                      const value_type* Py,
+                      int64_t ldpy,
+                      const value_type* Px,
+                      int64_t ldpx,
+                      value_type* VXCs, int64_t ldvxcs,
+                      value_type* VXCz, int64_t ldvxcz,
+                      value_type* VXCy, int64_t ldvxcy,
+                      value_type* VXCx, int64_t ldvxcx,
+                      value_type* EXC, const IntegratorSettingsXC& settings ) override;
 
 
   void eval_exc_grad_( int64_t m, int64_t n, const value_type* P,
@@ -66,18 +80,36 @@ class IncoreReplicatedXCDeviceIntegrator :
                             host_task_iterator task_begin, host_task_iterator task_end,
                             XCDeviceData& device_data );
 
-  void exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar,
+  void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
                                 const value_type* Pz, int64_t ldpz,
                             host_task_iterator task_begin, host_task_iterator task_end,
                             XCDeviceData& device_data );
 
-  void exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar,
+  void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
                             const value_type* Pz, int64_t ldpz,
                             value_type* VXC, int64_t ldvxc,
                             value_type* VXCz, int64_t ldvxcz, value_type* EXC, value_type *N_EL,
                             host_task_iterator task_begin, host_task_iterator task_end,
                             XCDeviceData& device_data );
 
+  void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
+                            const value_type* Pz, int64_t ldpz,
+                            const value_type* Py, int64_t ldpy,
+                            const value_type* Px, int64_t ldpx,
+                            host_task_iterator task_begin, host_task_iterator task_end,
+                            XCDeviceData& device_data );
+
+  void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
+                            const value_type* Pz, int64_t ldpz,
+                            const value_type* Py, int64_t ldpy,
+                            const value_type* Px, int64_t ldpx,
+                            value_type* VXC, int64_t ldvxc,
+                            value_type* VXCz, int64_t ldvxcz,
+                            value_type* VXCy, int64_t ldvxcy,
+                            value_type* VXCx, int64_t ldvxcx, value_type* EXC, value_type *N_EL,
+                            host_task_iterator task_begin, host_task_iterator task_end,
+                            XCDeviceData& device_data );
+
   void eval_exc_grad_local_work_( const basis_type& basis, const value_type* P, int64_t ldp, 
                                   host_task_iterator task_begin, host_task_iterator task_end,
                                   XCDeviceData& device_data );
diff --git a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp
index 16de808b..3345bf08 100644
--- a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp
+++ b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp
@@ -19,7 +19,7 @@ template <typename ValueType>
 void IncoreReplicatedXCDeviceIntegrator<ValueType>::
   eval_exc_vxc_( int64_t m, int64_t n, const value_type* P,
                  int64_t ldp, value_type* VXC, int64_t ldvxc,
-                 value_type* EXC ) {
+                 value_type* EXC, const IntegratorSettingsXC& settings ) {
 
 
   const auto& basis = this->load_balancer_->basis();
@@ -109,17 +109,37 @@ void IncoreReplicatedXCDeviceIntegrator<ValueType>::
 
 template <typename ValueType>
 void IncoreReplicatedXCDeviceIntegrator<ValueType>::
-  eval_exc_vxc_( int64_t m, int64_t n, const value_type* Pscalar,
-                      int64_t ldpscalar,
+  eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
                       const value_type* Pz,
                       int64_t ldpz,
-                      value_type* VXCscalar, int64_t ldvxcscalar,
+                      value_type* VXCs, int64_t ldvxcs,
                       value_type* VXCz, int64_t ldvxcz,
-                      value_type* EXC ) {
-  GauXC::util::unused(m,n,Pscalar,ldpscalar,Pz,ldpz,VXCscalar,ldvxcscalar,VXCz,ldvxcz,EXC);
+                      value_type* EXC, const IntegratorSettingsXC& settings ) {
+  GauXC::util::unused(m,n,Ps,ldps,Pz,ldpz,VXCs,ldvxcs,VXCz,ldvxcz,EXC,settings);
   GAUXC_GENERIC_EXCEPTION("UKS NOT YET IMPLEMENTED FOR DEVICE");
 }
 
+template <typename ValueType>
+void IncoreReplicatedXCDeviceIntegrator<ValueType>::
+  eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
+                      const value_type* Pz,
+                      int64_t ldpz,
+                      const value_type* Py,
+                      int64_t ldpy,
+                      const value_type* Px,
+                      int64_t ldpx,
+                      value_type* VXCs, int64_t ldvxcs,
+                      value_type* VXCz, int64_t ldvxcz,
+                      value_type* VXCy, int64_t ldvxcy,
+                      value_type* VXCx, int64_t ldvxcx,
+                      value_type* EXC, const IntegratorSettingsXC& settings ) {
+  GauXC::util::unused(m,n,Ps,ldps,Pz,ldpz,Py,ldpy,Px,ldpx,VXCs,ldvxcs,VXCz,ldvxcz,VXCy,ldvxcy,VXCx,ldvxcx,EXC,settings);
+  GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE");
+}
+
+
 template <typename ValueType>
 void IncoreReplicatedXCDeviceIntegrator<ValueType>::
   exc_vxc_local_work_( const basis_type& basis, const value_type* P, int64_t ldp, 
@@ -263,27 +283,57 @@ void IncoreReplicatedXCDeviceIntegrator<ValueType>::
 
 template <typename ValueType>
 void IncoreReplicatedXCDeviceIntegrator<ValueType>::
-  exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar,
+  exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
                                 const value_type* Pz, int64_t ldpz,
                             host_task_iterator task_begin, host_task_iterator task_end,
                             XCDeviceData& device_data ) {
-  GauXC::util::unused(basis,Pscalar,ldpscalar,Pz,ldpz,task_begin,task_end,device_data);
+  GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,task_begin,task_end,device_data);
   GAUXC_GENERIC_EXCEPTION("UKS NOT YET IMPLEMENTED FOR DEVICE");
 }
 
 template <typename ValueType>
 void IncoreReplicatedXCDeviceIntegrator<ValueType>::
-  exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar,
+  exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
                             const value_type* Pz, int64_t ldpz,
-                            value_type* VXCscalar, int64_t ldvxcscalar,
+                            value_type* VXCs, int64_t ldvxcs,
                             value_type* VXCz, int64_t ldvxcz, value_type* EXC, value_type *N_EL,
                             host_task_iterator task_begin, host_task_iterator task_end,
                             XCDeviceData& device_data ) {
 
-  GauXC::util::unused(basis,Pscalar,ldpscalar,Pz,ldpz,VXCscalar,ldvxcscalar,VXCz,ldvxcz,EXC,N_EL,task_begin,task_end,device_data);
+  GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,VXCs,ldvxcs,VXCz,ldvxcz,EXC,N_EL,task_begin,task_end,device_data);
   GAUXC_GENERIC_EXCEPTION("UKS NOT YET IMPLEMENTED FOR DEVICE");
 }
 
+template <typename ValueType>
+void IncoreReplicatedXCDeviceIntegrator<ValueType>::
+  exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
+                            const value_type* Pz, int64_t ldpz,
+                            const value_type* Py, int64_t ldpy,
+                            const value_type* Px, int64_t ldpx,
+                            host_task_iterator task_begin, host_task_iterator task_end,
+                            XCDeviceData& device_data ) {
+  GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,Py,ldpy,Px,ldpx,task_begin,task_end,device_data);
+  GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE");
+}
+
+template <typename ValueType>
+void IncoreReplicatedXCDeviceIntegrator<ValueType>::
+  exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
+                            const value_type* Pz, int64_t ldpz,
+                            const value_type* Py, int64_t ldpy,
+                            const value_type* Px, int64_t ldpx,   
+                            value_type* VXCs, int64_t ldvxcs,
+                            value_type* VXCz, int64_t ldvxcz,
+                            value_type* VXCy, int64_t ldvxcy,
+                            value_type* VXCx, int64_t ldvxcx, value_type* EXC, value_type *N_EL,
+                            host_task_iterator task_begin, host_task_iterator task_end,
+                            XCDeviceData& device_data ) {
+
+  GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,Py,ldpy,Px,ldpx,VXCs,ldvxcs,VXCz,ldvxcz,VXCy,ldvxcy,VXCx,ldvxcx,EXC,N_EL,task_begin,task_end,device_data);
+  GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE");
+}
+
+
 }
 }
 
diff --git a/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator.hpp b/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator.hpp
index d41e5fcd..e446df35 100644
--- a/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator.hpp
+++ b/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator.hpp
@@ -44,15 +44,29 @@ class ShellBatchedReplicatedXCDeviceIntegrator :
 
   void eval_exc_vxc_( int64_t m, int64_t n, const value_type* P,
                       int64_t ldp, value_type* VXC, int64_t ldvxc,
-                      value_type* EXC ) override;
+                      value_type* EXC, const IntegratorSettingsXC& settings ) override;
 
-  void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Pscalar,
-                      int64_t ldpscalar,
+  void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
                       const value_type* Pz,
                       int64_t ldpz,
-                      value_type* VXCscalar, int64_t ldvxcscalar,
+                      value_type* VXCs, int64_t ldvxcs,
                       value_type* VXCz, int64_t ldvxcz,
-                      value_type* EXC ) override;
+                      value_type* EXC, const IntegratorSettingsXC& settings ) override;
+
+  void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
+                      const value_type* Pz,
+                      int64_t ldpz,
+                      const value_type* Py,
+                      int64_t ldpy,
+                      const value_type* Px,
+                      int64_t ldpx,
+                      value_type* VXCs, int64_t ldvxcs,
+                      value_type* VXCz, int64_t ldvxcz,
+                      value_type* VXCy, int64_t ldvxcy,
+                      value_type* VXCx, int64_t ldvxcx,
+                      value_type* EXC, const IntegratorSettingsXC& settings ) override;
 
   void eval_exc_grad_( int64_t m, int64_t n, const value_type* P,
                        int64_t ldp, value_type* EXC_GRAD ) override;
@@ -67,13 +81,24 @@ class ShellBatchedReplicatedXCDeviceIntegrator :
                             incore_integrator_type& incore_integrator,
                             XCDeviceData& device_data );
 
-  void exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar,
+  void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
                             const value_type* Pz, int64_t ldpz,
-                            value_type* VXCscalar, int64_t ldvxcscalar,
+                            value_type* VXCs, int64_t ldvxcs,
                             value_type* VXCz, int64_t ldvxcz, value_type* EXC, value_type *N_EL,
                             host_task_iterator task_begin, host_task_iterator task_end,
                             XCDeviceData& device_data );
 
+  void exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
+                            const value_type* Pz, int64_t ldpz,
+                            const value_type* Py, int64_t ldpy,
+                            const value_type* Px, int64_t ldpx,
+                            value_type* VXC, int64_t ldvxc,
+                            value_type* VXCz, int64_t ldvxcz,
+                            value_type* VXCy, int64_t ldvxcy,
+                            value_type* VXCx, int64_t ldvxcx, value_type* EXC, value_type *N_EL,
+                            host_task_iterator task_begin, host_task_iterator task_end,
+                            XCDeviceData& device_data );
+
   void eval_exc_grad_local_work_( int64_t m, int64_t n, const value_type* P,
                                   int64_t ldp, value_type* EXC_GRAD, 
                                   host_task_iterator task_begin, host_task_iterator task_end,
diff --git a/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator_exc_vxc.hpp b/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator_exc_vxc.hpp
index 87093e2b..64f161d8 100644
--- a/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator_exc_vxc.hpp
+++ b/src/xc_integrator/replicated/device/shellbatched_replicated_xc_device_integrator_exc_vxc.hpp
@@ -28,7 +28,7 @@ template <typename ValueType>
 void ShellBatchedReplicatedXCDeviceIntegrator<ValueType>::
   eval_exc_vxc_( int64_t m, int64_t n, const value_type* P,
                  int64_t ldp, value_type* VXC, int64_t ldvxc,
-                 value_type* EXC ) {
+                 value_type* EXC, const IntegratorSettingsXC& settings ) {
 
 
   const auto& basis = this->load_balancer_->basis();
@@ -87,18 +87,39 @@ void ShellBatchedReplicatedXCDeviceIntegrator<ValueType>::
 
 template <typename ValueType>
 void ShellBatchedReplicatedXCDeviceIntegrator<ValueType>::
-  eval_exc_vxc_( int64_t m, int64_t n, const value_type* Pscalar,
-                      int64_t ldpscalar,
+  eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
                       const value_type* Pz,
                       int64_t ldpz,
-                      value_type* VXCscalar, int64_t ldvxcscalar,
+                      value_type* VXCs, int64_t ldvxcs,
                       value_type* VXCz, int64_t ldvxcz,
-                      value_type* EXC ) {
+                      value_type* EXC, const IntegratorSettingsXC& settings ) {
 
-  GauXC::util::unused(m,n,Pscalar,ldpscalar,Pz,ldpz,VXCscalar,ldvxcscalar,VXCz,ldvxcz,EXC);
+  GauXC::util::unused(m,n,Ps,ldps,Pz,ldpz,VXCs,ldvxcs,VXCz,ldvxcz,EXC,settings);
   GAUXC_GENERIC_EXCEPTION("UKS NOT YET IMPLEMENTED FOR DEVICE");
 }
 
+
+template <typename ValueType>
+void ShellBatchedReplicatedXCDeviceIntegrator<ValueType>::
+  eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
+                      const value_type* Pz,
+                      int64_t ldpz,
+                      const value_type* Py,
+                      int64_t ldpy,
+                      const value_type* Px,
+                      int64_t ldpx,
+                      value_type* VXCs, int64_t ldvxcs,
+                      value_type* VXCz, int64_t ldvxcz,
+                      value_type* VXCy, int64_t ldvxcy,
+                      value_type* VXCx, int64_t ldvxcx,
+                      value_type* EXC, const IntegratorSettingsXC& settings ) {
+  GauXC::util::unused(m,n,Ps,ldps,Pz,ldpz,Py,ldpy,Px,ldpx,VXCs,ldvxcs,VXCz,ldvxcz,VXCy,ldvxcy,VXCx,ldvxcx,EXC,settings);
+  GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE");
+}
+
+
 template <typename ValueType>
 void ShellBatchedReplicatedXCDeviceIntegrator<ValueType>::
   exc_vxc_local_work_( const basis_type& basis, const value_type* P, int64_t ldp, 
@@ -195,17 +216,35 @@ void ShellBatchedReplicatedXCDeviceIntegrator<ValueType>::
 
 template <typename ValueType>
 void ShellBatchedReplicatedXCDeviceIntegrator<ValueType>::
-  exc_vxc_local_work_( const basis_type& basis, const value_type* Pscalar, int64_t ldpscalar,
+  exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
                             const value_type* Pz, int64_t ldpz,
-                            value_type* VXCscalar, int64_t ldvxcscalar,
+                            value_type* VXCs, int64_t ldvxcs,
                             value_type* VXCz, int64_t ldvxcz, value_type* EXC, value_type *N_EL,
                             host_task_iterator task_begin, host_task_iterator task_end,
                             XCDeviceData& device_data ) {
 
-  GauXC::util::unused(basis,Pscalar,ldpscalar,Pz,ldpz,VXCscalar,ldvxcscalar,VXCz,ldvxcz,EXC,N_EL,task_begin,task_end,device_data);
+  GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,VXCs,ldvxcs,VXCz,ldvxcz,EXC,N_EL,task_begin,task_end,device_data);
   GAUXC_GENERIC_EXCEPTION("UKS NOT YET IMPLEMENTED FOR DEVICE");
 }
 
+template <typename ValueType>
+void ShellBatchedReplicatedXCDeviceIntegrator<ValueType>::
+  exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps,
+                            const value_type* Pz, int64_t ldpz,
+                            const value_type* Py, int64_t ldpy,
+                            const value_type* Px, int64_t ldpx,
+                            value_type* VXCs, int64_t ldvxcs,
+                            value_type* VXCz, int64_t ldvxcz,
+                            value_type* VXCy, int64_t ldvxcy,
+                            value_type* VXCx, int64_t ldvxcx, value_type* EXC, value_type *N_EL,
+                            host_task_iterator task_begin, host_task_iterator task_end,
+                            XCDeviceData& device_data ) {
+
+  GauXC::util::unused(basis,Ps,ldps,Pz,ldpz,Py,ldpy,Px,ldpx,VXCs,ldvxcs,VXCz,ldvxcz,VXCy,ldvxcy,VXCx,ldvxcx,EXC,N_EL,task_begin,task_end,device_data);
+  GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED FOR DEVICE");
+}
+
+
 template <typename ValueType>
 typename ShellBatchedReplicatedXCDeviceIntegrator<ValueType>::incore_device_task 
   ShellBatchedReplicatedXCDeviceIntegrator<ValueType>::
diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp
index 19ad3ccf..cabeece2 100644
--- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp
+++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp
@@ -30,7 +30,7 @@ class ReferenceReplicatedXCHostIntegrator :
 
   void eval_exc_vxc_( int64_t m, int64_t n, const value_type* P,
                       int64_t ldp, value_type* VXC, int64_t ldvxc,
-                      value_type* EXC ) override;
+                      value_type* EXC, const IntegratorSettingsXC& ks_settings ) override;
 
   void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
                       int64_t ldps,
@@ -38,7 +38,22 @@ class ReferenceReplicatedXCHostIntegrator :
                       int64_t ldpz,
                       value_type* VXCs, int64_t ldvxcs,
                       value_type* VXCz, int64_t ldvxcz,
-                      value_type* EXC ) override;
+                      value_type* EXC, const IntegratorSettingsXC& ks_settings ) override;
+
+  void eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
+                      const value_type* Pz,
+                      int64_t ldpz,
+                      const value_type* Py,
+                      int64_t ldpy,
+                      const value_type* Px,
+                      int64_t ldpx,
+                      value_type* VXCs, int64_t ldvxcs,
+                      value_type* VXCz, int64_t ldvxcz,
+                      value_type* VXCy, int64_t ldvxcy,
+                      value_type* VXCx, int64_t ldvxcx,
+                      value_type* EXC, const IntegratorSettingsXC& ks_settings ) override;
+
 
   void eval_exc_grad_( int64_t m, int64_t n, const value_type* P,
                        int64_t ldp, value_type* EXC_GRAD ) override;
@@ -56,6 +71,16 @@ class ReferenceReplicatedXCHostIntegrator :
                             value_type* VXCs, int64_t ldvxcs,
                             value_type* VXCz, int64_t ldvxcz, 
                             value_type* EXC, value_type *N_EL );
+
+  void exc_vxc_local_work_( const value_type* Ps, int64_t ldps,
+                            const value_type* Pz, int64_t ldpz,
+                            const value_type* Py, int64_t ldpy,
+                            const value_type* Px, int64_t ldpx,
+                            value_type* VXCs, int64_t ldvxcs,
+                            value_type* VXCz, int64_t ldvxcz,
+                            value_type* VXCy, int64_t ldvxcy,
+                            value_type* VXCx, int64_t ldvxcx,
+                            value_type* EXC, value_type *N_EL, const IntegratorSettingsXC& ks_settings );
                             
   void exc_grad_local_work_( const value_type* P, int64_t ldp, value_type* EXC_GRAD );
   void exx_local_work_( const value_type* P, int64_t ldp, value_type* K, int64_t ldk,
diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp
index ca13b9b2..e8ee7383 100644
--- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp
+++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc.hpp
@@ -20,7 +20,7 @@ template <typename ValueType>
 void ReferenceReplicatedXCHostIntegrator<ValueType>::
   eval_exc_vxc_( int64_t m, int64_t n, const value_type* P,
                  int64_t ldp, value_type* VXC, int64_t ldvxc,
-                 value_type* EXC ) {
+                 value_type* EXC, const IntegratorSettingsXC& ks_settings ) {
 
   const auto& basis = this->load_balancer_->basis();
 
@@ -45,7 +45,8 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
   // Compute Local contributions to EXC / VXC
   this->timer_.time_op("XCIntegrator.LocalWork", [&](){
     //exc_vxc_local_work_( P, ldp, VXC, ldvxc, EXC, &N_EL );
-    exc_vxc_local_work_( P, ldp, nullptr, 0, VXC, ldvxc, nullptr, 0, EXC, &N_EL );
+    exc_vxc_local_work_( P, ldp, nullptr, 0, nullptr, 0, nullptr, 0,
+                         VXC, ldvxc, nullptr, 0, nullptr, 0, nullptr, 0, EXC, &N_EL, ks_settings );
   });
 
 
@@ -71,7 +72,7 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
                       int64_t ldpz,
                       value_type* VXCs, int64_t ldvxcs,
                       value_type* VXCz, int64_t ldvxcz,
-                      value_type* EXC ) {
+                      value_type* EXC, const IntegratorSettingsXC& ks_settings) {
 
   const auto& basis = this->load_balancer_->basis();
 
@@ -98,7 +99,8 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
 
   // Compute Local contributions to EXC / VXC
   this->timer_.time_op("XCIntegrator.LocalWork", [&](){
-    exc_vxc_local_work_( Ps, ldps, Pz, ldpz, VXCs, ldvxcs, VXCz, ldvxcz, EXC, &N_EL );
+    exc_vxc_local_work_( Ps, ldps, Pz, ldpz, nullptr, 0,nullptr, 0,
+                         VXCs, ldvxcs, VXCz, ldvxcz, nullptr, 0, nullptr, 0, EXC, &N_EL, ks_settings );
   });
 
 
@@ -118,18 +120,108 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
 
 }
 
+template <typename ValueType>
+void ReferenceReplicatedXCHostIntegrator<ValueType>::
+  eval_exc_vxc_( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
+                      const value_type* Pz,
+                      int64_t ldpz,
+                      const value_type* Py,
+                      int64_t ldpy,
+                      const value_type* Px,
+                      int64_t ldpx,
+                      value_type* VXCs, int64_t ldvxcs,
+                      value_type* VXCz, int64_t ldvxcz,
+                      value_type* VXCy, int64_t ldvxcy,
+                      value_type* VXCx, int64_t ldvxcx,
+                      value_type* EXC, const IntegratorSettingsXC& ks_settings ) {
+
+  const auto& basis = this->load_balancer_->basis();
+
+  // Check that P / VXC are sane
+  const int64_t nbf = basis.nbf();
+  if( m != n )
+    GAUXC_GENERIC_EXCEPTION("P/VXC Must Be Square");
+  if( m != nbf )
+    GAUXC_GENERIC_EXCEPTION("P/VXC Must Have Same Dimension as Basis");
+  if( ldps < nbf )
+    GAUXC_GENERIC_EXCEPTION("Invalid LDPSCALAR");
+  if( ldpz < nbf )
+    GAUXC_GENERIC_EXCEPTION("Invalid LDPZ");
+  if( ldpy < nbf )
+    GAUXC_GENERIC_EXCEPTION("Invalid LDPX");
+  if( ldpx < nbf )
+    GAUXC_GENERIC_EXCEPTION("Invalid LDPY");
+  if( ldvxcs < nbf )
+    GAUXC_GENERIC_EXCEPTION("Invalid LDVXCSCALAR");
+  if( ldvxcz < nbf )
+    GAUXC_GENERIC_EXCEPTION("Invalid LDVXCZ");
+  if( ldvxcy < nbf )
+    GAUXC_GENERIC_EXCEPTION("Invalid LDVXCX");
+  if( ldvxcx < nbf )
+    GAUXC_GENERIC_EXCEPTION("Invalid LDVXCY");
+
+  // Get Tasks
+  this->load_balancer_->get_tasks();
+
+  // Temporary electron count to judge integrator accuracy
+  value_type N_EL;
+   
+  // Compute Local contributions to EXC / VXC
+  this->timer_.time_op("XCIntegrator.LocalWork", [&](){
+    exc_vxc_local_work_( Ps, ldps, Pz, ldpz, Py, ldpy, Px, ldpx, 
+                         VXCs, ldvxcs, VXCz, ldvxcz,
+                         VXCy, ldvxcy, VXCx, ldvxcx, EXC, &N_EL, ks_settings );
+  });
+
+
+  // Reduce Results
+  this->timer_.time_op("XCIntegrator.Allreduce", [&](){
+
+    if( not this->reduction_driver_->takes_host_memory() )
+      GAUXC_GENERIC_EXCEPTION("This Module Only Works With Host Reductions");
+
+    this->reduction_driver_->allreduce_inplace( VXCs, nbf*nbf, ReductionOp::Sum );
+    this->reduction_driver_->allreduce_inplace( VXCz, nbf*nbf, ReductionOp::Sum );
+    this->reduction_driver_->allreduce_inplace( VXCy, nbf*nbf, ReductionOp::Sum ); 
+    this->reduction_driver_->allreduce_inplace( VXCx, nbf*nbf, ReductionOp::Sum );
+    this->reduction_driver_->allreduce_inplace( EXC,   1    , ReductionOp::Sum );
+    this->reduction_driver_->allreduce_inplace( &N_EL, 1    , ReductionOp::Sum );
+
+  });
+
+
+}
+
+
 
 template <typename ValueType>
 void ReferenceReplicatedXCHostIntegrator<ValueType>::
   exc_vxc_local_work_( const value_type* Ps, int64_t ldps,
                        const value_type* Pz, int64_t ldpz,
+                       const value_type* Py, int64_t ldpy,
+                       const value_type* Px, int64_t ldpx,
                        value_type* VXCs, int64_t ldvxcs,
-                       value_type* VXCz, int64_t ldvxcz, 
-                       value_type* EXC, value_type *N_EL ) {
+                       value_type* VXCz, int64_t ldvxcz,
+                       value_type* VXCy, int64_t ldvxcy,
+                       value_type* VXCx, int64_t ldvxcx,
+                       value_type* EXC, value_type *N_EL, const IntegratorSettingsXC& settings) {
+
+  const bool is_gks = (Pz != nullptr) and (VXCz != nullptr) and (VXCy != nullptr) and (VXCx != nullptr);
+  const bool is_uks = (Pz != nullptr) and (VXCz != nullptr) and (VXCy == nullptr) and (VXCx == nullptr);
+  const bool is_rks = not is_uks and not is_gks;
+  if (not is_rks and not is_uks and not is_gks) {
+    GAUXC_GENERIC_EXCEPTION("MUST BE EITHER RKS, UKS, or GKS!");
+  }
+
 
+  // Misc KS settings
+  IntegratorSettingsKS ks_settings;
+  if( auto* tmp = dynamic_cast<const IntegratorSettingsKS*>(&settings) ) {
+    ks_settings = *tmp;
+  }
 
-  const bool is_uks = (Pz != nullptr) and (VXCz != nullptr);
-  const bool is_rks = not is_uks; // TODO: GKS
+  const double gks_dtol = ks_settings.gks_dtol;
 
   // Cast LWD to LocalHostWorkDriver
   auto* lwd = dynamic_cast<LocalHostWorkDriver*>(this->local_work_driver_.get());
@@ -141,6 +233,10 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
 
   const bool needs_laplacian = func.is_mgga() ? true : false; // TODO: Needs Laplacian Check
   
+  if (func.is_mgga() and is_gks) {
+    GAUXC_GENERIC_EXCEPTION("GKS NOT YET IMPLEMENTED WITH mGGA FUNCTIONALS!");
+  }
+
   // Get basis map
   BasisSetMap basis_map(basis,mol);
 
@@ -168,13 +264,21 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
       VXCs[i + j*ldvxcs] = 0.;
     }
   }
-  if(is_uks) {
+  if(not is_rks) {
     for( auto j = 0; j < nbf; ++j ) {
       for( auto i = 0; i < nbf; ++i ) {
         VXCz[i + j*ldvxcz] = 0.;
       }
     }
   }
+  if(is_gks) {
+    for( auto j = 0; j < nbf; ++j ) {
+      for( auto i = 0; i < nbf; ++i ) {
+        VXCy[i + j*ldvxcy] = 0.;
+        VXCx[i + j*ldvxcx] = 0.;
+      }
+    }
+  }
   *EXC = 0.;
  
     
@@ -188,7 +292,7 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
 
   #pragma omp for schedule(dynamic)
   for( size_t iT = 0; iT < ntasks; ++iT ) {
-
+     
     //std::cout << iT << "/" << ntasks << std::endl;
     // Alias current task
     const auto& task = tasks[iT];
@@ -203,13 +307,15 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
     const int32_t* shell_list = task.bfn_screening.shell_list.data();
 
     // Allocate enough memory for batch
-
-    const size_t spin_dim_scal = is_rks ? 1 : 2; 
+   
+    const size_t spin_dim_scal = is_rks ? 1 : is_uks ? 2 : 4; // last case is_gks
+    const size_t sds          = is_rks ? 1 : 2;
+    const size_t gks_mod_KH = is_gks ? 6*npts : 0; // used to store H and H
     const size_t mgga_dim_scal = func.is_mgga() ? 4 : 1; // basis + d1basis
 
     // Things that every calc needs
     host_data.nbe_scr .resize(nbe  * nbe);
-    host_data.zmat    .resize(npts * nbe * spin_dim_scal * mgga_dim_scal); 
+    host_data.zmat    .resize(npts * nbe * spin_dim_scal * mgga_dim_scal + gks_mod_KH); 
     host_data.eps     .resize(npts);
     host_data.vrho    .resize(npts * spin_dim_scal);
 
@@ -218,7 +324,7 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
       host_data.basis_eval .resize( npts * nbe );
       host_data.den_scr    .resize( npts * spin_dim_scal);
     }
-
+     
     // GGA data requirements
     const size_t gga_dim_scal = is_rks ? 1 : 3;
     if( func.is_gga() ){
@@ -232,8 +338,8 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
       // TODO: Add check for Laplacian-dependent functionals
       if ( needs_laplacian ) {
         host_data.basis_eval .resize( 11 * npts * nbe ); // basis + grad (3) + hess (6) + lapl 
-	host_data.lapl       .resize( spin_dim_scal * npts );
-	host_data.vlapl      .resize( spin_dim_scal * npts );
+        host_data.lapl       .resize( spin_dim_scal * npts );
+        host_data.vlapl      .resize( spin_dim_scal * npts );
       } else {
         host_data.basis_eval .resize( 4 * npts * nbe ); // basis + grad (3)
       }
@@ -252,10 +358,16 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
     auto* zmat       = host_data.zmat.data();
 
     decltype(zmat) zmat_z = nullptr;
+    decltype(zmat) zmat_x = nullptr;
+    decltype(zmat) zmat_y = nullptr;
     if(!is_rks) {
       zmat_z = zmat + mgga_dim_scal * nbe * npts;
     }
-
+    if(is_gks) {
+      zmat_x = zmat_z + nbe * npts;
+      zmat_y = zmat_x + nbe * npts;
+    }
+     
     auto* eps        = host_data.eps.data();
     auto* gamma      = host_data.gamma.data();
     auto* tau        = host_data.tau.data();
@@ -279,6 +391,9 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
     value_type* dden_x_eval = nullptr;
     value_type* dden_y_eval = nullptr;
     value_type* dden_z_eval = nullptr;
+    value_type* K = nullptr;
+    value_type* H = nullptr;
+    if (is_gks) { K = zmat + npts * nbe * 4; }
     value_type* mmat_x      = nullptr;
     value_type* mmat_y      = nullptr;
     value_type* mmat_z      = nullptr;
@@ -293,6 +408,7 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
       dden_x_eval   = den_eval    + spin_dim_scal * npts;
       dden_y_eval   = dden_x_eval + spin_dim_scal * npts;
       dden_z_eval   = dden_y_eval + spin_dim_scal * npts;
+      if (is_gks) { H = K + 3*npts;}
     }
 
     if ( func.is_mgga() ) {
@@ -306,18 +422,18 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
       mmat_y        = mmat_x + npts * nbe;
       mmat_z        = mmat_y + npts * nbe;
       if ( needs_laplacian ) {
-	d2basis_xx_eval = dbasis_z_eval + npts * nbe;
-	d2basis_xy_eval = d2basis_xx_eval + npts * nbe;
-	d2basis_xz_eval = d2basis_xy_eval + npts * nbe;
-	d2basis_yy_eval = d2basis_xz_eval + npts * nbe;
-	d2basis_yz_eval = d2basis_yy_eval + npts * nbe;
-	d2basis_zz_eval = d2basis_yz_eval + npts * nbe;
-	lbasis_eval     = d2basis_zz_eval + npts * nbe;
+        d2basis_xx_eval = dbasis_z_eval + npts * nbe;
+        d2basis_xy_eval = d2basis_xx_eval + npts * nbe;
+        d2basis_xz_eval = d2basis_xy_eval + npts * nbe;
+        d2basis_yy_eval = d2basis_xz_eval + npts * nbe;
+        d2basis_yz_eval = d2basis_yy_eval + npts * nbe;
+        d2basis_zz_eval = d2basis_yz_eval + npts * nbe;
+        lbasis_eval     = d2basis_zz_eval + npts * nbe;
       }
       if(is_uks) {
-	mmat_x_z = zmat_z + npts * nbe;
-	mmat_y_z = mmat_x_z + npts * nbe;
-	mmat_z_z = mmat_y_z + npts * nbe;
+        mmat_x_z = zmat_z + npts * nbe;
+        mmat_y_z = mmat_x_z + npts * nbe;
+        mmat_z_z = mmat_y_z + npts * nbe;
       }
     }
 
@@ -330,11 +446,11 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
     // Evaluate Collocation (+ Grad and Hessian)
     if( func.is_mgga() ) {
       if ( needs_laplacian ) {
-	// TODO: Modify gau2grid to compute Laplacian instead of full hessian
+  // TODO: Modify gau2grid to compute Laplacian instead of full hessian
         lwd->eval_collocation_hessian( npts, nshells, nbe, points, basis, shell_list,
           basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval, d2basis_xx_eval,
-	  d2basis_xy_eval, d2basis_xz_eval, d2basis_yy_eval, d2basis_yz_eval,
-	  d2basis_zz_eval);
+          d2basis_xy_eval, d2basis_xz_eval, d2basis_yy_eval, d2basis_yz_eval,
+          d2basis_zz_eval);
         blas::lacpy( 'A', nbe, npts, d2basis_xx_eval, nbe, lbasis_eval, nbe );
         blas::axpy( nbe * npts, 1., d2basis_yy_eval, 1, lbasis_eval, 1);
         blas::axpy( nbe * npts, 1., d2basis_zz_eval, 1, lbasis_eval, 1);
@@ -351,7 +467,7 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
       lwd->eval_collocation( npts, nshells, nbe, points, basis, shell_list,
         basis_eval );
 
-
+     
     // Evaluate X matrix (fac * P * B) -> store in Z
     const auto xmat_fac = is_rks ? 2.0 : 1.0; // TODO Fix for spinor RKS input
     lwd->eval_xmat( mgga_dim_scal * npts, nbf, nbe, submat_map, xmat_fac, Ps, ldps, basis_eval, nbe,
@@ -362,19 +478,25 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
       lwd->eval_xmat( mgga_dim_scal * npts, nbf, nbe, submat_map, 1.0, Pz, ldpz, basis_eval, nbe,
         zmat_z, nbe, nbe_scr);
     }
-
-
+     
+    if(is_gks) {
+      lwd->eval_xmat( npts, nbf, nbe, submat_map, 1.0, Py, ldpy, basis_eval, nbe,
+        zmat_x, nbe, nbe_scr);
+      lwd->eval_xmat( npts, nbf, nbe, submat_map, 1.0, Px, ldpx, basis_eval, nbe,
+        zmat_y, nbe, nbe_scr);
+    }
+     
     // Evaluate U and V variables
     if( func.is_mgga() ) {
       if (is_rks) {
         lwd->eval_uvvar_mgga_rks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval,
-	  dbasis_z_eval, lbasis_eval, zmat, nbe, mmat_x, mmat_y, mmat_z, 
-	  nbe, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, gamma, tau, lapl);
+          dbasis_z_eval, lbasis_eval, zmat, nbe, mmat_x, mmat_y, mmat_z, 
+          nbe, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, gamma, tau, lapl);
       } else if (is_uks) {
         lwd->eval_uvvar_mgga_uks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval,
-	  dbasis_z_eval, lbasis_eval, zmat, nbe, zmat_z, nbe, 
-	  mmat_x, mmat_y, mmat_z, nbe, mmat_x_z, mmat_y_z, mmat_z_z, nbe, 
-	  den_eval, dden_x_eval, dden_y_eval, dden_z_eval, gamma, tau, lapl);
+          dbasis_z_eval, lbasis_eval, zmat, nbe, zmat_z, nbe, 
+          mmat_x, mmat_y, mmat_z, nbe, mmat_x_z, mmat_y_z, mmat_z_z, nbe, 
+          den_eval, dden_x_eval, dden_y_eval, dden_z_eval, gamma, tau, lapl);
       }
     } else if ( func.is_gga() ) {
       if(is_rks) {
@@ -385,13 +507,21 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
         lwd->eval_uvvar_gga_uks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval,
           dbasis_z_eval, zmat, nbe, zmat_z, nbe, den_eval, dden_x_eval, 
           dden_y_eval, dden_z_eval, gamma );
+      } else if(is_gks) {
+        lwd->eval_uvvar_gga_gks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval,
+          dbasis_z_eval, zmat, nbe, zmat_z, nbe, zmat_x, nbe, zmat_y, nbe, den_eval, dden_x_eval,
+          dden_y_eval, dden_z_eval, gamma, K, H, gks_dtol );
       }
+       
      } else {
       if(is_rks) {
         lwd->eval_uvvar_lda_rks( npts, nbe, basis_eval, zmat, nbe, den_eval );
       } else if(is_uks) {
         lwd->eval_uvvar_lda_uks( npts, nbe, basis_eval, zmat, nbe, zmat_z, nbe,
           den_eval );
+      } else if(is_gks) {
+        lwd->eval_uvvar_lda_gks( npts, nbe, basis_eval, zmat, nbe, zmat_z, nbe,
+          zmat_x, nbe, zmat_y, nbe, den_eval, K, gks_dtol );
       }
      }
     
@@ -406,10 +536,10 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
     // Factor weights into XC results
     for( int32_t i = 0; i < npts; ++i ) {
       eps[i]  *= weights[i];
-      vrho[spin_dim_scal*i] *= weights[i];
-      if(not is_rks) vrho[spin_dim_scal*i+1] *= weights[i];
+      vrho[sds*i] *= weights[i];
+      if(not is_rks) vrho[sds*i+1] *= weights[i];
     }
-
+     
     if( func.is_gga() ){
       for( int32_t i = 0; i < npts; ++i ) {
          vgamma[gga_dim_scal*i] *= weights[i];
@@ -422,21 +552,21 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
 
     if( func.is_mgga() ){
       for( int32_t i = 0; i < npts; ++i) {
-	vtau[spin_dim_scal*i]  *= weights[i];
-	vgamma[gga_dim_scal*i] *= weights[i];
-	if(not is_rks) {
-	  vgamma[gga_dim_scal*i+1] *= weights[i];
-	  vgamma[gga_dim_scal*i+2] *= weights[i];
-	  vtau[spin_dim_scal*i+1]  *= weights[i];
-	}
-
-	// TODO: Add checks for Lapacian-dependent functionals
-	if( needs_laplacian ) {
+        vtau[spin_dim_scal*i]  *= weights[i];
+        vgamma[gga_dim_scal*i] *= weights[i];
+        if(not is_rks) {
+          vgamma[gga_dim_scal*i+1] *= weights[i];
+          vgamma[gga_dim_scal*i+2] *= weights[i];
+          vtau[spin_dim_scal*i+1]  *= weights[i];
+        }
+
+        // TODO: Add checks for Lapacian-dependent functionals
+        if( needs_laplacian ) {
           vlapl[spin_dim_scal*i] *= weights[i];
           if(not is_rks) {
             vlapl[spin_dim_scal*i+1] *= weights[i];
-	  }
-	}
+          }
+        }
       }
     }
 
@@ -445,17 +575,17 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
     // Evaluate Z matrix for VXC
     if( func.is_mgga() ) {
       if(is_rks) {
-	lwd->eval_zmat_mgga_vxc_rks( npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval,
-	                        dbasis_y_eval, dbasis_z_eval, lbasis_eval,
-				dden_x_eval, dden_y_eval, dden_z_eval, zmat, nbe);
-	lwd->eval_mmat_mgga_vxc_rks( npts, nbe, vtau, vlapl, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval,
-	                        mmat_x, mmat_y, mmat_z, nbe);
+        lwd->eval_zmat_mgga_vxc_rks( npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval,
+                                     dbasis_y_eval, dbasis_z_eval, lbasis_eval,
+                                     dden_x_eval, dden_y_eval, dden_z_eval, zmat, nbe);
+        lwd->eval_mmat_mgga_vxc_rks( npts, nbe, vtau, vlapl, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval,
+                                     mmat_x, mmat_y, mmat_z, nbe);
       } else if (is_uks) {
-	lwd->eval_zmat_mgga_vxc_uks( npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval,
-	                        dbasis_y_eval, dbasis_z_eval, lbasis_eval,
-				dden_x_eval, dden_y_eval, dden_z_eval, zmat, nbe, zmat_z, nbe);
-	lwd->eval_mmat_mgga_vxc_uks( npts, nbe, vtau, vlapl, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval,
-	                        mmat_x, mmat_y, mmat_z, nbe, mmat_x_z, mmat_y_z, mmat_z_z, nbe);
+        lwd->eval_zmat_mgga_vxc_uks( npts, nbe, vrho, vgamma, vlapl, basis_eval, dbasis_x_eval,
+                                     dbasis_y_eval, dbasis_z_eval, lbasis_eval,
+                                     dden_x_eval, dden_y_eval, dden_z_eval, zmat, nbe, zmat_z, nbe);
+        lwd->eval_mmat_mgga_vxc_uks( npts, nbe, vtau, vlapl, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval,
+                                     mmat_x, mmat_y, mmat_z, nbe, mmat_x_z, mmat_y_z, mmat_z_z, nbe);
       }
     }
     else if( func.is_gga() ) {
@@ -467,16 +597,25 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
         lwd->eval_zmat_gga_vxc_uks( npts, nbe, vrho, vgamma, basis_eval, dbasis_x_eval,
                                 dbasis_y_eval, dbasis_z_eval, dden_x_eval, dden_y_eval,
                                 dden_z_eval, zmat, nbe, zmat_z, nbe);
+      } else if(is_gks) {
+        lwd->eval_zmat_gga_vxc_gks( npts, nbe, vrho, vgamma, basis_eval, dbasis_x_eval,
+                                dbasis_y_eval, dbasis_z_eval, dden_x_eval, dden_y_eval,
+                                dden_z_eval, zmat, nbe, zmat_z, nbe, zmat_x, nbe, zmat_y, nbe,
+                                K, H);
       }
+       
     } else {
       if(is_rks) {
         lwd->eval_zmat_lda_vxc_rks( npts, nbe, vrho, basis_eval, zmat, nbe );
       } else if(is_uks) {
         lwd->eval_zmat_lda_vxc_uks( npts, nbe, vrho, basis_eval, zmat, nbe, zmat_z, nbe );
+      } else if(is_gks) {
+        lwd->eval_zmat_lda_vxc_gks( npts, nbe, vrho, basis_eval, zmat, nbe, zmat_z, nbe, 
+                                    zmat_x, nbe, zmat_y, nbe, K);
       }
     }
 
-
+     
     // Incremeta LT of VXC
     #pragma omp critical
     {
@@ -492,7 +631,14 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
       if(not is_rks) {
         lwd->inc_vxc( mgga_dim_scal * npts, nbf, nbe, basis_eval, submat_map, zmat_z, nbe,VXCz, ldvxcz, nbe_scr);
       }
-
+      if(is_gks) {
+        lwd->inc_vxc( npts, nbf, nbe, basis_eval, submat_map, zmat_x, nbe, VXCy, ldvxcy,
+          nbe_scr);
+        lwd->inc_vxc( npts, nbf, nbe, basis_eval, submat_map, zmat_y, nbe, VXCx, ldvxcx,
+          nbe_scr);
+      }
+       
+ 
     }
 
   } // Loop over tasks
@@ -512,6 +658,14 @@ void ReferenceReplicatedXCHostIntegrator<ValueType>::
       }
     }
   }
+  if( is_gks) {
+    for( int32_t j = 0;   j < nbf; ++j ) {
+      for( int32_t i = j+1; i < nbf; ++i ) {
+        VXCy[ j + i*ldvxcy ] = VXCy[ i + j*ldvxcy ];
+        VXCx[ j + i*ldvxcx ] = VXCx[ i + j*ldvxcx ];
+      }
+    }
+  }
 
 } 
 
diff --git a/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx b/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx
index 6833e2f5..92c5bcf3 100644
--- a/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx
+++ b/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx
@@ -36,26 +36,53 @@ template <typename ValueType>
 void ReplicatedXCIntegratorImpl<ValueType>::
   eval_exc_vxc( int64_t m, int64_t n, const value_type* P,
                 int64_t ldp, value_type* VXC, int64_t ldvxc,
-                value_type* EXC ) {
+                value_type* EXC, const IntegratorSettingsXC& ks_settings ) {
 
-    eval_exc_vxc_(m,n,P,ldp,VXC,ldvxc,EXC);
+    eval_exc_vxc_(m,n,P,ldp,VXC,ldvxc,EXC,ks_settings);
 
 }
 
 template <typename ValueType>
 void ReplicatedXCIntegratorImpl<ValueType>::
-  eval_exc_vxc( int64_t m, int64_t n, const value_type* Pscalar,
-                      int64_t ldpscalar,
+  eval_exc_vxc( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
                       const value_type* Pz,
                       int64_t ldpz,
-                      value_type* VXCscalar, int64_t ldvxcscalar,
+                      value_type* VXCs, int64_t ldvxcs,
                       value_type* VXCz, int64_t ldvxcz,
-                      value_type* EXC ) {
+                      value_type* EXC, const IntegratorSettingsXC& ks_settings) {
 
-    eval_exc_vxc_(m,n,Pscalar,ldpscalar,
+    eval_exc_vxc_(m,n,Ps,ldps,
                       Pz,ldpz,
-                      VXCscalar,ldvxcscalar,
-                      VXCz,ldvxcz,EXC);
+                      VXCs,ldvxcs,
+                      VXCz,ldvxcz,EXC, ks_settings);
+
+}
+
+template <typename ValueType>
+void ReplicatedXCIntegratorImpl<ValueType>::
+  eval_exc_vxc( int64_t m, int64_t n, const value_type* Ps,
+                      int64_t ldps,
+                      const value_type* Pz,
+                      int64_t ldpz,
+                      const value_type* Py,
+                      int64_t ldpy,
+                      const value_type* Px,
+                      int64_t ldpx,
+                      value_type* VXCs, int64_t ldvxcs,
+                      value_type* VXCz, int64_t ldvxcz,
+                      value_type* VXCy, int64_t ldvxcy,
+                      value_type* VXCx, int64_t ldvxcx,
+                      value_type* EXC,  const IntegratorSettingsXC& ks_settings ) {
+
+    eval_exc_vxc_(m,n,Ps,ldps,
+                      Pz,ldpz,
+                      Py,ldpy,
+                      Px,ldpx,
+                      VXCs,ldvxcs,
+                      VXCz,ldvxcz,
+                      VXCy,ldvxcy,
+                      VXCx,ldvxcx,EXC, ks_settings);
 
 }
 
diff --git a/tests/ref_data/h3_blyp_cc-pvdz_ssf_gks.bin b/tests/ref_data/h3_blyp_cc-pvdz_ssf_gks.bin
new file mode 100644
index 00000000..a9e4bacc
Binary files /dev/null and b/tests/ref_data/h3_blyp_cc-pvdz_ssf_gks.bin differ
diff --git a/tests/xc_integrator.cxx b/tests/xc_integrator.cxx
index 5dbc90be..f10501c2 100644
--- a/tests/xc_integrator.cxx
+++ b/tests/xc_integrator.cxx
@@ -36,10 +36,10 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt,
   using matrix_type = Eigen::MatrixXd;
   Molecule mol;
   BasisSet<double> basis;
-  matrix_type P, Pz, VXC_ref, VXCz_ref, K_ref;
+  matrix_type P, Pz, Py, Px, VXC_ref, VXCz_ref, VXCy_ref, VXCx_ref, K_ref;
   double EXC_ref;
   std::vector<double> EXC_GRAD_ref;
-  bool has_k = false, has_exc_grad = false, uks = false;
+  bool has_k = false, has_exc_grad = false, rks = true, uks = false, gks = false;
   {
     read_hdf5_record( mol,   reference_file, "/MOLECULE" );
     read_hdf5_record( basis, reference_file, "/BASIS"    );
@@ -48,36 +48,66 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt,
     
     std::string den="/DENSITY";
     std::string den2="/DENSITY_Z";
+    std::string den3="/DENSITY_Y";
+    std::string den4="/DENSITY_X";
     std::string vxc="/VXC";
     std::string vxc2="VXC_Z";
-    
-    if (file.exist("/DENSITY_Z")) {
+    std::string vxc3="VXC_Y";
+    std::string vxc4="VXC_X";
+
+    if (file.exist("/DENSITY_Z")) { rks = false; }
+
+    if (file.exist("/DENSITY_Z") and not file.exist("/DENSITY_Y") and not file.exist("/DENSITY_X")) {
        den="/DENSITY_SCALAR";
        vxc="/VXC_SCALAR";
        uks=true;
     }
-      
+     
+    if (file.exist("/DENSITY_X") and file.exist("/DENSITY_Y") and file.exist("/DENSITY_Z")) {
+       den="/DENSITY_SCALAR";
+       vxc="/VXC_SCALAR";
+       gks=true;
+    }
+ 
     auto dset = file.getDataSet(den);
     
     auto dims = dset.getDimensions();
     P        = matrix_type( dims[0], dims[1] );
     VXC_ref  = matrix_type( dims[0], dims[1] );
-    if (uks) {
+    if (not rks) {
       Pz       = matrix_type( dims[0], dims[1] );
       VXCz_ref = matrix_type( dims[0], dims[1] );
+    } 
+    if (gks) {
+      Py       = matrix_type( dims[0], dims[1] );
+      VXCy_ref = matrix_type( dims[0], dims[1] );
+      Px       = matrix_type( dims[0], dims[1] );
+      VXCx_ref = matrix_type( dims[0], dims[1] );
     }
 
+
     dset.read( P.data() );
     dset = file.getDataSet(vxc);
     dset.read( VXC_ref.data() );
 
-    if (uks) {
+    if (not rks) {
       dset = file.getDataSet(den2);
       dset.read( Pz.data() );
       dset = file.getDataSet(vxc2);
       dset.read( VXCz_ref.data() );
     }
-    
+
+    if (gks) {
+      dset = file.getDataSet(den3);
+      dset.read( Py.data() );
+      dset = file.getDataSet(vxc3);
+      dset.read( VXCy_ref.data() );
+      dset = file.getDataSet(den4);
+      dset.read( Px.data() );
+      dset = file.getDataSet(vxc4);
+      dset.read( VXCx_ref.data() );
+    }    
+
     dset = file.getDataSet("/EXC");
     dset.read( &EXC_ref );
 
@@ -97,6 +127,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt,
   }
 
   if(uks and ex == ExecutionSpace::Device) return;
+  if(gks and ex == ExecutionSpace::Device) return;
   if(func.is_mgga() and ex == ExecutionSpace::Device) return;
 
 
@@ -127,7 +158,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt,
   auto integrator = integrator_factory.get_instance( func, lb );
 
   // Integrate Density
-  if( check_integrate_den and not uks ) {
+  if( check_integrate_den and rks) {
     auto N_EL_ref = std::accumulate( mol.begin(), mol.end(), 0ul,
       [](const auto& a, const auto &b) { return a + b.Z.get(); });
     auto N_EL = integrator.integrate_den( P );
@@ -136,7 +167,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt,
   }
 
   // Integrate EXC/VXC
-  if ( not uks ) {
+  if ( rks ) {
     auto [ EXC, VXC ] = integrator.eval_exc_vxc( P );
 
     // Check EXC/VXC
@@ -150,7 +181,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt,
       auto VXC1_diff_nrm = ( VXC1 - VXC_ref ).norm();
       CHECK( VXC1_diff_nrm / basis.nbf() < 1e-10 ); 
     }
-  } else {
+  } else if (uks) {
     auto [ EXC, VXC, VXCz ] = integrator.eval_exc_vxc( P, Pz );
 
     // Check EXC/VXC
@@ -169,9 +200,40 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt,
       CHECK( VXCz1_diff_nrm / basis.nbf() < 1e-10 );
     }
 
+  } else if (gks) {
+    auto [ EXC, VXC, VXCz, VXCy, VXCx ] = integrator.eval_exc_vxc( P, Pz, Py, Px );
+
+    // Check EXC/VXC
+    auto VXC_diff_nrm = ( VXC - VXC_ref ).norm();
+    auto VXCz_diff_nrm = ( VXCz - VXCz_ref ).norm();
+    auto VXCy_diff_nrm = ( VXCy - VXCy_ref ).norm();
+    auto VXCx_diff_nrm = ( VXCx - VXCx_ref ).norm();
+
+    CHECK( EXC == Approx( EXC_ref ) );
+    CHECK( VXC_diff_nrm / basis.nbf() < 1e-10 );
+    CHECK( VXCz_diff_nrm / basis.nbf() < 1e-10 );
+    CHECK( VXCy_diff_nrm / basis.nbf() < 1e-10 );
+    CHECK( VXCx_diff_nrm / basis.nbf() < 1e-10 );
+    // Check if the integrator propagates state correctly
+    {
+      auto [ EXC1, VXC1, VXCz1, VXCy1, VXCx1] = integrator.eval_exc_vxc( P, Pz, Py, Px );
+      CHECK( EXC1 == Approx( EXC_ref ) );
+      auto VXC1_diff_nrm = ( VXC1 - VXC_ref ).norm();
+      auto VXCz1_diff_nrm = ( VXCz1 - VXCz_ref ).norm();
+      auto VXCy1_diff_nrm = ( VXCy1 - VXCy_ref ).norm();
+      auto VXCx1_diff_nrm = ( VXCx1 - VXCx_ref ).norm();
+      CHECK( VXC1_diff_nrm / basis.nbf() < 1e-10 );
+      CHECK( VXCz1_diff_nrm / basis.nbf() < 1e-10 );
+      CHECK( VXCy1_diff_nrm / basis.nbf() < 1e-10 );
+      CHECK( VXCx1_diff_nrm / basis.nbf() < 1e-10 );
+    }
+
   }
+
+
+
   // Check EXC Grad
-  if( check_grad and has_exc_grad and not uks) {
+  if( check_grad and has_exc_grad and rks) {
     auto EXC_GRAD = integrator.eval_exc_grad( P );
     using map_type = Eigen::Map<Eigen::MatrixXd>;
     map_type EXC_GRAD_ref_map( EXC_GRAD_ref.data(), mol.size(), 3 );
@@ -181,7 +243,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt,
   }
 
   // Check K
-  if( has_k and check_k and not uks ) {
+  if( has_k and check_k and rks ) {
     auto K = integrator.eval_exx( P );
     CHECK((K - K.transpose()).norm() < std::numeric_limits<double>::epsilon()); // Symmetric
     CHECK( (K - K_ref).norm() / basis.nbf() < 1e-7 );
@@ -346,6 +408,12 @@ TEST_CASE( "XC Integrator", "[xc-integrator]" ) {
         func, PruningScheme::Robust );
   }
 
+  //GKS GGA Test
+  SECTION( "H3 / BLYP / cc-pvdz" ) {
+    auto func = make_functional(blyp, pol);
+    test_integrator(GAUXC_REF_DATA_PATH "/h3_blyp_cc-pvdz_ssf_gks.bin",
+        func, PruningScheme::Unpruned );
+  }
 
   // sn-LinK Test
   SECTION( "Benzene / PBE0 / 6-31G(d)" ) {