diff --git a/src/modglobal.f90 b/src/modglobal.f90 index 188616aa..95080af7 100644 --- a/src/modglobal.f90 +++ b/src/modglobal.f90 @@ -155,12 +155,16 @@ module modglobal logical :: lconstexner = .false. !< switch to use the initial pressure profile in the exner function ! Poisson solver: modpois / modhypre -#ifdef USE_FFTW - integer :: solver_id = 100 ! Identifier for nummerical solver: 0 1 2 3 4 + ! set default solver, can be overridden in namoptions +#if defined(DALES_GPU) + integer :: solver_id = 200 ! cufft (default if OpenACC is used) +#elif defined(USE_FFTW) + integer :: solver_id = 100 ! FFTW (default if FFTW library compiled in and not on GPU) #else - integer :: solver_id = 0 + integer :: solver_id = 0 ! Built-in FFT #endif - ! FFT SMG PFMG BiCGSTAB GMRES + ! solver_id: 0 1 2 3 4 100 200 + ! FFT SMG PFMG BiCGSTAB GMRES FFTW cufft integer :: maxiter = 10000 ! Number of iterations . X X X X real(real64):: tolerance = 1E-8! Convergence threshold . X X X X integer :: n_pre = 1 ! Number of pre and post relaxations . X X X X diff --git a/src/modpois.f90 b/src/modpois.f90 index 31f3dad9..32d8622e 100644 --- a/src/modpois.f90 +++ b/src/modpois.f90 @@ -57,6 +57,12 @@ subroutine initpois implicit none +#ifdef DALES_GPU + if (solver_id /= 200) then + STOP 'Running on GPU requires solver_id = 200 (cufft)' + end if +#endif + if (solver_id == 0) then call fft2dinit(p, Fp, d, xyrt, ps, pe, qs, qe) else if (solver_id == 100) then @@ -130,7 +136,7 @@ subroutine poisson logical converged call timer_tic('modpois/poisson', 0) - + call fillps if (solver_id == 0) then @@ -198,7 +204,7 @@ subroutine fillps use modmpi, only : excjs use modopenboundary, only : openboundary_excjs implicit none - + integer :: i, j, k, ex, ey real(pois_r) :: rk3coef_inv @@ -223,7 +229,7 @@ subroutine fillps !$acc parallel loop collapse(3) default(present) async(1) do k=1,kmax - do j=2,ey ! openbc needs these to i2,j2. Periodic bc needs them to i1,j1 + do j=2,ey ! openbc needs these to i2,j2. Periodic bc needs them to i1,j1 do i=2,ex pup(i,j,k) = up(i,j,k) + um(i,j,k) * rk3coef_inv pvp(i,j,k) = vp(i,j,k) + vm(i,j,k) * rk3coef_inv @@ -345,7 +351,7 @@ subroutine tderive !$acc wait(1) call timer_toc('modpois/tderive') - + return end subroutine tderive @@ -390,9 +396,9 @@ subroutine solmpj integer :: i, j, k call timer_tic('modpois/solmpj', 1) - + ! Generate tridiagonal matrix - + !$acc parallel loop default(present) async(1) do k=1,kmax ! SB fixed the coefficients