Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions sp/Friction_manning.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ inline void Friction_manning(const float *dT,const float *M_n, //OP_RW, discard
// Update Momentum
values[0] = TruncatedH;
values[3] = values[3];
if (values[0] <= 1e-3){
if (values[0] <= EPS){
values[1] = 0.0f;
values[2] = 0.0f;
} else if (1e-3 < values[0] <= 50.0f) {
} else if (EPS < values[0] <= 50.0f) {
values[1] = values[1] / (1.0f + Fr * *dT);
values[2] = values[2] / (1.0f + Fr * *dT);
} else {
Expand Down
22 changes: 10 additions & 12 deletions sp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ ifdef PTSCOTCH_INSTALL_PATH
PTSCOTCH_INC = -I$(PTSCOTCH_INSTALL_PATH)/include -DHAVE_PTSCOTCH
PTSCOTCH_INC = -I$(PTSCOTCH_INSTALL_PATH)/include
PTSCOTCH_LIB = -L$(PTSCOTCH_INSTALL_PATH)/lib/ -lptscotch \
-L$(PTSCOTCH_INSTALL_PATH)/lib/ -lptscotcherr
-L$(PTSCOTCH_INSTALL_PATH)/lib/ -lscotch -lptscotcherr
endif

ifeq ($(OP2_COMPILER),gnu)
Expand All @@ -56,7 +56,7 @@ else
ifeq ($(OP2_COMPILER),intel)
CPP = icpc
# CPPFLAGS = -g -O0
CPPFLAGS = -O3 -xHost -parallel -g -DMPICH_IGNORE_CXX_SEEK -DMPICH_SKIP_MPICXX
CPPFLAGS = -O3 -parallel -g -DMPICH_IGNORE_CXX_SEEK -DMPICH_SKIP_MPICXX
# CPPFLAGS = -g -O0 -vec-report -xSSE4.2 -parallel
OMPFLAGS = -qopenmp
MPICPP = $(MPI_INSTALL_PATH)/bin/mpicxx
Expand All @@ -75,7 +75,7 @@ endif
#


NVCCFLAGS = -arch=sm_60 -Xptxas=-v -O2 -m64 -g #-G
NVCCFLAGS = -arch=sm_80 -Xptxas=-v -O2 -m64 -g #-G

#
# master to make all versions
Expand Down Expand Up @@ -114,16 +114,14 @@ else
endif


cuda/volna_kernels_cu.o: cuda/volna_kernels.cu \
EvolveValuesRK2_1.h EvolveValuesRK2_2.h EvolveValuesRK3_1.h EvolveValuesRK3_2.h EvolveValuesRK3_3.h EvolveValuesRK3_4.h applyConst.h getMaxElevation.h getTotalVol.h \
cuda/volna_kernels_cu.o: cuda/volna_kernels.cu \
EvolveValuesRK2_1.h EvolveValuesRK2_2.h applyConst.h getMaxElevation.h getMaxSpeed.h \
initBathymetry_formula.h initBathymetry_update.h initBore_select.h initEta_formula.h initGaussianLandslide.h \
initU_formula.h initV_formula.h computeGradient.h computeFluxes.h limiter.h SpaceDiscretization.h NumericalFluxes.h simulation_1.h \
values_operation2.h cuda/applyConst_kernel.cu cuda/EvolveValuesRK2_1_kernel.cu cuda/EvolveValuesRK2_2_kernel.cu cuda/EvolveValuesRK3_1_kernel.cu\
cuda/EvolveValuesRK3_2_kernel.cu cuda/EvolveValuesRK3_3_kernel.cu cuda/EvolveValuesRK3_4_kernel.cu cuda/computeGradient_kernel.cu \
cuda/computeFluxes_kernel.cu cuda/limiter_kernel.cu cuda/SpaceDiscretization_kernel.cu cuda/NumericalFluxes_kernel.cu \
cuda/simulation_1_kernel.cu \
cuda/values_operation2_kernel.cu Makefile

initU_formula.h initV_formula.h computeGradient.h computeFluxes.h limiter.h Timestep.h NumericalFluxes.h simulation_1.h \
values_operation2.h Friction_manning.h cuda/applyConst_kernel.cu cuda/EvolveValuesRK2_1_kernel.cu cuda/EvolveValuesRK2_2_kernel.cu \
cuda/computeFluxes_kernel.cu cuda/limiter_kernel.cu cuda/Timestep_kernel.cu cuda/NumericalFluxes_kernel.cu \
cuda/simulation_1_kernel.cu cuda/computeGradient_kernel.cu cuda/getMaxSpeed_kernel.cu cuda/getMaxElevation_kernel.cu \
cuda/values_operation2_kernel.cu cuda/Friction_manning_kernel.cu Makefile
nvcc $(VAR) $(INC) $(NVCCFLAGS) $(OP2_INC) $(HDF5_INC) -I$(MPI_INC) -c -o cuda/volna_kernels_cu.o cuda/volna_kernels.cu

volna_mpi: volna.cpp volna_event.cpp volna_init.cpp volna_output.cpp volna_writeVTK.cpp volna_simulation.cpp volna_util.cpp Makefile
Expand Down
9 changes: 7 additions & 2 deletions sp/computeFluxes.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ inline void computeFluxes(const float *cellLeft, const float *cellRight,
rightCellValues[3] = leftCellValues[3];
outNormalVelocity = inNormalVelocity;
}
/*

// Wall
rightCellValues[3] = leftCellValues[3];
/*rightCellValues[3] = leftCellValues[3];
rightCellValues[0] = leftCellValues[0];
outNormalVelocity = -1.0f*inNormalVelocity;
outTangentVelocity = inTangentVelocity;
Expand Down Expand Up @@ -136,11 +136,13 @@ inline void computeFluxes(const float *cellLeft, const float *cellRight,
(hR*(uRn - sR) - hL*(uLn - sL));

if ((leftCellValues[0] <= EPS) && (rightCellValues[0] > EPS)) {
// if ((leftCellValues[0] <= 1e-3) && (rightCellValues[0] > 1e-3)) {
sL = uRn - 2.0f*cR;
sR = uRn + cR;
sStar = sL;
}
if ((rightCellValues[0] <= EPS) && (leftCellValues[0] > EPS)) {
// if ((rightCellValues[0] <= 1e-3) && (leftCellValues[0] > 1e-3)) {
sR = uLn + 2.0f*cL;
sL = uLn - cL;
sStar = sR;
Expand All @@ -151,6 +153,7 @@ inline void computeFluxes(const float *cellLeft, const float *cellRight,
float uRp = vR*edgeNormals[0] - uR*edgeNormals[1];

float LeftFluxes_H, LeftFluxes_N, LeftFluxes_U, LeftFluxes_V;
//inlined ProjectedPhysicalFluxes(leftCellValues, Normals, params, LeftFluxes);
float HuDotN = (hL*uL) * edgeNormals[0] + (hL*vL) * edgeNormals[1];

LeftFluxes_H = HuDotN;
Expand All @@ -162,8 +165,10 @@ inline void computeFluxes(const float *cellLeft, const float *cellRight,
LeftFluxes_U += (.5f * g * edgeNormals[0] ) * ( hL * hL );
LeftFluxes_V += (.5f * g * edgeNormals[1] ) * ( hL * hL );
LeftFluxes_N += (.5f * g ) * ( hL * hL );
//end of inlined

float RightFluxes_H,RightFluxes_N, RightFluxes_U, RightFluxes_V;
//inlined ProjectedPhysicalFluxes(rightCellValues, Normals, params, RightFluxes);
HuDotN = (hR*uR) * edgeNormals[0] + (hR*vR) * edgeNormals[1];

RightFluxes_H = HuDotN;
Expand Down
126 changes: 26 additions & 100 deletions sp/cuda/EvolveValuesRK2_1_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3,42 +3,26 @@
//

//user function
__device__
inline void EvolveValuesRK2_1_gpu(const float *dT, float *midPointConservative, //OP_RW //temp
const float *in, //OP_READ
float *inConservative, //OP_WRITE //temp
float *midPoint) //OP_WRITE
{
midPointConservative[0] *= *dT;
midPointConservative[1] *= *dT;
midPointConservative[2] *= *dT;

//call to ToConservativeVariables inlined
inConservative[0] = in[0];
inConservative[1] = in[0] * (in[1]);
inConservative[2] = in[0] * (in[2]);
inConservative[3] = in[3];

midPointConservative[0] += inConservative[0];
midPointConservative[1] += inConservative[1];
midPointConservative[2] += inConservative[2];
midPointConservative[3] += inConservative[3];

//call to ToPhysicalVariables inlined
float TruncatedH = midPointConservative[0] < EPS ? EPS : midPointConservative[0];
midPoint[0] = midPointConservative[0];
midPoint[1] = midPointConservative[1] / TruncatedH;
midPoint[2] = midPointConservative[2] / TruncatedH;
midPoint[3] = midPointConservative[3];
// Per-element user function for the EvolveValuesRK2_1 loop.
//   dT   : pointer to the scalar multiplied into Lw_n
//   Lw_n : input increments; components 0..2 are read
//   in   : input values; components 0..3 are read
//   out  : output values; components 0..3 are written
// Computes out[c] = Lw_n[c] * dT + in[c] for c = 0..2, then raises out[0] to
// at least EPS_cuda and stores (in[3] - in[0]) + out[0] in out[3].
__device__ void EvolveValuesRK2_1_gpu(const float *dT, const float *Lw_n,
                                      const float *in,
                                      float *out) {
  // Update the first three components, in index order.
  for (int c = 0; c < 3; ++c) {
    out[c] = Lw_n[c] * *dT + in[c];
  }

  // Start component 3 from the input's (component 3 - component 0) difference.
  out[3] = in[3] - in[0];

  // Floor component 0 at EPS_cuda; a value that does not compare below the
  // threshold is passed through unchanged.
  float clampedH = out[0];
  if (clampedH < EPS_cuda) {
    clampedH = EPS_cuda;
  }
  out[0] = clampedH;

  // Add the floored component 0 back onto the stored difference.
  out[3] += clampedH;
}

// CUDA kernel function
__global__ void op_cuda_EvolveValuesRK2_1(
const float *arg0,
float *arg1,
const float *__restrict arg1,
const float *__restrict arg2,
float *arg3,
float *arg4,
int set_size ) {


Expand All @@ -49,45 +33,41 @@ __global__ void op_cuda_EvolveValuesRK2_1(
EvolveValuesRK2_1_gpu(arg0,
arg1+n*4,
arg2+n*4,
arg3+n*4,
arg4+n*4);
arg3+n*4);
}
}


//GPU host stub function
void op_par_loop_EvolveValuesRK2_1_gpu(char const *name, op_set set,
//host stub function
void op_par_loop_EvolveValuesRK2_1(char const *name, op_set set,
op_arg arg0,
op_arg arg1,
op_arg arg2,
op_arg arg3,
op_arg arg4){
op_arg arg3){

float*arg0h = (float *)arg0.data;
int nargs = 5;
op_arg args[5];
int nargs = 4;
op_arg args[4];

args[0] = arg0;
args[1] = arg1;
args[2] = arg2;
args[3] = arg3;
args[4] = arg4;

// initialise timers
double cpu_t1, cpu_t2, wall_t1, wall_t2;
op_timing_realloc(0);
op_timers_core(&cpu_t1, &wall_t1);
OP_kernels[0].name = name;
OP_kernels[0].count += 1;
if (OP_kernels[0].count==1) op_register_strides();


if (OP_diags>2) {
printf(" kernel routine w/o indirection: EvolveValuesRK2_1");
}

op_mpi_halo_exchanges_cuda(set, nargs, args);
if (set->size > 0) {
int set_size = op_mpi_halo_exchanges_grouped(set, nargs, args, 2);
if (set_size > 0) {

//transfer constants to GPU
int consts_bytes = 0;
Expand All @@ -107,7 +87,6 @@ void op_par_loop_EvolveValuesRK2_1_gpu(char const *name, op_set set,
int nthread = OP_BLOCK_SIZE_0;
#else
int nthread = OP_block_size;
// int nthread = 128;
#endif

int nblocks = 200;
Expand All @@ -117,69 +96,16 @@ void op_par_loop_EvolveValuesRK2_1_gpu(char const *name, op_set set,
(float *) arg1.data_d,
(float *) arg2.data_d,
(float *) arg3.data_d,
(float *) arg4.data_d,
set->size );
}
op_mpi_set_dirtybit_cuda(nargs, args);
cutilSafeCall(cudaDeviceSynchronize());
if (OP_diags>1) {
cutilSafeCall(cudaDeviceSynchronize());
}
//update kernel record
op_timers_core(&cpu_t2, &wall_t2);
OP_kernels[0].time += wall_t2 - wall_t1;
OP_kernels[0].transfer += (float)set->size * arg1.size * 2.0f;
OP_kernels[0].transfer += (float)set->size * arg1.size;
OP_kernels[0].transfer += (float)set->size * arg2.size;
OP_kernels[0].transfer += (float)set->size * arg3.size;
OP_kernels[0].transfer += (float)set->size * arg4.size;
OP_kernels[0].transfer += (float)set->size * arg3.size * 2.0f;
}

void op_par_loop_EvolveValuesRK2_1_cpu(char const *name, op_set set,
op_arg arg0,
op_arg arg1,
op_arg arg2,
op_arg arg3,
op_arg arg4);


//GPU host stub function
//Entry point for the EvolveValuesRK2_1 parallel loop.
//When compiled with OP_HYBRID_GPU, the runtime flag OP_hybrid_gpu chooses
//between the GPU stub and the CPU stub; without it the GPU stub is always used.
void op_par_loop_EvolveValuesRK2_1(char const *name, op_set set,
  op_arg arg0,
  op_arg arg1,
  op_arg arg2,
  op_arg arg3,
  op_arg arg4){

#if OP_HYBRID_GPU
  //hybrid build: hand over to the CPU stub when the GPU path is switched off
  if (!OP_hybrid_gpu) {
    op_par_loop_EvolveValuesRK2_1_cpu(name, set,
      arg0, arg1, arg2, arg3, arg4);
    return;
  }
#endif //OP_HYBRID_GPU

  //GPU path: the only path in non-hybrid builds
  op_par_loop_EvolveValuesRK2_1_gpu(name, set,
    arg0, arg1, arg2, arg3, arg4);
}
Loading