Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
392 changes: 392 additions & 0 deletions apps/c/airfoil/airfoil_hdf5/dp/Makefile.legacy

Large diffs are not rendered by default.

78 changes: 78 additions & 0 deletions apps/make-common.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@

# Locate MPI compilers.
#
# Sets MPICPP_PATH, MPICXX_PATH and MPICC_PATH. When MPI_INSTALL_PATH is
# defined, each wrapper is looked up in $(MPI_INSTALL_PATH)/bin first and then
# in $(MPI_INSTALL_PATH)/intel64/bin; if neither file exists the bare command
# name is used, so the wrapper is resolved through PATH. When MPI_INSTALL_PATH
# is not defined, all three variables are set to the bare command names.
ifdef MPI_INSTALL_PATH
  # C++ wrapper, "mpic++" spelling
  ifneq ("","$(wildcard $(MPI_INSTALL_PATH)/bin/mpic++)")
    MPICPP_PATH = $(MPI_INSTALL_PATH)/bin/mpic++
  else
  ifneq ("","$(wildcard $(MPI_INSTALL_PATH)/intel64/bin/mpic++)")
    MPICPP_PATH = $(MPI_INSTALL_PATH)/intel64/bin/mpic++
  else
    MPICPP_PATH = mpic++
  endif
  endif

  # C++ wrapper, "mpicxx" spelling
  ifneq ("","$(wildcard $(MPI_INSTALL_PATH)/bin/mpicxx)")
    MPICXX_PATH = $(MPI_INSTALL_PATH)/bin/mpicxx
  else
  ifneq ("","$(wildcard $(MPI_INSTALL_PATH)/intel64/bin/mpicxx)")
    MPICXX_PATH = $(MPI_INSTALL_PATH)/intel64/bin/mpicxx
  else
    MPICXX_PATH = mpicxx
  endif
  endif

  # C wrapper
  ifneq ("","$(wildcard $(MPI_INSTALL_PATH)/bin/mpicc)")
    MPICC_PATH = $(MPI_INSTALL_PATH)/bin/mpicc
  else
  ifneq ("","$(wildcard $(MPI_INSTALL_PATH)/intel64/bin/mpicc)")
    MPICC_PATH = $(MPI_INSTALL_PATH)/intel64/bin/mpicc
  else
    MPICC_PATH = mpicc
  endif
  endif
else
  # No install prefix given: rely on PATH for every wrapper. MPICPP_PATH is set
  # here as well so that it is never left empty for makefiles that use it.
  MPICPP_PATH = mpic++
  MPICXX_PATH = mpicxx
  MPICC_PATH = mpicc
endif

# OP2: header and library search paths, only when OP2_INSTALL_PATH is defined
ifdef OP2_INSTALL_PATH
  OP2_INC = -I$(OP2_INSTALL_PATH)/include
  OP2_LIB = -L$(OP2_INSTALL_PATH)/lib
endif

# CUDA: header and library search paths, only when CUDA_INSTALL_PATH is defined
ifdef CUDA_INSTALL_PATH
  CUDA_INC = -I$(CUDA_INSTALL_PATH)/include
  CUDA_LIB = -L$(CUDA_INSTALL_PATH)/lib64
endif

# HDF5: optional search paths, followed by the libraries that are always linked
ifdef HDF5_INSTALL_PATH
  HDF5_INC := -I$(HDF5_INSTALL_PATH)/include
  HDF5_LIB := -L$(HDF5_INSTALL_PATH)/lib
endif
HDF5_LIB += -lhdf5 -lz

#
# Partitioning software for MPI versions
#

# ParMETIS: optional search paths; the HAVE_PARMETIS define and the link
# libraries are appended unconditionally.
PARMETIS_VER = 4
ifdef PARMETIS_INSTALL_PATH
  PARMETIS_INC = -I$(PARMETIS_INSTALL_PATH)/include
  PARMETIS_LIB = -L$(PARMETIS_INSTALL_PATH)/lib
endif
PARMETIS_INC += -DHAVE_PARMETIS
PARMETIS_LIB += -lparmetis -lmetis
# PARMETIS_VER_4 is defined only when PARMETIS_VER is 4
ifeq ($(PARMETIS_VER),4)
  PARMETIS_INC += -DPARMETIS_VER_4
endif

# PT-Scotch: optional search paths; the HAVE_PTSCOTCH define and the link
# libraries are appended unconditionally.
ifdef PTSCOTCH_INSTALL_PATH
  PTSCOTCH_INC = -I$(PTSCOTCH_INSTALL_PATH)/include
  PTSCOTCH_LIB = -L$(PTSCOTCH_INSTALL_PATH)/lib
endif
PTSCOTCH_INC += -DHAVE_PTSCOTCH
PTSCOTCH_LIB += -lptscotch -lscotch -lptscotcherr
20 changes: 11 additions & 9 deletions op2/src/cuda/op_cuda_decl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,22 +73,23 @@ op_dat op_decl_dat_char(op_set set, int dim, char const *type, int size,
op_dat dat = op_decl_dat_core(set, dim, type, size, data, name);

// transpose data
size_t set_size = dat->set->size + dat->set->exec_size + dat->set->nonexec_size;
if (data != NULL && (strstr(type, ":soa") != NULL || (OP_auto_soa && dim > 1))) {
char *temp_data = (char *)malloc(dat->size * round32(set_size) * sizeof(char));
size_t set_size = round32(dat->set->size + dat->set->exec_size + dat->set->nonexec_size);
char *temp_data = (char *)malloc(dat->size * set_size * sizeof(char));
int element_size = dat->size / dat->dim;
for (int i = 0; i < dat->dim; i++) {
for (int j = 0; j < set_size; j++) {
for (int c = 0; c < element_size; c++) {
temp_data[element_size * i * round32(set_size) + element_size * j + c] =
temp_data[element_size * i * set_size + element_size * j + c] =
dat->data[dat->size * j + element_size * i + c];
}
}
}
op_cpHostToDevice((void **)&(dat->data_d), (void **)&(temp_data),
(size_t)dat->size * round32(set_size));
(size_t)dat->size * set_size);
free(temp_data);
} else {
size_t set_size = dat->set->size + dat->set->exec_size + dat->set->nonexec_size;
op_cpHostToDevice((void **)&(dat->data_d), (void **)&(dat->data),
(size_t)dat->size * set_size);
}
Expand Down Expand Up @@ -150,15 +151,15 @@ op_set op_decl_set(int size, char const *name) {
/*
 * Declare a map through op_decl_map_core, build a transposed copy of its
 * table on the host and hand that copy to op_cpHostToDevice for map->map_d.
 *
 * The transposed copy stores, for each of the map->dim components, one run of
 * `padded_size` ints; entry (component i, row j) of the copy is taken from
 * map->map[map->dim * j + i].
 *
 * Returns the op_map produced by op_decl_map_core.
 */
op_map op_decl_map(op_set from, op_set to, int dim, int *imap,
                   char const *name) {
  op_map map = op_decl_map_core(from, to, dim, imap, name);

  // Number of rows that are read from map->map.
  int set_size = map->from->size + map->from->exec_size;
  // Row stride of the transposed copy (presumably set_size rounded up to a
  // multiple of 32 - confirm against round32).
  int padded_size = round32(set_size);

  // NOTE(review): the source rows must be bounded by set_size, not by
  // padded_size. map->map is assumed to hold only set_size rows (confirm
  // against op_decl_map_core); looping to the padded size would read past it.
  // calloc keeps the padding entries zero instead of indeterminate.
  int *temp_map = (int *)calloc((size_t)map->dim * padded_size, sizeof(int));
  for (int i = 0; i < map->dim; i++) {
    for (int j = 0; j < set_size; j++) {
      temp_map[i * padded_size + j] = map->map[map->dim * j + i];
    }
  }

  // The transfer covers the padded buffer, padding entries included.
  op_cpHostToDevice((void **)&(map->map_d), (void **)&(temp_map),
                    sizeof(int) * map->dim * padded_size);
  free(temp_map);
  return map;
}
Expand Down Expand Up @@ -290,9 +291,9 @@ void op_upload_all() {
op_dat_entry *item;
TAILQ_FOREACH(item, &OP_dat_list, entries) {
op_dat dat = item->dat;
size_t set_size = dat->set->size + dat->set->exec_size + dat->set->nonexec_size;
if (dat->data_d) {
if (strstr(dat->type, ":soa") != NULL || (OP_auto_soa && dat->dim > 1)) {
size_t set_size = round32(dat->set->size + dat->set->exec_size + dat->set->nonexec_size);
char *temp_data = (char *)malloc(dat->size * set_size * sizeof(char));
int element_size = dat->size / dat->dim;
for (int i = 0; i < dat->dim; i++) {
Expand All @@ -308,6 +309,7 @@ void op_upload_all() {
dat->dirty_hd = 0;
free(temp_data);
} else {
size_t set_size = dat->set->size + dat->set->exec_size + dat->set->nonexec_size;
cutilSafeCall(gpuMemcpy(dat->data_d, dat->data, dat->size * set_size,
gpuMemcpyHostToDevice));
dat->dirty_hd = 0;
Expand Down
18 changes: 9 additions & 9 deletions op2/src/cuda/op_cuda_rt_support.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ void op_cuda_get_data(op_dat dat) {
else
return;
// transpose data
size_t set_size = dat->set->size + dat->set->exec_size + dat->set->nonexec_size;
size_t set_size = round32(dat->set->size + dat->set->exec_size + dat->set->nonexec_size);
if (strstr(dat->type, ":soa") != NULL || (OP_auto_soa && dat->dim > 1)) {
char *temp_data = (char *)malloc(dat->size * set_size * sizeof(char));
cutilSafeCall(gpuMemcpy(temp_data, dat->data_d, dat->size * set_size,
Expand Down Expand Up @@ -347,19 +347,19 @@ void cutilDeviceInit(int argc, char **argv) {
void op_upload_dat(op_dat dat) {
if (!OP_hybrid_gpu)
return;
size_t set_size = dat->set->size + dat->set->exec_size + dat->set->nonexec_size;
size_t set_size = round32(dat->set->size + dat->set->exec_size + dat->set->nonexec_size);
if (strstr(dat->type, ":soa") != NULL || (OP_auto_soa && dat->dim > 1)) {
char *temp_data = (char *)malloc(dat->size * round32(set_size) * sizeof(char));
char *temp_data = (char *)malloc(dat->size * set_size * sizeof(char));
int element_size = dat->size / dat->dim;
for (int i = 0; i < dat->dim; i++) {
for (int j = 0; j < set_size; j++) {
for (int c = 0; c < element_size; c++) {
temp_data[element_size * i * round32(set_size) + element_size * j + c] =
temp_data[element_size * i * set_size + element_size * j + c] =
dat->data[dat->size * j + element_size * i + c];
}
}
}
cutilSafeCall(gpuMemcpy(dat->data_d, temp_data, round32(set_size) * dat->size,
cutilSafeCall(gpuMemcpy(dat->data_d, temp_data, set_size * dat->size,
gpuMemcpyHostToDevice));
free(temp_data);
} else {
Expand All @@ -371,17 +371,17 @@ void op_upload_dat(op_dat dat) {
void op_download_dat(op_dat dat) {
if (!OP_hybrid_gpu)
return;
size_t set_size = dat->set->size + dat->set->exec_size + dat->set->nonexec_size;
size_t set_size = round32(dat->set->size + dat->set->exec_size + dat->set->nonexec_size);
if (strstr(dat->type, ":soa") != NULL || (OP_auto_soa && dat->dim > 1)) {
char *temp_data = (char *)malloc(dat->size * round32(set_size) * sizeof(char));
cutilSafeCall(gpuMemcpy(temp_data, dat->data_d, round32(set_size) * dat->size,
char *temp_data = (char *)malloc(dat->size * set_size * sizeof(char));
cutilSafeCall(gpuMemcpy(temp_data, dat->data_d, set_size * dat->size,
gpuMemcpyDeviceToHost));
int element_size = dat->size / dat->dim;
for (int i = 0; i < dat->dim; i++) {
for (int j = 0; j < set_size; j++) {
for (int c = 0; c < element_size; c++) {
dat->data[dat->size * j + element_size * i + c] =
temp_data[element_size * i * round32(set_size) + element_size * j + c];
temp_data[element_size * i * set_size + element_size * j + c];
}
}
}
Expand Down
5 changes: 0 additions & 5 deletions translator-v2/resources/templates/cpp/master_kernel.cpp.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ extern {{const.typ}} {{const.ptr}}{% if const.dim > 1 %}[{{const.dim}}]{% endif
void op_decl_const_char(int dim, const char *type, int size, char *dat, const char *name) {
{{guard}}

if (size > MAX_CONST_SIZE) {
printf("error: requested size %d for const %s exceeds MAX_CONST_SIZE\n", size, name);
exit(1);
}

{% for const in app.consts() %}
if (!strcmp(name, "{{const.ptr}}")) {
{{caller(const)}}
Expand Down
14 changes: 7 additions & 7 deletions translator/c/op2.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def op_decl_set_parse(text):
"""Parsing for op_decl_set calls"""

sets = []
for m in re.finditer('op_decl_set\((.*)\)', text):
for m in re.finditer(r'op_decl_set\((.*)\)', text):
args = m.group(1).split(',')

# check for syntax errors
Expand All @@ -101,7 +101,7 @@ def op_decl_set_parse(text):
sets.append({
'name': args[1].strip()
})
for m in re.finditer('op_decl_set_hdf5\((.*)\)', text):
for m in re.finditer(r'op_decl_set_hdf5\((.*)\)', text):
args = m.group(1).split(',')

# check for syntax errors
Expand All @@ -120,7 +120,7 @@ def op_decl_const_parse(text):
"""Parsing for op_decl_const calls"""

consts = []
for m in re.finditer('op_decl_const\((.*)\)', text):
for m in re.finditer(r'op_decl_const\((.*)\)', text):
args = m.group(1).split(',')

# check for syntax errors
Expand Down Expand Up @@ -242,8 +242,8 @@ def get_arg_gbl(arg_string, k):
return temp_gbl

def append_init_soa(text):
  """Rewrite op_init / op_mpi_init calls into their _soa variants.

  Every call of the form op_init(ARGS) becomes op_init_soa(ARGS,1) and every
  op_mpi_init(ARGS) becomes op_mpi_init_soa(ARGS,1). ARGS is matched greedily,
  so it runs up to the last closing parenthesis on the same line.

  Args:
    text: source text to rewrite.

  Returns:
    The rewritten text; text without such calls is returned unchanged.
  """
  # Raw strings need single backslashes: a doubled backslash in a raw pattern
  # matches a literal backslash, which would stop these substitutions from
  # ever matching. The word boundaries keep names that merely contain
  # "op_init" (e.g. op_init_soa) untouched.
  text = re.sub(r'\bop_init\b\s*\((.*)\)', r'op_init_soa(\1,1)', text)
  text = re.sub(r'\bop_mpi_init\b\s*\((.*)\)', r'op_mpi_init_soa(\1,1)', text)
  return text

def op_par_loop_parse(text):
Expand Down Expand Up @@ -305,9 +305,9 @@ def op_par_loop_parse(text):

def op_check_kernel_in_text(text, name):
match = False
inline_impl_pattern = r'inline[ \n]+void[ \n]+'+name+'\s*\('
inline_impl_pattern = r'inline[ \n]+void[ \n]+'+name+r'\s*\('
matches = re.findall(inline_impl_pattern, text)
decl_pattern = r'([$\n]+)(void[ \n]+'+name+'\([ \n]*'+'[ \nA-Za-z0-9\*\_\.,#]+\);)'
decl_pattern = r'([$\n]+)(void[ \n]+'+name+r'\([ \n]*'+r'[ \nA-Za-z0-9\*\_\.,#]+\);)'
if len(re.findall(inline_impl_pattern, text)) == 1:
match = True
elif len(re.findall(decl_pattern, text)) == 1:
Expand Down
8 changes: 4 additions & 4 deletions translator/c/op2_gen_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -961,10 +961,10 @@ def op2_gen_cuda(master, date, consts, kernels, sets):
code('')

for nc in range (0,len(consts)):
if consts[nc]['dim']==1:
if str(consts[nc]['dim']).isdigit() and int(consts[nc]['dim'])==1:
code('__constant__ '+consts[nc]['type'][1:-1]+' '+consts[nc]['name']+';')
else:
if consts[nc]['dim'] > 0:
if str(consts[nc]['dim']).isdigit() and int(consts[nc]['dim']) > 0:
num = str(consts[nc]['dim'])
else:
num = 'MAX_CONST_SIZE'
Expand Down Expand Up @@ -992,8 +992,8 @@ def op2_gen_cuda(master, date, consts, kernels, sets):
code(' '+consts[nc]['type'][1:-1]+' *dat){')
depth = depth + 2
code('if (!OP_hybrid_gpu) return;')
if not consts[nc]['dim'] or int(consts[nc]['dim']) > 1:
IF('dim*sizeof('+consts[nc]['type'][1:-1]+')>MAX_CONST_SIZE')
if not str(consts[nc]['dim']).isdigit() or int(consts[nc]['dim']) <= 0:
IF('dim>MAX_CONST_SIZE')
code('printf("error: MAX_CONST_SIZE not big enough\\n"); exit(1);')
ENDIF()
code('cutilSafeCall(cudaMemcpyToSymbol('+consts[nc]['name']+'_cuda, dat, dim*sizeof('+consts[nc]['type'][1:-1]+')));')
Expand Down
Loading