Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
bfb9c4a
Squashed commits from Brad @ 171bf087be6f4758c9616b9b5560736267e96424…
everythingfunctional Dec 19, 2024
ed6994d
Re-enable co_min and co_max testing for flang
bonachea Mar 19, 2025
60bbac9
Fix {float,double}_Complex_workaround for flang
bonachea Mar 19, 2025
0d1402d
caffeine.c: Fix a harmless warning
bonachea Mar 19, 2025
20d4dbd
re-enable co_sum test for flang
bonachea Mar 19, 2025
f21e115
Re-enable co_{min,max}_character implementations
bonachea Mar 24, 2025
a5750d8
co_reduce: Deploy bug workaround for gfortran
bonachea Mar 24, 2025
4877943
issue #196: Stop injecting outdated/broken flang-new options
bonachea Mar 25, 2025
90e7b4b
Re-enable tests for prif_co_{min,max}_character
bonachea Mar 24, 2025
0e29552
test/prif_co_reduce_test: re-enable "easy" co_reduce subtests
bonachea Mar 25, 2025
fb3eb3a
test/prif_co_reduce_test: Disable the subtest requiring parameterized…
bonachea Mar 26, 2025
450f77c
implementation-status.md: Update collectives status
bonachea Mar 26, 2025
d198629
issue #159: Expand prif_co_{min,max,sum} to handle every interoperabl…
bonachea Mar 27, 2025
1896426
Expand prif_co_{min,max,sum}_test to use default integer
bonachea Mar 27, 2025
8794e3a
issue #159: Implement prif_co_{min,max,sum} for 8- and 16-bit integer…
bonachea Mar 27, 2025
7e5d2de
Expand prif_co_{min,max,sum}_test to use 8- and 16-bit integers
bonachea Mar 27, 2025
defeb63
implementation-status.md: Update Collectives to fully implemented
bonachea Mar 27, 2025
eb8500e
CI: Add -mmlir -allow-assumed-rank for flang-19
bonachea Mar 31, 2025
d17b2cd
issue #205: workaround flang optimizer bug in prif_co_{min,max}_test
bonachea Apr 3, 2025
2b2b721
install.sh: Restore optimization flags for flang
bonachea Apr 3, 2025
812e758
caffeine.c: factor the caf_co_{min,max,sum} implementations
bonachea Apr 4, 2025
20d6395
prif_co_*: Assert some preconditions
bonachea Apr 4, 2025
27278a7
Flatten away the collective_subroutines subdirectory
bonachea Apr 4, 2025
6a4537a
Apply suggestions from code review
bonachea Apr 4, 2025
3f12d79
caffeine.c: Remove {float,double}_Complex_workaround
bonachea Apr 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ jobs:
version: 19
CC: clang
CXX: clang++
FFLAGS: -mmlir -allow-assumed-rank
# https://hub.docker.com/r/phhargrove/llvm-flang/tags
container: phhargrove/llvm-flang:19.1.1-1
SUBJOB_PREFIX: GASNET_PSHM_NODES=2
Expand All @@ -47,6 +48,7 @@ jobs:
FC: ${{ matrix.compiler }}
CC: ${{ matrix.CC }}
CXX: ${{ matrix.CXX }}
FFLAGS: ${{ matrix.FFLAGS }}
PREFIX: install
GASNET_CONFIGURE_ARGS: --enable-rpath --enable-debug
GASNET_PSHM_NODES: 8
Expand Down Expand Up @@ -114,12 +116,14 @@ jobs:

- name: Build Caffeine (install.sh)
run: |
echo "FC=${FC} CC=${CC} CXX=${CXX}"
for var in FC CC CXX FFLAGS CPPFLAGS CFLAGS LDFLAGS LIBS GASNET_CONFIGURE_ARGS ; do \
eval echo "$var=\$$var"; done
set -x
./install.sh --prefix=${PREFIX} --verbose

- name: Run examples
run: |
echo GASNET_PSHM_NODES=${GASNET_PSHM_NODES}
set -x
./build/run-fpm.sh run --verbose --example hello
./build/run-fpm.sh run --verbose --example stop_with_no_code
Expand All @@ -128,6 +132,8 @@ jobs:

- name: Run unit tests
run: |
echo GASNET_PSHM_NODES=${GASNET_PSHM_NODES}
echo SUBJOB_PREFIX=${SUBJOB_PREFIX}
set -x
./build/run-fpm.sh test --verbose -- -d

16 changes: 8 additions & 8 deletions docs/implementation-status.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,17 @@ in the following sections.
---

## Collectives
### Support = partial (...)
### Support = **YES**

| Procedure | Status | Notes |
|-----------|--------|-------|
| `prif_co_broadcast` | **YES** | |
| `prif_co_max` | *partial* | only supports 32-bit and 64-bit numeric types |
| `prif_co_max_character` | no | procedure not yet added to Caffeine |
| `prif_co_min` | *partial* | only supports 32-bit and 64-bit numeric types |
| `prif_co_min_character` | no | procedure not yet added to Caffeine |
| `prif_co_sum` | *partial* | only supports 32-bit and 64-bit numeric types |
| `prif_co_reduce` | *partial* | only supports intrinsic types (no support for derived types), interface not yet updated to v0.5 |
| `prif_co_broadcast` | **YES** | |
| `prif_co_max` | **YES** | |
| `prif_co_max_character` | **YES** | |
| `prif_co_min` | **YES** | |
| `prif_co_min_character` | **YES** | |
| `prif_co_sum` | **YES** | |
| `prif_co_reduce` | **YES** | |

---

Expand Down
6 changes: 3 additions & 3 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -416,9 +416,9 @@ exit_if_pkg_config_pc_file_missing "caffeine"
user_compiler_flags="${CPPFLAGS:-} ${FFLAGS:-}"

compiler_version=$($FPM_FC --version)
if [[ $compiler_version == *llvm* ]]; then
compiler_flag="-mmlir -allow-assumed-rank -g -Ofast"
else
if [[ $compiler_version == flang* ]]; then
compiler_flag="-g -O3"
else # assume gfortran
compiler_flag="-g -O3 -ffree-line-length-0 -Wno-unused-dummy-argument"
fi
compiler_flag+=" -DASSERT_MULTI_IMAGE -DASSERT_PARALLEL_CALLBACKS"
Expand Down
251 changes: 140 additions & 111 deletions src/caffeine/caffeine.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <gasnet_vis.h>
#include "gasnet_safe.h"
#include <gasnet_tools.h>
#include <gasnet_portable_platform.h>
#include <ISO_Fortran_binding.h>
#include "../dlmalloc/dl_malloc_caf.h"
#include "../dlmalloc/dl_malloc.h"
Expand All @@ -28,14 +29,6 @@ static gex_TM_t myworldteam;
typedef void(*final_func_ptr)(void*, size_t) ;
typedef uint8_t byte;

#if __GNUC__ >= 12
#define float_Complex_workaround CFI_type_float_Complex
#define double_Complex_workaround CFI_type_double_Complex
#else
#define float_Complex_workaround 2052
#define double_Complex_workaround 4100
#endif

// ---------------------------------------------------
int caf_this_image(gex_TM_t gex_team)
{
Expand Down Expand Up @@ -117,7 +110,7 @@ void* caf_allocate(mspace heap, size_t bytes)
return allocated_space;
}

void* caf_allocate_remaining(mspace heap, void** allocated_space, size_t* allocated_size)
void caf_allocate_remaining(mspace heap, void** allocated_space, size_t* allocated_size)
{
// The following doesn't necessarily give us all remaining space
// nor necessarily the largest open space, but in practice is likely
Expand Down Expand Up @@ -204,9 +197,18 @@ void caf_sync_team( gex_TM_t team ) {
//-------------------------------------------------------------------

void caf_co_reduce(
CFI_cdesc_t* a_desc, int result_image, int num_elements, gex_Coll_ReduceFn_t user_op, void* client_data, gex_TM_t team
)
{
CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_Coll_ReduceFn_t user_op, void* client_data, gex_TM_t team
) {
assert(a_desc);
assert(result_image >= 0);
assert(num_elements > 0);
assert(user_op);
#if PLATFORM_COMPILER_GNU
// gfortran 13.2 & 14 - c_funloc is non-compliant
// it erroneously generates a non-callable pointer to a pointer to the subroutine
// Here we undo that incorrect extra level of indirection
user_op = *(gex_Coll_ReduceFn_t *)user_op;
#endif
char* a_address = (char*) a_desc->base_addr;
size_t c_sizeof_a = a_desc->elem_len;
gex_Event_t ev;
Expand Down Expand Up @@ -236,132 +238,159 @@ void caf_co_broadcast(CFI_cdesc_t * a_desc, int source_image, int num_elements,
gex_Event_Wait(ev);
}

void caf_co_max(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team)
{
gex_DT_t a_type;

switch (a_desc->type)
{
case CFI_type_int32_t: a_type = GEX_DT_I32; break;
case CFI_type_int64_t: a_type = GEX_DT_I64; break;
case CFI_type_float: a_type = GEX_DT_FLT; break;
case CFI_type_double: a_type = GEX_DT_DBL; break;
default:
gasnett_fatalerror("Unrecognized type: %d", (int)a_desc->type);
}
//-------------------------------------------------------------------
// Typed computational collective subroutines
//-------------------------------------------------------------------

char* a_address = (char*) a_desc->base_addr;
// Convert CFI_type_t to the corresponding GEX reduction data type
// returns the size of the native type
static size_t CFI_to_GEX_DT(CFI_type_t cfi_type, gex_DT_t *gex_dt, int *complex_scale) {
assert(gex_dt);

if_pf (complex_scale) *complex_scale = 1;

switch (cfi_type) {
// real cases
case CFI_type_float: *gex_dt = GEX_DT_FLT; return 4;
case CFI_type_double: *gex_dt = GEX_DT_DBL; return 8;

// complex cases
case CFI_type_float_Complex: *gex_dt = GEX_DT_FLT;
if (!complex_scale) gasnett_fatalerror("This operation does not support complex types");
*complex_scale = 2;
return 8;
case CFI_type_double_Complex: *gex_dt = GEX_DT_DBL;
if (!complex_scale) gasnett_fatalerror("This operation does not support complex types");
*complex_scale = 2;
return 16;
// no support for CFI_type_long_double or CFI_type_long_double_Complex
}

size_t c_sizeof_a = a_desc->elem_len;
// integer types
#define CFI_INT_CASE(cfi_type_constant, c_type) \
else if (cfi_type == cfi_type_constant) { \
if (sizeof(c_type) == 4) *gex_dt = GEX_DT_I32; \
else if (sizeof(c_type) > 8) \
gasnett_fatalerror("Unsupported wide integer type: %d", (int)cfi_type); \
else *gex_dt = GEX_DT_I64; \
return sizeof(c_type); \
}
// these must be handled outside the switch because there are duplicates
// for the same reason, start with the most likely candidates
if (0) ;
CFI_INT_CASE(CFI_type_int64_t, int64_t)
CFI_INT_CASE(CFI_type_int32_t, int32_t)
CFI_INT_CASE(CFI_type_int16_t, int16_t)
CFI_INT_CASE(CFI_type_int8_t, int8_t)
CFI_INT_CASE(CFI_type_Bool, _Bool)
CFI_INT_CASE(CFI_type_char, char)
CFI_INT_CASE(CFI_type_signed_char, signed char)
CFI_INT_CASE(CFI_type_short, short int)
CFI_INT_CASE(CFI_type_int, int)
CFI_INT_CASE(CFI_type_long, long int)
CFI_INT_CASE(CFI_type_long_long, long long int)
CFI_INT_CASE(CFI_type_size_t, size_t)
CFI_INT_CASE(CFI_type_int_least8_t, int_least8_t)
CFI_INT_CASE(CFI_type_int_least16_t, int_least16_t)
CFI_INT_CASE(CFI_type_int_least32_t, int_least32_t)
CFI_INT_CASE(CFI_type_int_least64_t, int_least64_t)
CFI_INT_CASE(CFI_type_int_fast8_t, int_fast8_t)
CFI_INT_CASE(CFI_type_int_fast16_t, int_fast16_t)
CFI_INT_CASE(CFI_type_int_fast32_t, int_fast32_t)
CFI_INT_CASE(CFI_type_int_fast64_t, int_fast64_t)
CFI_INT_CASE(CFI_type_intmax_t, intmax_t)
CFI_INT_CASE(CFI_type_intptr_t, intptr_t)
CFI_INT_CASE(CFI_type_ptrdiff_t, ptrdiff_t)
#undef CFI_INT_CASE

gasnett_fatalerror("Unrecognized type: %d", (int)cfi_type);
}

gex_Event_t ev;
// widen an 8- or 16-bit integer array to 64-bit
static int64_t *widen_from_array(CFI_cdesc_t* a_desc, size_t num_elements) {
assert(a_desc);
int64_t *res = malloc(8 * num_elements);
assert(res);
if (a_desc->elem_len == 1) {
int8_t *src = a_desc->base_addr;
for (size_t i=0; i < num_elements; i++) res[i] = src[i];
} else if (a_desc->elem_len == 2) {
int16_t *src = a_desc->base_addr;
for (size_t i=0; i < num_elements; i++) res[i] = src[i];
} else gasnett_fatalerror("Logic error in widen_from_array: %i", a_desc->elem_len);
return res;
}

if (result_image) {
ev = gex_Coll_ReduceToOneNB(team, result_image-1, a_address, a_address, a_type, c_sizeof_a, num_elements, GEX_OP_MAX, NULL, NULL, 0);
} else {
ev = gex_Coll_ReduceToAllNB(team, a_address, a_address, a_type, c_sizeof_a, num_elements, GEX_OP_MAX, NULL, NULL, 0);
}
gex_Event_Wait(ev);
// narrow a 64-bit integer array result back to 8- or 16-bit
static void narrow_to_array(CFI_cdesc_t* a_desc, int64_t *src, size_t num_elements) {
assert(a_desc);
assert(src);
if (a_desc->elem_len == 1) {
int8_t *dst = a_desc->base_addr;
for (size_t i=0; i < num_elements; i++) dst[i] = src[i];
} else if (a_desc->elem_len == 2) {
int16_t *dst = a_desc->base_addr;
for (size_t i=0; i < num_elements; i++) dst[i] = src[i];
} else gasnett_fatalerror("Logic error in narrow_to_array: %i", a_desc->elem_len);
free(src);
}

void caf_co_min(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team)
{
gex_DT_t a_type;

switch (a_desc->type)
{
case CFI_type_int32_t: a_type = GEX_DT_I32; break;
case CFI_type_int64_t: a_type = GEX_DT_I64; break;
case CFI_type_float: a_type = GEX_DT_FLT; break;
case CFI_type_double: a_type = GEX_DT_DBL; break;
default:
gasnett_fatalerror("Unrecognized type: %d", (int)a_desc->type);
GASNETT_INLINE(caf_co_common)
void caf_co_common(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team, gex_OP_t g_op) {

int complex_scale = 1;
gex_DT_t g_dt;
size_t elem_sz = CFI_to_GEX_DT(a_desc->type, &g_dt,
(g_op == GEX_OP_ADD ? &complex_scale : NULL));

int64_t * bounce_buffer = NULL;
void * g_addr = a_desc->base_addr;
size_t g_elem_sz = a_desc->elem_len;
assert(g_elem_sz == elem_sz);

if_pf (complex_scale != 1) { // complex input, only permitted in prif_co_sum
assert(g_op == GEX_OP_ADD);
assert(complex_scale == 2);
assert(g_elem_sz == 8 || g_elem_sz == 16);
g_elem_sz >>= 1;
num_elements <<= 1;
} else if_pf(elem_sz < 4) {
bounce_buffer = widen_from_array(a_desc, num_elements);
assert(g_dt == GEX_DT_I64);
g_elem_sz = 8;
g_addr = bounce_buffer;
}

char* a_address = (char*) a_desc->base_addr;

size_t c_sizeof_a = a_desc->elem_len;

gex_Event_t ev;

if (result_image) {
ev = gex_Coll_ReduceToOneNB(team, result_image-1, a_address, a_address, a_type, c_sizeof_a, num_elements, GEX_OP_MIN, NULL, NULL, 0);
ev = gex_Coll_ReduceToOneNB(team, result_image-1, g_addr, g_addr, g_dt, g_elem_sz, num_elements, g_op, NULL, NULL, 0);
} else {
ev = gex_Coll_ReduceToAllNB(team, a_address, a_address, a_type, c_sizeof_a, num_elements, GEX_OP_MIN, NULL, NULL, 0);
ev = gex_Coll_ReduceToAllNB(team, g_addr, g_addr, g_dt, g_elem_sz, num_elements, g_op, NULL, NULL, 0);
}
gex_Event_Wait(ev);
}

void caf_co_sum(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team)
{
gex_DT_t a_type;

size_t c_sizeof_a = a_desc->elem_len;

switch (a_desc->type)
{
case CFI_type_int32_t: a_type = GEX_DT_I32; break;
case CFI_type_int64_t: a_type = GEX_DT_I64; break;
case CFI_type_float: a_type = GEX_DT_FLT; break;
case CFI_type_double: a_type = GEX_DT_DBL; break;
case float_Complex_workaround: a_type = GEX_DT_FLT; num_elements *= 2; c_sizeof_a /= 2; break;
case double_Complex_workaround: a_type = GEX_DT_DBL; num_elements *= 2; c_sizeof_a /= 2; break;
default:
gasnett_fatalerror("Unrecognized type: %d", (int)a_desc->type);
}
if_pf(bounce_buffer) narrow_to_array(a_desc, bounce_buffer, num_elements);
}

char* a_address = (char*) a_desc->base_addr;

gex_Event_t ev;

if (result_image) {
ev = gex_Coll_ReduceToOneNB(team, result_image-1, a_address, a_address, a_type, c_sizeof_a, num_elements, GEX_OP_ADD, NULL, NULL, 0);
} else {
ev = gex_Coll_ReduceToAllNB(team, a_address, a_address, a_type, c_sizeof_a, num_elements, GEX_OP_ADD, NULL, NULL, 0);
}
gex_Event_Wait(ev);
void caf_co_max(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team) {
caf_co_common(a_desc, result_image, num_elements, team, GEX_OP_MAX);
}

bool caf_same_cfi_type(CFI_cdesc_t* a_desc, CFI_cdesc_t* b_desc)
{
if (a_desc->type == b_desc->type) return true;
return false;
void caf_co_min(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team) {
caf_co_common(a_desc, result_image, num_elements, team, GEX_OP_MIN);
}

size_t caf_elem_len(CFI_cdesc_t* a_desc)
{
return a_desc->elem_len;
void caf_co_sum(CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_TM_t team) {
caf_co_common(a_desc, result_image, num_elements, team, GEX_OP_ADD);
}

//-------------------------------------------------------------------
void caf_form_team(gex_TM_t current_team, gex_TM_t* new_team, int64_t team_number, int new_index)
{
// GASNet color argument is int (32-bit), check for value truncation:
assert((unsigned int)team_number == team_number);
gex_TM_Split(new_team, current_team, team_number, new_index, NULL, 0, GEX_FLAG_TM_NO_SCRATCH);
}

bool caf_numeric_type(CFI_cdesc_t* a_desc)
{
switch (a_desc->type)
{
case CFI_type_int32_t: return true;
case CFI_type_int64_t: return true;
case CFI_type_float: return true;
case CFI_type_double: return true;
case float_Complex_workaround: return true;
case double_Complex_workaround: return true;
default: return false;
}
}

#ifdef __GNUC__
bool caf_is_f_string(CFI_cdesc_t* a_desc){
if ( (a_desc->type - 5) % 256 == 0) return true;
return false;
}
#else // The code below is untested but believed to conform with the Fortran 2018 standard.
bool caf_is_f_string(CFI_cdesc_t* a_desc){
if (a_desc->type == CFI_type_char) return true;
return false;
}
#endif
Loading