diff --git a/install.sh b/install.sh index 0fc02061a..c13e43de3 100755 --- a/install.sh +++ b/install.sh @@ -360,11 +360,13 @@ RUN_FPM_SH="build/run-fpm.sh" cat << EOF > $RUN_FPM_SH #!/bin/sh #-- DO NOT EDIT -- created by caffeine/install.sh -"${FPM}" "\$@" \ ---compiler "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_FC`" \ ---c-compiler "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_CC`" \ ---c-flag "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_CFLAGS`" \ ---link-flag "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_LDFLAGS`" +fpm_sub_cmd=\$1; shift +"${FPM}" "\$fpm_sub_cmd" \\ +--compiler "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_FC`" \\ +--c-compiler "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_CC`" \\ +--c-flag "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_CFLAGS`" \\ +--link-flag "`$PKG_CONFIG caffeine --variable=CAFFEINE_FPM_LDFLAGS`" \\ +"\$@" EOF chmod u+x $RUN_FPM_SH diff --git a/manifest/fpm.toml.template b/manifest/fpm.toml.template index 588401cba..39435e165 100644 --- a/manifest/fpm.toml.template +++ b/manifest/fpm.toml.template @@ -6,7 +6,7 @@ maintainer = "rouson@lbl.gov" copyright = "2021-2024 UC Regents" [dev-dependencies] -veggies = {git = "https://gitlab.com/everythingfunctional/veggies", tag = "v1.0.5"} +veggies = {git = "https://gitlab.com/everythingfunctional/veggies", tag = "v1.1.2"} iso_varying_string = {git = "https://gitlab.com/everythingfunctional/iso_varying_string.git", tag = "v3.0.4"} [build] diff --git a/src/caffeine/allocation_s.f90 b/src/caffeine/allocation_s.f90 index 3e2935bd9..b2b470069 100644 --- a/src/caffeine/allocation_s.f90 +++ b/src/caffeine/allocation_s.f90 @@ -7,7 +7,6 @@ c_f_procpointer, & c_loc, & c_associated, & - c_null_ptr, & c_null_funptr implicit none @@ -28,6 +27,12 @@ coarray_size = product(ubounds-lbounds+1)*element_size me = caf_this_image(current_team%info%gex_team) + if (caf_have_child_teams()) then + ! Free the child team space to make sure we have space to allocate the coarray + if (me == 1) then + call caf_deallocate(current_team%info%heap_mspace, current_team%info%child_heap_info%allocated_memory) + end if + end if if (me == 1) then handle_size = c_sizeof(unused) total_size = handle_size + coarray_size @@ -49,9 +54,12 @@ coarray_handle%info%final_func = final_func coarray_handle%info%lcobounds(1:size(lcobounds)) = lcobounds coarray_handle%info%ucobounds(1:size(ucobounds)) = ucobounds - call add_to_team_list(current_team, coarray_handle) + call add_to_team_list(coarray_handle) allocated_memory = coarray_handle%info%coarray_data + if (caf_have_child_teams()) then + call caf_establish_child_heap + end if end procedure module procedure prif_allocate @@ -118,26 +126,27 @@ call caf_deallocate(current_team%info%heap_mspace, c_loc(coarray_handles(i)%info)) end do if (present(stat)) stat = 0 + if (caf_have_child_teams()) then + ! reclaim any free space possible for the child teams to use + if (caf_this_image(current_team%info%gex_team) == 1) then + call caf_deallocate(current_team%info%heap_mspace, current_team%info%child_heap_info%allocated_memory) + end if + call caf_establish_child_heap + end if end procedure module procedure prif_deallocate call caf_deallocate(non_symmetric_heap_mspace, mem) end procedure - subroutine add_to_team_list(current_team, coarray_handle) - type(prif_team_type), intent(inout) :: current_team - type(prif_coarray_handle), intent(inout) :: coarray_handle + subroutine add_to_team_list(coarray_handle) + type(prif_coarray_handle), intent(in) :: coarray_handle if (associated(current_team%info%coarrays)) then current_team%info%coarrays%previous_handle = c_loc(coarray_handle%info) coarray_handle%info%next_handle = c_loc(current_team%info%coarrays) - coarray_handle%info%previous_handle = c_null_ptr - current_team%info%coarrays => coarray_handle%info - else - current_team%info%coarrays => coarray_handle%info - coarray_handle%info%next_handle = c_null_ptr - coarray_handle%info%previous_handle = c_null_ptr end if + current_team%info%coarrays => coarray_handle%info end subroutine subroutine remove_from_team_list(coarray_handle) @@ -145,9 +154,17 @@ subroutine remove_from_team_list(coarray_handle) type(handle_data), pointer :: tmp_data + if (& + .not.c_associated(coarray_handle%info%previous_handle) & + .and. .not.c_associated(coarray_handle%info%next_handle)) then + nullify(current_team%info%coarrays) + return + end if if (c_associated(coarray_handle%info%previous_handle)) then call c_f_pointer(coarray_handle%info%previous_handle, tmp_data) tmp_data%next_handle = coarray_handle%info%next_handle + else + call c_f_pointer(coarray_handle%info%next_handle, current_team%info%coarrays) end if if (c_associated(coarray_handle%info%next_handle)) then call c_f_pointer(coarray_handle%info%next_handle, tmp_data) diff --git a/src/caffeine/caffeine.c b/src/caffeine/caffeine.c index c355a04fe..b4abda36f 100644 --- a/src/caffeine/caffeine.c +++ b/src/caffeine/caffeine.c @@ -41,9 +41,14 @@ int caf_this_image(gex_TM_t team) } // NOTE: gex_TM_T is a typedef to a C pointer, so the `gex_TM_t* initial_team` arg in the C signature matches the BIND(C) interface of an `intent(out)` arg of type `c_ptr` for the same argument -void caf_caffeinate(mspace* symmetric_heap, intptr_t* symmetric_heap_start, mspace* non_symmetric_heap, gex_TM_t* initial_team) -{ - GASNET_SAFE(gex_Client_Init(&myclient, &myep, &myworldteam, "caffeine", NULL, NULL, 0)); +void caf_caffeinate( + mspace* symmetric_heap, + intptr_t* symmetric_heap_start, + intptr_t* symmetric_heap_size, + mspace* non_symmetric_heap, + gex_TM_t* initial_team +) { + GASNET_SAFE(gex_Client_Init(&myclient, &myep, initial_team, "caffeine", NULL, NULL, 0)); // query largest possible segment GASNet can give us of the same size across all processes: size_t max_seg = gasnet_getMaxGlobalSegmentSize(); @@ -59,7 +64,7 @@ void caf_caffeinate(mspace* symmetric_heap, intptr_t* symmetric_heap_start, mspa // TODO: issue a console warning here instead of silently capping segsz = MIN(segsz,max_seg); - GASNET_SAFE(gex_Segment_Attach(&mysegment, myworldteam, segsz)); + GASNET_SAFE(gex_Segment_Attach(&mysegment, *initial_team, segsz)); *symmetric_heap_start = (intptr_t)gex_Segment_QueryAddr(mysegment); size_t total_heap_size = gex_Segment_QuerySize(mysegment); @@ -72,16 +77,16 @@ void caf_caffeinate(mspace* symmetric_heap, intptr_t* symmetric_heap_start, mspa assert(non_symmetric_fraction > 0 && non_symmetric_fraction < 1); // TODO: real error reporting size_t non_symmetric_heap_size = total_heap_size * non_symmetric_fraction; - size_t symmetric_heap_size = total_heap_size - non_symmetric_heap_size; - intptr_t non_symmetric_heap_start = *symmetric_heap_start + symmetric_heap_size; + *symmetric_heap_size = total_heap_size - non_symmetric_heap_size; + intptr_t non_symmetric_heap_start = *symmetric_heap_start + *symmetric_heap_size; - if (caf_this_image(myworldteam) == 1) { - *symmetric_heap = create_mspace_with_base((void*)*symmetric_heap_start, symmetric_heap_size, 0); - mspace_set_footprint_limit(*symmetric_heap, symmetric_heap_size); + if (caf_this_image(*initial_team) == 1) { + *symmetric_heap = create_mspace_with_base((void*)*symmetric_heap_start, *symmetric_heap_size, 0); + mspace_set_footprint_limit(*symmetric_heap, *symmetric_heap_size); } *non_symmetric_heap = create_mspace_with_base((void*)non_symmetric_heap_start, non_symmetric_heap_size, 0); mspace_set_footprint_limit(*non_symmetric_heap, non_symmetric_heap_size); - *initial_team = myworldteam; + myworldteam = *initial_team; } void caf_decaffeinate(int exit_code) @@ -96,7 +101,24 @@ int caf_num_images(gex_TM_t team) void* caf_allocate(mspace heap, size_t bytes) { - return mspace_memalign(heap, 8, bytes); + void* allocated_space = mspace_memalign(heap, 8, bytes); + if (!allocated_space) // uh-oh, something went wrong.. + gasnett_fatalerror("caf_allocate failed to mspace_memalign(%"PRIuSZ")", + bytes); + return allocated_space; +} + +void* caf_allocate_remaining(mspace heap, void** allocated_space, size_t* allocated_size) +{ + // The following doesn't necessarily give us all remaining space + // nor necessarily the largest open space, but in practice is likely + // to work out that way + struct mallinfo heap_info = mspace_mallinfo(heap); + *allocated_size = heap_info.keepcost * 0.9f; + *allocated_space = mspace_memalign(heap, 8, *allocated_size); + if (!*allocated_space) // uh-oh, something went wrong.. + gasnett_fatalerror("caf_allocate_remaining failed to mspace_memalign(%"PRIuSZ")", + *allocated_size); } void caf_deallocate(mspace heap, void* mem) @@ -104,6 +126,12 @@ void caf_deallocate(mspace heap, void* mem) mspace_free(heap, mem); } +void caf_establish_mspace(mspace* heap, void* heap_start, size_t heap_size) +{ + *heap = create_mspace_with_base(heap_start, heap_size, 0); + mspace_set_footprint_limit(*heap, heap_size); +} + // take address in a segment and convert to an address on given image intptr_t caf_convert_base_addr(void* addr, int image) { @@ -259,6 +287,11 @@ size_t caf_elem_len(CFI_cdesc_t* a_desc) return a_desc->elem_len; } +void caf_form_team(gex_TM_t current_team, gex_TM_t* new_team, intmax_t team_number, int new_index) +{ + gex_TM_Split(new_team, current_team, team_number, new_index, NULL, 0, GEX_FLAG_TM_NO_SCRATCH); +} + bool caf_numeric_type(CFI_cdesc_t* a_desc) { switch (a_desc->type) diff --git a/src/caffeine/collective_subroutines/co_reduce_s.f90 b/src/caffeine/collective_subroutines/co_reduce_s.f90 index 1eda074eb..77abd2a72 100644 --- a/src/caffeine/collective_subroutines/co_reduce_s.f90 +++ b/src/caffeine/collective_subroutines/co_reduce_s.f90 @@ -2,7 +2,7 @@ ! Terms of use are as specified in LICENSE.txt submodule(prif:prif_private_s) co_reduce_s use iso_c_binding, only : & - c_loc, c_null_ptr, c_funloc, c_associated, c_f_pointer, c_f_procpointer, c_char, c_int64_t, c_double, & + c_loc, c_funloc, c_associated, c_f_pointer, c_f_procpointer, c_char, c_int64_t, c_double, & c_float, c_int32_t implicit none diff --git a/src/caffeine/prif_private_s.f90 b/src/caffeine/prif_private_s.f90 index 2f681ca11..65bcab992 100644 --- a/src/caffeine/prif_private_s.f90 +++ b/src/caffeine/prif_private_s.f90 @@ -18,11 +18,17 @@ pure module subroutine assert(assertion, description, diagnostics) ! ________ Program initiation and finalization ___________ - subroutine caf_caffeinate(symmetric_heap, symmetric_heap_start, non_symmetric_heap, initial_team) bind(C) + subroutine caf_caffeinate( & + symmetric_heap, & + symmetric_heap_start, & + symmetric_heap_size, & + non_symmetric_heap, & + initial_team) & + bind(C) import c_ptr, c_intptr_t implicit none type(c_ptr), intent(out) :: symmetric_heap - integer(c_intptr_t), intent(out) :: symmetric_heap_start + integer(c_intptr_t), intent(out) :: symmetric_heap_start, symmetric_heap_size type(c_ptr), intent(out) :: non_symmetric_heap type(c_ptr), intent(out) :: initial_team end subroutine @@ -62,6 +68,14 @@ function caf_allocate(mspace, bytes) result(ptr) bind(c) type(c_ptr) :: ptr end function + subroutine caf_allocate_remaining(mspace, allocated_space, allocated_size) bind(c) + import c_size_t, c_ptr + implicit none + type(c_ptr), intent(in), value :: mspace + type(c_ptr), intent(out) :: allocated_space + integer(c_size_t), intent(out) :: allocated_size + end subroutine + subroutine caf_deallocate(mspace, mem) bind(c) import c_ptr implicit none @@ -69,6 +83,14 @@ subroutine caf_deallocate(mspace, mem) bind(c) type(c_ptr), intent(in), value :: mem end subroutine + subroutine caf_establish_mspace(mspace, mem, mem_size) bind(c) + import c_size_t, c_ptr + implicit none + type(c_ptr), intent(out) :: mspace + type(c_ptr), intent(in), value :: mem + integer(c_size_t), intent(in), value :: mem_size + end subroutine + ! ___________________ PRIF Queries ______________________ module function caf_convert_base_addr(addr, image) result(ptr) bind(c) @@ -185,6 +207,15 @@ pure function caf_elem_len(a) result(a_elem_len) bind(C) integer(c_size_t), target :: a_elem_len end function + subroutine caf_form_team(current_team, new_team, team_number, new_index) bind(C) + !! void caf_form_team(gex_TM_t* current_team, gex_TM_t* new_team, intmax_t team_number, int new_index); + import c_ptr, c_int, c_intmax_t + type(c_ptr), intent(in), value :: current_team + type(c_ptr), intent(out) :: new_team + integer(c_intmax_t), intent(in), value :: team_number + integer(c_int), intent(in), value :: new_index + end subroutine + end interface contains @@ -238,4 +269,20 @@ pure function optional_value(var) result(c_val) end if end function + subroutine caf_establish_child_heap + if (caf_this_image(current_team%info%gex_team) == 1) then + call caf_allocate_remaining( & + current_team%info%heap_mspace, & + current_team%info%child_heap_info%allocated_memory, & + current_team%info%child_heap_info%size) + current_team%info%child_heap_info%offset = & + as_int(current_team%info%child_heap_info%allocated_memory) - current_team%info%heap_start + end if + call prif_co_broadcast(current_team%info%child_heap_info, 1) + end subroutine + + logical function caf_have_child_teams() + caf_have_child_teams = associated(current_team%info%child_heap_info) + end function + end submodule prif_private_s diff --git a/src/caffeine/program_startup_s.F90 b/src/caffeine/program_startup_s.F90 index 8d75f400b..e5f8fbdc6 100644 --- a/src/caffeine/program_startup_s.F90 +++ b/src/caffeine/program_startup_s.F90 @@ -1,7 +1,6 @@ ! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt submodule(prif:prif_private_s) program_startup_s - implicit none contains @@ -14,6 +13,7 @@ call caf_caffeinate( & initial_team%heap_mspace, & initial_team%heap_start, & + initial_team%heap_size, & non_symmetric_heap_mspace, & initial_team%gex_team) current_team%info => initial_team diff --git a/src/caffeine/teams_s.f90 b/src/caffeine/teams_s.f90 index 870d5d967..b3f393fb6 100644 --- a/src/caffeine/teams_s.f90 +++ b/src/caffeine/teams_s.f90 @@ -1,20 +1,79 @@ ! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt submodule(prif:prif_private_s) teams_s - implicit none + use iso_c_binding, only: c_null_funptr, c_f_pointer, c_loc + implicit none contains module procedure prif_change_team - call unimplemented("prif_change_team") + team%info%heap_start = current_team%info%child_heap_info%offset + current_team%info%heap_start + team%info%heap_size = current_team%info%child_heap_info%size + if (caf_this_image(team%info%gex_team) == 1) then ! need to setup the heap for the team + call caf_establish_mspace( & + team%info%heap_mspace, & + as_c_ptr(team%info%heap_start), & + current_team%info%child_heap_info%size) + end if + current_team = team + if (caf_have_child_teams()) then ! need to establish heap for child teams + call caf_establish_child_heap + end if end procedure module procedure prif_end_team - call unimplemented("prif_end_team") + type(prif_coarray_handle), allocatable :: teams_coarrays(:) + integer :: num_coarrays_in_team, i + type(handle_data), pointer :: tmp_data + + ! deallocate the teams coarrays + ! Currently we work to batch together all the deallocations into a single call + ! to prif_deallocate_coarray(), in the hope it can amortize some costs + num_coarrays_in_team = 0 + tmp_data => current_team%info%coarrays + do while (associated(tmp_data)) + num_coarrays_in_team = num_coarrays_in_team + 1 + call c_f_pointer(tmp_data%next_handle, tmp_data) + end do + if (num_coarrays_in_team > 0) then + allocate(teams_coarrays(num_coarrays_in_team)) + tmp_data => current_team%info%coarrays + do i = 1, num_coarrays_in_team + teams_coarrays(i)%info => tmp_data + call c_f_pointer(tmp_data%next_handle, tmp_data) + end do + call prif_deallocate_coarray(teams_coarrays, stat, errmsg, errmsg_alloc) + end if + nullify(current_team%info%coarrays) + + ! set the current team back to the parent team + current_team%info => current_team%info%parent_team end procedure module procedure prif_form_team - call unimplemented("prif_form_team") + ! indicates this is the first time we're creating a child team + if (.not.caf_have_child_teams()) then + allocate(current_team%info%child_heap_info) + call caf_establish_child_heap + end if + + block + integer(c_int) :: new_index_ + if (present(new_index)) then + new_index_ = new_index + else + new_index_ = 1 + end if + +! DOB: The two allocates in this procedure do not have a corresponding deallocate, +! because Fortran lacks a destroy team operation. We consider this to represent +! a defect in the Fortran design of teams. +! As such, team-specific state such as these data structures and the corresponding +! team-related data structures in GASNet can never be reclaimed. + allocate(team%info) + team%info%parent_team => current_team%info + call caf_form_team(current_team%info%gex_team, team%info%gex_team, team_number, new_index_) + end block end procedure module procedure prif_get_team diff --git a/src/prif.f90 b/src/prif.f90 index 4ec2cd489..4ee2a4e30 100644 --- a/src/prif.f90 +++ b/src/prif.f90 @@ -2,7 +2,7 @@ ! Terms of use are as specified in LICENSE.txt module prif - use iso_c_binding, only: c_int, c_bool, c_intptr_t, c_intmax_t, c_ptr, c_funptr, c_size_t, c_ptrdiff_t + use iso_c_binding, only: c_int, c_bool, c_intptr_t, c_intmax_t, c_ptr, c_funptr, c_size_t, c_ptrdiff_t, c_null_ptr implicit none @@ -1023,7 +1023,7 @@ module subroutine prif_atomic_ref_logical_indirect(image_num, atom_remote_ptr, v integer(c_size_t) :: coarray_size integer(c_size_t) :: element_size type(c_funptr) :: final_func - type(c_ptr) :: previous_handle, next_handle + type(c_ptr) :: previous_handle = c_null_ptr, next_handle = c_null_ptr integer(c_intmax_t) :: lcobounds(15), ucobounds(15) end type @@ -1031,8 +1031,15 @@ module subroutine prif_atomic_ref_logical_indirect(image_num, atom_remote_ptr, v type(c_ptr) :: gex_team type(c_ptr) :: heap_mspace integer(c_intptr_t) :: heap_start + integer(c_size_t) :: heap_size type(team_data), pointer :: parent_team => null() type(handle_data), pointer :: coarrays => null() + type(child_team_info), pointer :: child_heap_info => null() end type + type :: child_team_info + type(c_ptr) :: allocated_memory + integer(c_ptrdiff_t) :: offset + integer(c_size_t) :: size + end type end module prif diff --git a/test/caf_teams_test.f90 b/test/caf_teams_test.f90 new file mode 100644 index 000000000..dcaa449c5 --- /dev/null +++ b/test/caf_teams_test.f90 @@ -0,0 +1,67 @@ +module caf_teams_test + use iso_c_binding, only: c_size_t, c_ptr, c_intmax_t, c_null_funptr + use prif, only: & + prif_coarray_handle, & + prif_allocate_coarray, & + prif_deallocate_coarray, & + prif_this_image_no_coarray, & + prif_num_images, & + prif_team_type, & + prif_form_team, & + prif_change_team, & + prif_end_team + use veggies, only: result_t, test_item_t, assert_equals, describe, it, succeed + + implicit none + private + public :: test_caf_teams +contains + function test_caf_teams() result(tests) + type(test_item_t) :: tests + + tests = describe( & + "Teams", & + [ it("can be created, changed to, and allocate coarrays", check_teams) & + ]) + end function + + function check_teams() result(result_) + type(result_t) :: result_ + + ! TODO: use final_func to observe automatic deallocation of coarrays + integer :: dummy_element, initial_num_imgs, num_imgs, me, i + integer(c_size_t) :: element_size + integer(c_intmax_t) :: which_team + integer, parameter :: num_coarrays = 4 + type(prif_coarray_handle) :: coarrays(num_coarrays) + type(c_ptr) :: allocated_memory + type(prif_team_type) :: team + + call prif_this_image_no_coarray(this_image=me) + call prif_num_images(num_images=initial_num_imgs) + which_team = merge(1_c_intmax_t, 2_c_intmax_t, mod(me, 2) == 0) + element_size = int(storage_size(dummy_element)/8, c_size_t) + call prif_form_team(team_number = which_team, team = team) + call prif_change_team(team) + call prif_num_images(num_images=num_imgs) + result_ = assert_equals( & + initial_num_imgs/2 + mod(initial_num_imgs,2)*(int(which_team)-1), & + num_imgs, & + "Team has correct number of images") + do i = 1, num_coarrays + call prif_allocate_coarray( & + lcobounds = [1_c_intmax_t], & + ucobounds = [int(num_imgs, c_intmax_t)], & + lbounds = [integer(c_intmax_t)::], & + ubounds = [integer(c_intmax_t)::], & + element_size = element_size, & + final_func = c_null_funptr, & + coarray_handle = coarrays(i), & + allocated_memory = allocated_memory) + end do + call prif_deallocate_coarray(coarrays(4:4)) + call prif_deallocate_coarray(coarrays(2:2)) + call prif_end_team() + result_ = result_.and.succeed("Seems to have worked") + end function +end module diff --git a/test/main.f90 b/test/main.f90 index 10eb12499..a8a057e4f 100644 --- a/test/main.f90 +++ b/test/main.f90 @@ -41,6 +41,9 @@ function run() result(passed) use caf_stop_test, only: & caf_stop_prif_this_image => & test_prif_this_image + use caf_teams_test, only: & + caf_teams_caf_teams => & + test_caf_teams use caf_this_image_test, only: & caf_this_image_prif_this_image_no_coarray => & test_prif_this_image_no_coarray @@ -51,7 +54,7 @@ function run() result(passed) logical :: passed type(test_item_t) :: tests - type(test_item_t) :: individual_tests(13) + type(test_item_t) :: individual_tests(14) individual_tests(1) = a00_caffeinate_caffeinate() individual_tests(2) = caf_allocate_prif_allocate() @@ -65,7 +68,8 @@ function run() result(passed) individual_tests(10) = caf_num_images_prif_num_images() individual_tests(11) = caf_rma_prif_rma() individual_tests(12) = caf_stop_prif_this_image() - individual_tests(13) = caf_this_image_prif_this_image_no_coarray() + individual_tests(13) = caf_teams_caf_teams() + individual_tests(14) = caf_this_image_prif_this_image_no_coarray() tests = test_that(individual_tests)