Skip to content
10 changes: 9 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ jobs:
CAF_IMAGES=$(( CAF_IMAGES / 2 )) ; \
done

- name: Run exit tests
- name: Run exit/failure tests
run: |
echo CAF_IMAGES=${CAF_IMAGES}
set -x
Expand All @@ -301,6 +301,14 @@ jobs:
./run-fpm.sh run --verbose --example fail_image 2>&1 | tee output ; \
test ${PIPESTATUS[0]} > 0 && grep -q "FAIL IMAGE" output \
)
( set +e ; \
./run-fpm.sh run --verbose --example out_of_memory 2>&1 | tee output ; \
test ${PIPESTATUS[0]} > 0 && grep -q "out of memory" output \
)
( set +e ; \
./run-fpm.sh run --verbose --example out_of_memory -- --coarray 2>&1 | tee output ; \
test ${PIPESTATUS[0]} > 0 && grep -q "out of memory" output \
)
unset GASNET_SPAWN_VERBOSE
for ((i=1; i<=4; i++)); do \
(set +e ; \
Expand Down
5 changes: 0 additions & 5 deletions docs/implementation-status.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@ the labels in the Caffeine [issue tracker](https://github.com/BerkeleyLab/caffei
Caffeine contains definitions for all of the PRIF-relevant constants from ISO_FORTRAN_ENV and for
all of the PRIF-specific constants.

## `stat` and `errmsg` support

Many PRIF procedures have optional arguments `stat`, `errmsg`, and `errmsg_alloc`. These arguments
are accepted, but in some cases, the associated runtime behavior is not fully implemented.

## Program Startup and Shutdown

| Procedure | Status | Notes |
Expand Down
53 changes: 53 additions & 0 deletions example/support-test/out_of_memory.F90
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
program out_of_memory
use iso_c_binding, only: c_bool, c_size_t, c_ptr, c_null_funptr, c_int64_t
use prif
implicit none

integer :: init_exit_code, me, i
integer(c_size_t) :: size_in_bytes = ishft(500_c_size_t, 40)
type(c_ptr) :: allocated_memory
logical :: coarray = .false.
character(len=256) :: arg

call prif_init(init_exit_code)
if (init_exit_code /= 0 .and. init_exit_code /= PRIF_STAT_ALREADY_INIT) then
call prif_error_stop(quiet=.false._c_bool, stop_code_char="program startup failed")
end if
call prif_this_image_no_coarray(this_image=me)

do i = 1, command_argument_count()
call get_command_argument(i, arg)

if (trim(arg) == "--coarray" .or. trim(arg) == "-c") then
coarray = .true.
else
read(arg, *) size_in_bytes
end if
end do

if (coarray) then
if (me == 1) print *, "prif_allocate_coarray: ", size_in_bytes, " bytes"
block
integer(c_int64_t), dimension(1) :: lcobounds, ucobounds
integer :: num_imgs
type(prif_coarray_handle) :: coarray_handle

call prif_num_images(num_images=num_imgs)
lcobounds(1) = 1
ucobounds(1) = num_imgs

call prif_allocate_coarray( &
lcobounds, ucobounds, size_in_bytes, c_null_funptr, &
coarray_handle, allocated_memory)
end block
else
if (me == 1) print *, "prif_allocate: ", size_in_bytes, " bytes"
call prif_sync_all()
call prif_allocate(size_in_bytes, allocated_memory)
end if


call prif_sync_all()
call prif_error_stop(quiet=.false._c_bool, stop_code_char="test failed")

end program
88 changes: 63 additions & 25 deletions src/caffeine/allocation_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,26 @@
descriptor_size = c_sizeof(unused)
total_size = descriptor_size + size_in_bytes
whole_block = caf_allocate(current_team%info%heap_mspace, total_size)
block_offset = as_int(whole_block) - current_team%info%heap_start
if (.not. c_associated(whole_block)) then
block_offset = -1 ! out of memory
else
block_offset = as_int(whole_block) - current_team%info%heap_start
end if
else
block_offset = 0
end if
call prif_sync_memory ! end the current segment
! Use a co_sum to aggregate broadcasing the information from image 1
! together with the team barrier spec-required by coarray allocation
call prif_co_sum(block_offset)
if (block_offset == -1) then ! out of memory - abort allocation attempt
call report_error(PRIF_STAT_OUT_OF_MEMORY, out_of_memory_message(size_in_bytes, .true.), &
stat, errmsg, errmsg_alloc)
if (caf_have_child_teams()) then ! unroll state change above before return
call caf_establish_child_heap
end if
return
end if
if (me /= 1) whole_block = as_c_ptr(current_team%info%heap_start + block_offset)

call c_f_pointer(whole_block, coarray_handle%info)
Expand All @@ -70,9 +82,51 @@
end procedure

module procedure prif_allocate
allocated_memory = caf_allocate(non_symmetric_heap_mspace, size_in_bytes)
type(c_ptr) :: mem

mem = caf_allocate(non_symmetric_heap_mspace, size_in_bytes)
if (.not. c_associated(mem)) then
call report_error(PRIF_STAT_OUT_OF_MEMORY, out_of_memory_message(size_in_bytes, .false.), &
stat, errmsg, errmsg_alloc)
else
allocated_memory = mem
end if
end procedure

function out_of_memory_message(size_in_bytes, symmetric) result(message)
integer(c_size_t), intent(in) :: size_in_bytes
logical, intent(in) :: symmetric
character(len=:), allocatable :: mem_type
character(len=:), allocatable :: message

message = "Fortran shared heap is out of memory"
if (symmetric) then
mem_type = "coarray"
else
message = message // " on image " // num_to_str(initial_team%this_image)
mem_type = "non-coarray"
end if
message = message // new_line('') &
// " while allocating " // num_to_str(size_in_bytes, .true.) // " of additional " &
// mem_type // " memory." // new_line('') &
// new_line('') &
// " Shared heap size information:" // new_line('') &
// " Total shared heap: " // pad(num_to_str(total_heap_size, .true.)) &
// " (CAF_HEAP_SIZE)" // new_line('') &
// " Total non-coarray heap: " // pad(num_to_str(non_symmetric_heap_size, .true.)) &
// " (CAF_COMP_FRAC * CAF_HEAP_SIZE)" // new_line('') &
// " Current team coarray heap: " // pad(num_to_str(current_team%info%heap_size, .true.)) // new_line('') &
// new_line('') &
// " Consider setting the CAF_HEAP_SIZE environment variable to request a larger heap."
contains
function pad(str) result(s)
character(len=*), intent(in) :: str
character(len=:), allocatable :: s
s = str
s = repeat(' ',max(0, 10 - len(str))) // s
end function
end function

#if CAF_PRIF_VERSION <= 6
module procedure prif_deallocate_coarray
#else
Expand All @@ -82,7 +136,6 @@
module procedure prif_deallocate_coarrays
#endif
integer :: i, num_handles
character(len=*), parameter :: unallocated_message = "Attempted to deallocate unallocated coarray"
type(prif_coarray_handle), target :: coarray_handle
# if HAVE_FINAL_FUNC_SUPPORT
abstract interface
Expand All @@ -102,17 +155,9 @@ subroutine coarray_cleanup_i(handle, stat, errmsg) bind(C)
call prif_sync_all ! Need to ensure we don't deallocate anything till everyone gets here
num_handles = size(coarray_handles)
if (.not. all([(associated(coarray_handles(i)%info), i = 1, num_handles)])) then
if (present(stat)) then
stat = 1 ! TODO: decide what our stat codes should be
if (present(errmsg)) then
errmsg = unallocated_message
else if (present(errmsg_alloc)) then
errmsg_alloc = unallocated_message
end if
return
else
call prif_error_stop(.false._c_bool, stop_code_char=unallocated_message)
end if
call report_error(CAF_STAT_INVALID_ARGUMENT, "Attempted to deallocate unallocated coarray", &
stat, errmsg, errmsg_alloc)
return
end if
call_assert(all(coarray_handle_check(coarray_handles)))

Expand All @@ -129,17 +174,9 @@ subroutine coarray_cleanup_i(handle, stat, errmsg) bind(C)
if (.not. allocated(local_errmsg)) then ! provide a default errmsg
local_errmsg = "coarray_cleanup finalization callback failed"
end if
if (present(stat)) then
stat = local_stat
if (present(errmsg)) then
errmsg = local_errmsg
else if (present(errmsg_alloc)) then
call move_alloc(local_errmsg, errmsg_alloc)
end if
return ! NOTE: We no longer have guarantees that coarrays are in consistent state
else
call prif_error_stop(.false._c_bool, stop_code_char=local_errmsg)
end if
call report_error(local_stat, local_errmsg, &
stat, errmsg, errmsg_alloc)
return ! NOTE: We no longer have guarantees that coarrays are in consistent state
end if
# else
! TODO: issue a warning that we are ignoring the final_func?
Expand All @@ -164,6 +201,7 @@ subroutine coarray_cleanup_i(handle, stat, errmsg) bind(C)

module procedure prif_deallocate
call caf_deallocate(non_symmetric_heap_mspace, mem)
if (present(stat)) stat = 0
end procedure

subroutine add_to_team_list(coarray_handle)
Expand Down
40 changes: 30 additions & 10 deletions src/caffeine/caffeine.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ int caf_image_from_initial(gex_TM_t tm, int image_num) {
// ---------------------------------------------------
// NOTE: gex_TM_T is a typedef to a C pointer, so the `gex_TM_t* initial_team` arg in the C signature matches the BIND(C) interface of an `intent(out)` arg of type `c_ptr` for the same argument
void caf_caffeinate(
intptr_t* total_heap_size,
mspace* symmetric_heap,
intptr_t* symmetric_heap_start,
intptr_t* symmetric_heap_size,
Expand All @@ -90,41 +91,63 @@ void caf_caffeinate(
numprocs = gex_TM_QuerySize(myworldteam);
*initial_team = myworldteam;

#define PAGE_ALIGNUP(sz) ((sz + GASNET_PAGESIZE - 1) & ~(GASNET_PAGESIZE-1))

// query largest possible segment GASNet can give us of the same size across all processes:
size_t max_seg = gasnet_getMaxGlobalSegmentSize();
uintptr_t max_seg = gasnet_getMaxGlobalSegmentSize();
// impose a reasonable default size
#ifndef CAF_DEFAULT_HEAP_SIZE
#define CAF_DEFAULT_HEAP_SIZE (128*1024*1024) // 128 MiB
#endif
size_t default_seg = MIN(max_seg, CAF_DEFAULT_HEAP_SIZE);
uintptr_t default_seg = MIN(max_seg, CAF_DEFAULT_HEAP_SIZE);
// retrieve user preference, defaulting to the above and units of MiB
size_t segsz = gasnett_getenv_int_withdefault("CAF_HEAP_SIZE",
uintptr_t segsz = gasnett_getenv_int_withdefault("CAF_HEAP_SIZE",
default_seg, 1024*1024);
// ensure at least two full pages
segsz = MAX(segsz,2*GASNET_PAGESIZE);
// round-up to closest page size
segsz = PAGE_ALIGNUP(segsz);
// cap user request to the largest available:
// TODO: issue a console warning here instead of silently capping
segsz = MIN(segsz,max_seg);
assert(segsz % GASNET_PAGESIZE == 0);

GASNET_SAFE(gex_Segment_Attach(&mysegment, myworldteam, segsz));

*symmetric_heap_start = (intptr_t)gex_Segment_QueryAddr(mysegment);
size_t total_heap_size = gex_Segment_QuerySize(mysegment);
*total_heap_size = gex_Segment_QuerySize(mysegment);
assert(*total_heap_size >= 2*GASNET_PAGESIZE);

#ifndef CAF_DEFAULT_COMP_FRAC
#define CAF_DEFAULT_COMP_FRAC 0.1f // 10%
#endif
float default_comp_frac = MAX(MIN(0.99f, CAF_DEFAULT_COMP_FRAC), 0.01f);
float non_symmetric_fraction = gasnett_getenv_dbl_withdefault("CAF_COMP_FRAC", default_comp_frac);
assert(non_symmetric_fraction > 0 && non_symmetric_fraction < 1); // TODO: real error reporting
if (non_symmetric_fraction <= 0 || non_symmetric_fraction >= 1) {
gasnett_fatalerror_nopos("If used, environment variable 'CAF_COMP_FRAC' must be a valid floating point value or fraction between 0 and 1.");
}

size_t non_symmetric_heap_size = total_heap_size * non_symmetric_fraction;
*symmetric_heap_size = total_heap_size - non_symmetric_heap_size;
uintptr_t non_symmetric_heap_size = *total_heap_size * non_symmetric_fraction;
non_symmetric_heap_size = PAGE_ALIGNUP(non_symmetric_heap_size);
*symmetric_heap_size = *total_heap_size - non_symmetric_heap_size;
if (*symmetric_heap_size == 0) {
assert(non_symmetric_heap_size > GASNET_PAGESIZE);
non_symmetric_heap_size -= GASNET_PAGESIZE;
*symmetric_heap_size += GASNET_PAGESIZE;
}
assert(non_symmetric_heap_size > 0);
assert(non_symmetric_heap_size % GASNET_PAGESIZE == 0);
assert(*symmetric_heap_size > 0);
assert(*symmetric_heap_size % GASNET_PAGESIZE == 0);
intptr_t non_symmetric_heap_start = *symmetric_heap_start + *symmetric_heap_size;

if (myproc == 0) {
*symmetric_heap = create_mspace_with_base((void*)*symmetric_heap_start, *symmetric_heap_size, 0);
assert(*symmetric_heap);
mspace_set_footprint_limit(*symmetric_heap, *symmetric_heap_size);
}
*non_symmetric_heap = create_mspace_with_base((void*)non_symmetric_heap_start, non_symmetric_heap_size, 0);
assert(*non_symmetric_heap);
mspace_set_footprint_limit(*non_symmetric_heap, non_symmetric_heap_size);

// init various subsystems:
Expand Down Expand Up @@ -160,9 +183,6 @@ void caf_fatal_error( const CFI_cdesc_t* Fstr )
void* caf_allocate(mspace heap, size_t bytes)
{
void* allocated_space = mspace_memalign(heap, 8, bytes);
if (!allocated_space) // uh-oh, something went wrong..
gasnett_fatalerror("caf_allocate failed to mspace_memalign(%"PRIuSZ")",
bytes);
return allocated_space;
}

Expand Down
2 changes: 2 additions & 0 deletions src/caffeine/coarray_access_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@
dest = current_image_buffer, &
src = remote_ptr, &
size = size_in_bytes)

if (present(stat)) stat = 0
end procedure

! _______________________ Strided Get RMA ____________________________
Expand Down
8 changes: 7 additions & 1 deletion src/caffeine/coarray_queries_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,14 @@ subroutine initial_index_helper(coarray_handle, sub, team, initial_team_index)

module procedure prif_initial_team_index
call initial_index_helper(coarray_handle, sub, current_team, initial_team_index)

if (present(stat)) stat = 0
end procedure

module procedure prif_initial_team_index_with_team
call initial_index_helper(coarray_handle, sub, team, initial_team_index)

if (present(stat)) stat = 0
end procedure

module procedure prif_initial_team_index_with_team_number
Expand All @@ -152,7 +156,9 @@ subroutine initial_index_helper(coarray_handle, sub, team, initial_team_index)
call initial_index_helper(coarray_handle, sub, current_team, initial_team_index)
else
call unimplemented("prif_initial_team_index_with_team_number: no support for sibling teams")
end if
end if

if (present(stat)) stat = 0
end procedure

!---------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions src/caffeine/critical_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

module procedure prif_critical
call unimplemented("prif_critical")

if (present(stat)) stat = 0
end procedure

module procedure prif_end_critical
Expand Down
8 changes: 8 additions & 0 deletions src/caffeine/locks_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,26 @@

module procedure prif_lock
call unimplemented("prif_lock")

if (present(stat)) stat = 0
end procedure

module procedure prif_lock_indirect
call unimplemented("prif_lock_indirect")

if (present(stat)) stat = 0
end procedure

module procedure prif_unlock
call unimplemented("prif_unlock")

if (present(stat)) stat = 0
end procedure

module procedure prif_unlock_indirect
call unimplemented("prif_unlock_indirect")

if (present(stat)) stat = 0
end procedure

end submodule locks_s
Loading