Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ Change '2' above to the number of images that you would like to launch in parall
#### Multi-image (parallel) execution
The unit test suite uses small grids to minimize runtime, which limits the scalability of the test suite only.
Hence, the test suite is _not_ designed for execution in a large number of images.
All tests pass when running the test suite in 1-4 images:

All tests pass when running the test suite in 1-4 images:
```
export FOR_COARRAY_NUM_IMAGES=4
fpm test --compiler ifx --profile release --flag "-heap-arrays -coarray"
Expand Down
4 changes: 2 additions & 2 deletions doc/ai4dev/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Vibe Coding
```
4. Use the contents of [vibe-coding/README.md](./vibe-coding/README.md) as your
prompt to a large language model (LLM).
5. Use an editor to edit the test-suite.F90 lines beginning and ending with
5. Use an editor to edit the test-suite.F90 lines beginning and ending with
`module procedure laplacian` and `end procedure laplacian`, respectively,
replacing those lines with the LLM's response.
6. Compile test-suite.F90 as in step 2 again.
Expand All @@ -36,7 +36,7 @@ Vibe Coding
but tests failed replace "compiling" with "running" and replace "errors"
with "test failures".
* Append LLM-generated code from the previous iteration.

Idiomatic Vibe Coding
---------------------
Follow the [vibe coding](#vibe-coding) steps except as described below:
Expand Down
46 changes: 23 additions & 23 deletions doc/ai4dev/idiomatic-vibe-coding/idiomatic-vibe-response.f90
Original file line number Diff line number Diff line change
Expand Up @@ -2,87 +2,87 @@ pure module function laplacian(rhs) result(laplacian_rhs)
implicit none
class(subdomain_t), intent(in) :: rhs
type(subdomain_t) :: laplacian_rhs

! Local variables for array bounds and grid spacing
integer :: my_nx, ny, nz, me, num_subdomains
real :: dx_val, dy_val, dz_val, dx2_inv, dy2_inv, dz2_inv

! Get array dimensions and parallel image information
my_nx = size(rhs%s_, 1)
ny = size(rhs%s_, 2)
ny = size(rhs%s_, 2)
nz = size(rhs%s_, 3)
me = this_image()
num_subdomains = num_images()

! Get grid spacing values
dx_val = rhs%dx()
dy_val = rhs%dy()
dy_val = rhs%dy()
dz_val = rhs%dz()

! Compute inverse squared grid spacings for efficiency
dx2_inv = 1.0 / (dx_val * dx_val)
dy2_inv = 1.0 / (dy_val * dy_val)
dz2_inv = 1.0 / (dz_val * dz_val)

! Allocate result array with same shape as input
allocate(laplacian_rhs%s_(my_nx, ny, nz))

! Initialize result to zero
laplacian_rhs%s_ = 0.0

! Compute 2nd-order central difference Laplacian in interior points
! Laplacian = d²/dx² + d²/dy² + d²/dz²
! Laplacian = d2/dx2 + d2/dy2 + d2/dz2
do concurrent (i = 2:my_nx-1, j = 2:ny-1, k = 2:nz-1) default(none) &
shared(laplacian_rhs, rhs, dx2_inv, dy2_inv, dz2_inv, my_nx, ny, nz, me, num_subdomains)
! Second derivative in x-direction: (f(i+1) - 2*f(i) + f(i-1))/dx²
! Second derivative in y-direction: (f(j+1) - 2*f(j) + f(j-1))/dy²
! Second derivative in z-direction: (f(k+1) - 2*f(k) + f(k-1))/dz²

! Second derivative in x-direction: (f(i+1) - 2*f(i) + f(i-1))/dx2
! Second derivative in y-direction: (f(j+1) - 2*f(j) + f(j-1))/dy2
! Second derivative in z-direction: (f(k+1) - 2*f(k) + f(k-1))/dz2
laplacian_rhs%s_(i,j,k) = &
dx2_inv * (rhs%s_(i+1,j,k) - 2.0*rhs%s_(i,j,k) + rhs%s_(i-1,j,k)) + &
dy2_inv * (rhs%s_(i,j+1,k) - 2.0*rhs%s_(i,j,k) + rhs%s_(i,j-1,k)) + &
dz2_inv * (rhs%s_(i,j,k+1) - 2.0*rhs%s_(i,j,k) + rhs%s_(i,j,k-1))
end do

! Handle boundary conditions at subdomain interfaces for x-direction
! Left boundary (i=1) - only compute if not the first subdomain
if (me > 1) then
do concurrent (j = 2:ny-1, k = 2:nz-1) default(none) &
shared(laplacian_rhs, rhs, dx2_inv, dy2_inv, dz2_inv, ny, nz, halo_x)

laplacian_rhs%s_(1,j,k) = &
dx2_inv * (rhs%s_(2,j,k) - 2.0*rhs%s_(1,j,k) + halo_x(west,j,k)) + &
dy2_inv * (rhs%s_(1,j+1,k) - 2.0*rhs%s_(1,j,k) + rhs%s_(1,j-1,k)) + &
dz2_inv * (rhs%s_(1,j,k+1) - 2.0*rhs%s_(1,j,k) + rhs%s_(1,j,k-1))
end do
end if
! Right boundary (i=my_nx) - only compute if not the last subdomain

! Right boundary (i=my_nx) - only compute if not the last subdomain
if (me < num_subdomains) then
do concurrent (j = 2:ny-1, k = 2:nz-1) default(none) &
shared(laplacian_rhs, rhs, dx2_inv, dy2_inv, dz2_inv, my_nx, ny, nz, halo_x, num_subdomains)

laplacian_rhs%s_(my_nx,j,k) = &
dx2_inv * (halo_x(east,j,k) - 2.0*rhs%s_(my_nx,j,k) + rhs%s_(my_nx-1,j,k)) + &
dy2_inv * (rhs%s_(my_nx,j+1,k) - 2.0*rhs%s_(my_nx,j,k) + rhs%s_(my_nx,j-1,k)) + &
dz2_inv * (rhs%s_(my_nx,j,k+1) - 2.0*rhs%s_(my_nx,j,k) + rhs%s_(my_nx,j,k-1))
end do
end if

! Set boundary values to zero as specified
! Y boundaries
laplacian_rhs%s_(:, 1, :) = 0.0
laplacian_rhs%s_(:, ny, :) = 0.0
! Z boundaries

! Z boundaries
laplacian_rhs%s_(:, :, 1) = 0.0
laplacian_rhs%s_(:, :, nz) = 0.0

! X boundaries for first and last subdomains
if (me == 1) then
laplacian_rhs%s_(1, :, :) = 0.0
end if

if (me == num_subdomains) then
laplacian_rhs%s_(my_nx, :, :) = 0.0
end if
Expand Down
18 changes: 9 additions & 9 deletions doc/ai4dev/vibe-coding/vibe-response.f90
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
module procedure laplacian
! Separate module procedure for pure function laplacian
! Computes 2nd-order central-difference Laplacian operator

integer :: i, j, k
real, allocatable :: s_west(:,:), s_east(:,:)

! Allocate result array
allocate(laplacian_rhs%s_(my_nx, ny, nz))

! Initialize result to zero
laplacian_rhs%s_ = 0.0

! Prepare halo data for boundary conditions
allocate(s_west(ny, nz), s_east(ny, nz))
s_west = merge(halo_x(west,:,:), 0.0, me == 1)
s_east = merge(halo_x(east,:,:), 0.0, me == num_subdomains)

! Compute Laplacian using do concurrent with proper boundary handling
do concurrent (i = 1:my_nx, j = 1:ny, k = 1:nz) &
default(none) shared(laplacian_rhs, rhs, s_west, s_east) &
local(i, j, k)

! Apply boundary conditions - result vanishes at boundaries
laplacian_rhs%s_(i,j,k) = merge(0.0, &
! X-direction second derivative
Expand All @@ -29,7 +29,7 @@
merge(rhs%s_(i-1,j,k), 0.0, i > 1 .or. me > 1) + &
merge(rhs%s_(i+1,j,k), 0.0, i < my_nx .or. me < num_subdomains) - &
2.0 * rhs%s_(i,j,k) + &
! Y-direction second derivative
! Y-direction second derivative
merge(rhs%s_(i,j-1,k) + rhs%s_(i,j+1,k) - 2.0 * rhs%s_(i,j,k), 0.0, &
j > 1 .and. j < ny) + &
! Z-direction second derivative
Expand All @@ -38,7 +38,7 @@
! Boundary condition mask
j == 1 .or. j == ny .or. k == 1 .or. k == nz .or. &
(i == 1 .and. me == 1) .or. (i == my_nx .and. me == num_subdomains))

end do

end procedure laplacian
8 changes: 4 additions & 4 deletions example/time-paradigm.F90
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ program time_paradigm_m
!! Time various alternative programming paradigms
use subdomain_m, only : subdomain_t
use assert_m
use julienne_m, only : string_t, file_t, command_line_t, bin_t, csv
use julienne_m, only : string_t, file_t, command_line_t, bin_t, csv
use iso_fortran_env, only : int64
implicit none

Expand All @@ -32,23 +32,23 @@ program time_paradigm_m
if (len(steps_string)/=0) read(steps_string,*) steps
if (len(resolution_string)/=0) read(resolution_string,*) resolution

if (me==1) then
if (me==1) then
print *,"Number of steps to execute: ",steps
print *,"Number of grid points in each coordinate direction: ",resolution
print *,"Starting functional solver."
end if
associate(t_functional => functional_programming_time())
if (me==1) print *,"Starting procedural solver."
associate(t_procedural => functional_programming_time())
if (me==1) then
if (me==1) then
print *,"Functional program time: ", t_functional
print *,"Procedural program time: ", t_procedural
print *,"Procedural speedup: ", (t_functional - t_procedural)/t_functional
end if
end associate
end associate
end associate

contains

function functional_programming_time() result(system_time)
Expand Down
20 changes: 10 additions & 10 deletions src/matcha/distribution_s.F90
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
implicit none

contains

pure function monotonically_increasing(f) result(monotonic)
double precision, intent(in) :: f(:)
logical monotonic
Expand All @@ -21,8 +21,8 @@ pure function monotonically_increasing(f) result(monotonic)

call_assert(all(sample_distribution(:,2)>=0.D0))

associate(nintervals => size(sample_distribution,1))
distribution%vel_ = [(sample_distribution(i,1), i =1, nintervals)] ! Assign speeds to each distribution bin
associate(nintervals => size(sample_distribution,1))
distribution%vel_ = [(sample_distribution(i,1), i =1, nintervals)] ! Assign speeds to each distribution bin
distribution%cumulative_distribution_ = [0.D0, [(sum(sample_distribution(1:i,2)), i=1, nintervals)]]
end associate

Expand All @@ -33,14 +33,14 @@ pure function monotonically_increasing(f) result(monotonic)
module procedure cumulative_distribution
call_assert(allocated(self%cumulative_distribution_))
my_cumulative_distribution = self%cumulative_distribution_
end procedure
end procedure

module procedure velocities

double precision, allocatable :: sampled_speeds(:,:), dir(:,:,:)
integer cell, step, k
call_assert(allocated(self%cumulative_distribution_))

call_assert(allocated(self%cumulative_distribution_))
call_assert(allocated(self%vel_))

! Sample from the distribution
Expand All @@ -50,7 +50,7 @@ pure function monotonically_increasing(f) result(monotonic)
k = findloc(speeds(cell,step) >= self%cumulative_distribution(), value=.false., dim=1)-1
sampled_speeds(cell,step) = self%vel_(k)
end do

! Create unit vectors
dir = directions(:,1:nsteps,:)

Expand All @@ -63,7 +63,7 @@ pure function monotonically_increasing(f) result(monotonic)
end associate

allocate(my_velocities, mold=dir)

do concurrent(step=1:nsteps)
my_velocities(:,step,1) = sampled_speeds(:,step)*dir(:,step,1)
my_velocities(:,step,2) = sampled_speeds(:,step)*dir(:,step,2)
Expand Down
Loading