Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
- name: Run tests with GCC
shell: bash
run: |
${PWD}/build/test/test_FDTD_method --gtest_repeat=2 --gtest_break_on_failure
${PWD}/build/unit-tests/test_FDTD_method --gtest_repeat=2 --gtest_break_on_failure

- name: Run kokkos_sample
shell: bash
Expand All @@ -49,3 +49,26 @@ jobs:
shell: bash
run: |
${PWD}/bin/sample

- name: Install Intel OneAPI (ifort)
run: |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
sudo apt-get update
sudo apt-get install -y intel-oneapi-compiler-fortran

- name: Compile and Run Coarray program with ifx
shell: bash
run: |
source /opt/intel/oneapi/setvars.sh intel64

echo "--- Verifying installation ---"
which ifx
ifx -v

echo "--- Compiling Coarray program ---"
ifx -coarray ${PWD}/coarray/fdtd.F90 -o fdtd

echo "--- Running Coarray program ---"
export FOR_COARRAY_NUM_IMAGES=4
${PWD}/fdtd
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,5 @@ add_custom_target(create_kokkoscore ALL DEPENDS ${KOKKOS_CORE_LIB})
find_package(Kokkos REQUIRED)

add_subdirectory(project)
add_subdirectory(test)
add_subdirectory(samples)
add_subdirectory(unit-tests)
add_subdirectory(perf-tests)
73 changes: 52 additions & 21 deletions coarray/fdtd.F90
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ program fdtd_coarray_optimized
implicit none

! Parameters
integer, parameter :: Ni = 512, Nj = 512, Nk = 512
integer, parameter :: num_iterations = 25
integer, parameter :: Ni = 32, Nj = 32, Nk = 32
integer, parameter :: num_iterations = 100
real(8), parameter :: C = 3e10, PI = 3.14159265358
real(8), parameter :: dx = C, dy = C, dz = C, dt = 0.2
real(8), parameter :: coef_B_dx = C * dt / (2 * dx), coef_B_dy = C * dt / (2 * dy), coef_B_dz = C * dt / (2 * dz)
Expand All @@ -14,7 +14,7 @@ program fdtd_coarray_optimized
! Field arrays (optimized layout for vectorization)
real(8), allocatable :: Ex(:,:,:)[:], Ey(:,:,:)[:], Ez(:,:,:)[:]
real(8), allocatable :: Bx(:,:,:)[:], By(:,:,:)[:], Bz(:,:,:)[:]
real(8), allocatable :: Jx(:,:,:)[:]
real(8), allocatable :: Jx(:,:,:)[:], Jy(:,:,:)[:], Jz(:,:,:)[:]
real(8), allocatable :: next_Ex(:,:)[:], next_Ey(:,:)[:], pred_Bx(:,:)[:], pred_By(:,:)[:]

! Local variables
Expand All @@ -24,7 +24,7 @@ program fdtd_coarray_optimized
integer(int64) :: start_time, end_time, rate
integer :: start_i, start_j, start_k, max_i, max_j, max_k
real(8) :: elapsed_time
real(8) :: Tx, Ty, Tz, TT, bnd
real(8) :: Tx, Ty, Tz, TT, bnd, current_time

! Coarray initialization
this_img = this_image()
Expand All @@ -36,6 +36,7 @@ program fdtd_coarray_optimized
Tx = 4 * C
Ty = 4 * C
Tz = 4 * C
current_time = min(int(TT / dt), num_iterations)

bnd = Ni / 2.0 * dx

Expand All @@ -58,7 +59,9 @@ program fdtd_coarray_optimized
allocate(By(Ni, Nj, k_local)[*])
allocate(Bz(Ni, Nj, k_local)[*])
allocate(Jx(Ni, Nj, k_local)[*])

allocate(Jy(Ni, Nj, k_local)[*])
allocate(Jz(Ni, Nj, k_local)[*])

allocate(next_Ex(Ni, Nj)[*])
allocate(next_Ey(Ni, Nj)[*])
allocate(pred_Bx(Ni, Nj)[*])
Expand All @@ -75,9 +78,7 @@ program fdtd_coarray_optimized
end if

! Main FDTD loop
do t = 1, num_iterations
if (this_img == 1) print '(A, I3)', 'Iteration ', t

do t = 1, current_time
if (this_img == 1) then
call init_currents(t)
end if
Expand All @@ -101,6 +102,31 @@ program fdtd_coarray_optimized
sync all

end do

Jx = 0.0
Jy = 0.0
Jz = 0.0
sync all

do t = current_time + 1, num_iterations
call update_B_field()
sync all

pred_Bx(:,:) = Bx(:, :, k_local)[pred_img]
pred_By(:,:) = By(:, :, k_local)[pred_img]
sync all

call update_E_field()
sync all

next_Ex(:,:) = Ex(:, :, 1)[next_img]
next_Ey(:,:) = Ey(:, :, 1)[next_img]
sync all

call update_B_field()
sync all

end do

sync all
! Final timing and cleanup
Expand All @@ -115,7 +141,7 @@ program fdtd_coarray_optimized
call print_full_E_slice()
end if

deallocate(Ex, Ey, Ez, Bx, By, Bz, Jx)
deallocate(Ex, Ey, Ez, Bx, By, Bz, Jx, Jy, Jz)
deallocate(next_Ex, next_Ey, pred_Bx, pred_By)

contains
Expand Down Expand Up @@ -150,23 +176,28 @@ subroutine init_fields
By = 0.0
Bz = 0.0
Jx = 0.0
Jy = 0.0
Jz = 0.0
end subroutine init_fields

!===============================================================
subroutine init_currents(this_t)
integer, intent(in) :: this_t
integer :: i, j, k
integer :: this_cur_img
real(8) :: value

do k = start_k, max_k
do j = start_j, max_j
do i = start_i, max_i
this_cur_img = ceiling(real(k) / real(k_local))
Jx(i,j,k - (this_cur_img - 1) * k_local)[this_cur_img] = &
(sin(2.0 * PI * this_t * dt / TT)) &
* (cos(2.0 * PI * (i-1) * dx / Tx)**2) &
* (cos(2.0 * PI * (j-1) * dy / Ty)**2) &
* (cos(2.0 * PI * (k-1) * dz / Tz)**2)
value = (sin(2.0 * PI * this_t * dt / TT)) &
* (cos(2.0 * PI * (i-1) * dx / Tx)**2) &
* (cos(2.0 * PI * (j-1) * dy / Ty)**2) &
* (cos(2.0 * PI * (k-1) * dz / Tz)**2)
Jx(i,j,k - (this_cur_img - 1) * k_local)[this_cur_img] = value
Jy(i,j,k - (this_cur_img - 1) * k_local)[this_cur_img] = value
Jz(i,j,k - (this_cur_img - 1) * k_local)[this_cur_img] = value
end do
end do
end do
Expand Down Expand Up @@ -226,11 +257,11 @@ subroutine update_E_field
coef_E_dy * (Bz(i,j,1) - Bz(i,jm,1)) - &
coef_E_dz * (By(i,j,1) - pred_By(i,j))

Ey(i,j,1) = Ey(i,j,1) - coef_J * Jx(i,j,1) + &
Ey(i,j,1) = Ey(i,j,1) - coef_J * Jy(i,j,1) + &
coef_E_dz * (Bx(i,j,1) - pred_Bx(i,j)) - &
coef_E_dx * (Bz(i,j,1) - Bz(im,j,1))

Ez(i,j,1) = Ez(i,j,1) - coef_J * Jx(i,j,1) + &
Ez(i,j,1) = Ez(i,j,1) - coef_J * Jz(i,j,1) + &
coef_E_dx * (By(i,j,1) - By(im,j,1)) - &
coef_E_dy * (Bx(i,j,1) - Bx(i,jm,1))
end do
Expand All @@ -245,11 +276,11 @@ subroutine update_E_field
coef_E_dy * (Bz(i,j,k) - Bz(i,jm,k)) - &
coef_E_dz * (By(i,j,k) - By(i,j,k-1))

Ey(i,j,k) = Ey(i,j,k) - coef_J * Jx(i,j,k) + &
Ey(i,j,k) = Ey(i,j,k) - coef_J * Jy(i,j,k) + &
coef_E_dz * (Bx(i,j,k) - Bx(i,j,k-1)) - &
coef_E_dx * (Bz(i,j,k) - Bz(im,j,k))

Ez(i,j,k) = Ez(i,j,k) - coef_J * Jx(i,j,k) + &
Ez(i,j,k) = Ez(i,j,k) - coef_J * Jz(i,j,k) + &
coef_E_dx * (By(i,j,k) - By(im,j,k)) - &
coef_E_dy * (Bx(i,j,k) - Bx(i,jm,k))
end do
Expand All @@ -260,10 +291,10 @@ end subroutine update_E_field
!===============================================================
subroutine print_full_E_slice()
integer :: img, ik, ij
do ik = Ni/2-5, Ni/2+5
do ij = Nj/2-5, Nj/2+5
do ij = Nj/2-4, Nj/2+5
do ik = Ni/2-4, Ni/2+5
img = ceiling(real(Nk/2+1) / real(k_local))
write(*, '(F12.6)', advance='no') Ex(ik,ij,Nk/2+1 - (img - 1) * k_local)[img]
write(*, '(F12.5)', advance='no') Ex(ik,ij,Nk/2+1 - (img - 1) * k_local)[img]
end do
print *
end do
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.