Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
264 changes: 49 additions & 215 deletions include/FDTD_kokkos/kokkos_functors.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ class ComputeE_FieldFunctor : public Base_Functor {
Field &Ex, &Ey, &Ez;
Field &Bx, &By, &Bz;
Field &Jx, &Jy, &Jz;
int start_i, end_i;
int Ni, Nj, Nk;
int Ni, Nj, Nk, Nij;
FP current_coef;
FP coef_dx, coef_dy, coef_dz;

Expand All @@ -36,13 +35,11 @@ class ComputeE_FieldFunctor : public Base_Functor {
Field& Bx, Field& By, Field& Bz,
Field& Jx, Field& Jy, Field& Jz,
const FP& current_coef,
const int& start_i, const int& end_i,
const int& Ni, const int& Nj, const int& Nk,
const FP& coef_dx, const FP& coef_dy, const FP& coef_dz) :
Ex(Ex), Ey(Ey), Ez(Ez), Bx(Bx), By(By), Bz(Bz),
Jx(Jx), Jy(Jy), Jz(Jz), current_coef(current_coef),
start_i(start_i), end_i(end_i),
Ni(Ni), Nj(Nj), Nk(Nk),
Ni(Ni), Nj(Nj), Nk(Nk), Nij(Ni * Nj),
coef_dx(coef_dx), coef_dy(coef_dy), coef_dz(coef_dz) {}

static void apply(
Expand All @@ -52,144 +49,57 @@ class ComputeE_FieldFunctor : public Base_Functor {
const FP& current_coef,
const int bounds_i[2], const int bounds_j[2], const int bounds_k[2],
const int& Ni, const int& Nj, const int& Nk,
const FP& coef_dx, const FP& coef_dy, FP& coef_dz) {
const FP& coef_dx, const FP& coef_dy, const FP& coef_dz) {

ComputeE_FieldFunctor functor(
Ex, Ey, Ez, Bx, By, Bz, Jx, Jy, Jz,
current_coef, bounds_i[0], bounds_i[1],
Ni, Nj, Nk, coef_dx, coef_dy, coef_dz);
current_coef, Ni, Nj, Nk, coef_dx, coef_dy, coef_dz);

Kokkos::MDRangePolicy<Kokkos::Rank<2>> policy(
{bounds_k[0], bounds_j[0]},
{bounds_k[1], bounds_j[1]});
Kokkos::MDRangePolicy<Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Right>> policy(
{bounds_k[0], bounds_j[0], bounds_i[0]},
{bounds_k[1], bounds_j[1], bounds_i[1]});

Kokkos::parallel_for("UpdateEField", policy, functor);
}
KOKKOS_INLINE_FUNCTION void operator()(const int& k, const int& j) const {

int j_pred = j - 1;
int k_pred = k - 1;

applyPeriodicBoundary(k_pred, Nk);
applyPeriodicBoundary(j_pred, Nj);

const int index_kj_offset = j * Ni + k * Ni * Nj;
const int j_pred_kj_offset = j_pred * Ni + k * Ni * Nj;
const int k_pred_kj_offset = j * Ni + k_pred * Ni * Nj;

int i_base = start_i;
for (; i_base + simd_width <= end_i; i_base += simd_width) {
const int current_simd_block_start_idx = i_base + index_kj_offset;
const int j_pred_simd_block_start_idx = i_base + j_pred_kj_offset;
const int k_pred_simd_block_start_idx = i_base + k_pred_kj_offset;

simd_type Ex_simd, Ey_simd, Ez_simd;
simd_type Bx_simd, By_simd, Bz_simd;
simd_type Jx_simd, Jy_simd, Jz_simd;
simd_type Bz_pred_simd, Bx_j_pred_simd, By_pred_simd,
Bx_pred_simd, Bz_i_pred_simd, By_i_pred_simd;

Ex_simd.copy_from(Ex.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Ey_simd.copy_from(Ey.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Ez_simd.copy_from(Ez.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);

Bx_simd.copy_from(Bx.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
By_simd.copy_from(By.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Bz_simd.copy_from(Bz.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);

Jx_simd.copy_from(Jx.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Jy_simd.copy_from(Jy.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Jz_simd.copy_from(Jz.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);

Bz_pred_simd.copy_from(Bz.data() + j_pred_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default); // Bz(i, j-1, k)
Bx_j_pred_simd.copy_from(Bx.data() + j_pred_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default); // Bx(i, j-1, k)
By_pred_simd.copy_from(By.data() + k_pred_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default); // By(i, j, k-1)
Bx_pred_simd.copy_from(Bx.data() + k_pred_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default); // Bx(i, j, k-1)

#pragma unroll
for (int lane = 0; lane < simd_width; ++lane) {
int i = i_base + lane;
int i_pred = i - 1;

applyPeriodicBoundary(i_pred, Ni);

int scalar_i_pred_idx = i_pred + index_kj_offset;

Bz_i_pred_simd[lane] = Bz[scalar_i_pred_idx];
By_i_pred_simd[lane] = By[scalar_i_pred_idx];
}

Ex_simd += current_coef * Jx_simd +
coef_dy * (Bz_simd - Bz_pred_simd) -
coef_dz * (By_simd - By_pred_simd);

Ey_simd += current_coef * Jy_simd +
coef_dz * (Bx_simd - Bx_pred_simd) -
coef_dx * (Bz_simd - Bz_i_pred_simd);

Ez_simd += current_coef * Jz_simd +
coef_dx * (By_simd - By_i_pred_simd) -
coef_dy * (Bx_simd - Bx_j_pred_simd);

Ex_simd.copy_to(Ex.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Ey_simd.copy_to(Ey.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Ez_simd.copy_to(Ez.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
}
for (int i = i_base; i < end_i; ++i) {
const int index = i + index_kj_offset;
int i_pred = i - 1;

applyPeriodicBoundary(i_pred, Ni);

const int i_pred_idx = i_pred + index_kj_offset;
const int j_pred_idx = i + j_pred_kj_offset;
const int k_pred_idx = i + k_pred_kj_offset;

Ex[index] += current_coef * Jx[index] +
coef_dy * (Bz[index] - Bz[j_pred_idx]) -
coef_dz * (By[index] - By[k_pred_idx]);
Ey[index] += current_coef * Jy[index] +
coef_dz * (Bx[index] - Bx[k_pred_idx]) -
coef_dx * (Bz[index] - Bz[i_pred_idx]);
Ez[index] += current_coef * Jz[index] +
coef_dx * (By[index] - By[i_pred_idx]) -
coef_dy * (Bx[index] - Bx[j_pred_idx]);
}
// Per-cell E-field update. Called once for every (k, j, i) triple produced by
// the rank-3 MDRangePolicy that apply() passes to Kokkos::parallel_for.
// Reads B and J, and writes only Ex/Ey/Ez at the cell's own linear index.
KOKKOS_INLINE_FUNCTION void operator()(const int& k, const int& j, const int& i) const {

// Predecessor index along each axis; index 0 wraps around to the last cell (N - 1).
const int j_pred = (j == 0) ? (Nj - 1) : (j - 1);
const int k_pred = (k == 0) ? (Nk - 1) : (k - 1);
const int i_pred = (i == 0) ? (Ni - 1) : (i - 1);

// Flattened 3D index: i + j * Ni + k * Nij, where the constructor sets Nij = Ni * Nj.
const int jk_offset = j * Ni + k * Nij;
const int index = i + jk_offset;

// Linear indices of the three predecessor cells (one step back in i, j and k).
const int i_pred_idx = i_pred + jk_offset;
const int j_pred_idx = i + j_pred * Ni + k * Nij;
const int k_pred_idx = i + j * Ni + k_pred * Nij;

// Each E component accumulates the scaled current plus backward differences of
// the two transverse B components, weighted by the per-axis coefficients.
// NOTE(review): the sign/axis pattern looks like a discrete curl of B; confirm
// against the scheme's derivation.
Ex[index] += current_coef * Jx[index] +
coef_dy * (Bz[index] - Bz[j_pred_idx]) -
coef_dz * (By[index] - By[k_pred_idx]);
Ey[index] += current_coef * Jy[index] +
coef_dz * (Bx[index] - Bx[k_pred_idx]) -
coef_dx * (Bz[index] - Bz[i_pred_idx]);
Ez[index] += current_coef * Jz[index] +
coef_dx * (By[index] - By[i_pred_idx]) -
coef_dy * (Bx[index] - Bx[j_pred_idx]);
}
};

class ComputeB_FieldFunctor : public Base_Functor {
private:
Field &Ex, &Ey, &Ez;
Field &Bx, &By, &Bz;
int Ni, Nj, Nk;
int Ni, Nj, Nk, Nij;
FP coef_dx, coef_dy, coef_dz;
int start_i, end_i;
public:
ComputeB_FieldFunctor(
Field& Ex, Field& Ey, Field& Ez,
Field& Bx, Field& By, Field& Bz,
const int& start_i, const int& end_i,
const int& Ni, const int& Nj, const int& Nk,
const FP& coef_dx, const FP& coef_dy, const FP& coef_dz) :
Ex(Ex), Ey(Ey), Ez(Ez), Bx(Bx), By(By), Bz(Bz),
Ni(Ni), Nj(Nj), Nk(Nk), start_i(start_i), end_i(end_i),
Ni(Ni), Nj(Nj), Nk(Nk), Nij(Ni * Nj),
coef_dx(coef_dx), coef_dy(coef_dy), coef_dz(coef_dz) {}

static void apply(
Expand All @@ -201,110 +111,34 @@ class ComputeB_FieldFunctor : public Base_Functor {

ComputeB_FieldFunctor functor(
Ex, Ey, Ez, Bx, By, Bz,
bounds_i[0], bounds_i[1],
Ni, Nj, Nk, coef_dx, coef_dy, coef_dz);

Kokkos::MDRangePolicy<Kokkos::Rank<2>> policy(
{bounds_k[0], bounds_j[0]},
{bounds_k[1], bounds_j[1]});
Kokkos::MDRangePolicy<Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Right>> policy(
{bounds_k[0], bounds_j[0], bounds_i[0]},
{bounds_k[1], bounds_j[1], bounds_i[1]});

Kokkos::parallel_for("UpdateBField", policy, functor);
}
KOKKOS_INLINE_FUNCTION void operator()(const int& k, const int& j) const {
int j_next = j + 1;
int k_next = k + 1;

applyPeriodicBoundary(k_next, Nk);
applyPeriodicBoundary(j_next, Nj);

const int index_kj_offset = j * Ni + k * Ni * Nj;
const int j_next_kj_offset = j_next * Ni + k * Ni * Nj;
const int k_next_kj_offset = j * Ni + k_next * Ni * Nj;

int i_base = start_i;
for (; i_base + simd_width <= end_i; i_base += simd_width) {
const int current_simd_block_start_idx = i_base + index_kj_offset;
const int j_next_simd_block_start_idx = i_base + j_next_kj_offset;
const int k_next_simd_block_start_idx = i_base + k_next_kj_offset;

simd_type Bx_simd, By_simd, Bz_simd;
simd_type Ex_simd, Ey_simd, Ez_simd;
simd_type Ey_k_next_simd, Ex_j_next_simd;
simd_type Ez_i_next_simd, Ey_i_next_simd;
simd_type Ez_j_next_simd, Ex_k_next_simd;

Bx_simd.copy_from(Bx.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
By_simd.copy_from(By.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Bz_simd.copy_from(Bz.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);

Ex_simd.copy_from(Ex.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Ey_simd.copy_from(Ey.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Ez_simd.copy_from(Ez.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);

Ey_k_next_simd.copy_from(Ey.data() + k_next_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Ex_k_next_simd.copy_from(Ex.data() + k_next_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);

Ex_j_next_simd.copy_from(Ex.data() + j_next_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Ez_j_next_simd.copy_from(Ez.data() + j_next_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);

#pragma unroll
for (int lane = 0; lane < simd_width; ++lane) {
int i = i_base + lane;
int i_next = i + 1;
applyPeriodicBoundary(i_next, Ni);

int scalar_i_next_idx = i_next + index_kj_offset;

Ez_i_next_simd[lane] = Ez[scalar_i_next_idx];
Ey_i_next_simd[lane] = Ey[scalar_i_next_idx];
}

Bx_simd += coef_dz * (Ey_k_next_simd - Ey_simd) -
coef_dy * (Ez_j_next_simd - Ez_simd);

By_simd += coef_dx * (Ez_i_next_simd - Ez_simd) -
coef_dz * (Ex_k_next_simd - Ex_simd);

Bz_simd += coef_dy * (Ex_j_next_simd - Ex_simd) -
coef_dx * (Ey_i_next_simd - Ey_simd);

Bx_simd.copy_to(Bx.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
By_simd.copy_to(By.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
Bz_simd.copy_to(Bz.data() + current_simd_block_start_idx,
Kokkos::Experimental::simd_flag_default);
}
KOKKOS_INLINE_FUNCTION void operator()(const int& k, const int& j, const int& i) const {
const int j_next = (j + 1 == Nj) ? 0 : (j + 1);
const int k_next = (k + 1 == Nk) ? 0 : (k + 1);
const int i_next = (i + 1 == Ni) ? 0 : (i + 1);

for (int i = i_base; i < end_i; ++i) {
const int index = i + index_kj_offset;
const int jk_offset = j * Ni + k * Nij;
const int index = i + jk_offset;

int i_next = i + 1;
applyPeriodicBoundary(i_next, Ni);

const int scalar_i_next_idx = i_next + index_kj_offset;
const int scalar_j_next_idx = i + j_next_kj_offset;
const int scalar_k_next_idx = i + k_next_kj_offset;
const int scalar_i_next_idx = i_next + jk_offset;
const int scalar_j_next_idx = i + j_next * Ni + k * Nij;
const int scalar_k_next_idx = i + j * Ni + k_next * Nij;

Bx[index] += coef_dz * (Ey[scalar_k_next_idx] - Ey[index]) -
coef_dy * (Ez[scalar_j_next_idx] - Ez[index]);
Bx[index] += coef_dz * (Ey[scalar_k_next_idx] - Ey[index]) -
coef_dy * (Ez[scalar_j_next_idx] - Ez[index]);

By[index] += coef_dx * (Ez[scalar_i_next_idx] - Ez[index]) -
coef_dz * (Ex[scalar_k_next_idx] - Ex[index]);
By[index] += coef_dx * (Ez[scalar_i_next_idx] - Ez[index]) -
coef_dz * (Ex[scalar_k_next_idx] - Ex[index]);

Bz[index] += coef_dy * (Ex[scalar_j_next_idx] - Ex[index]) -
coef_dx * (Ey[scalar_i_next_idx] - Ey[index]);
}
Bz[index] += coef_dy * (Ex[scalar_j_next_idx] - Ex[index]) -
coef_dx * (Ey[scalar_i_next_idx] - Ey[index]);
}
};

Expand Down
8 changes: 4 additions & 4 deletions include/FDTD_kokkos/kokkos_shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
#include <vector>

#include <Kokkos_Core.hpp>
#include <Kokkos_SIMD.hpp>

#include "Structures.h"


namespace FDTD_kokkos {
using simd_type = Kokkos::Experimental::native_simd<double>;
constexpr int simd_width = int(simd_type::size());
using Device = Kokkos::DefaultExecutionSpace;
using Field = Kokkos::View<double*, Kokkos::MemoryTraits<Kokkos::Aligned>>;
using Field = Kokkos::View<
FP*,
Kokkos::MemoryTraits<Kokkos::Restrict | Kokkos::Aligned>
>;
using TimeField = std::vector<Field>;
using Function = std::function<int(int, int, int)>;
using InitFunction = std::function<double(double, double, double, double)>;
Expand Down