Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions runtime/common/callbacks.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ typedef struct {
// Copy bytes from device memory to host
int (*copy_from_dev) (void* host_ptr, vx_buffer_h hbuffer, uint64_t src_offset, uint64_t size);

// Copy bytes from device memory to device memory
int (*copy_dev_to_dev) (vx_buffer_h hdest_buffer, uint64_t dest_offset, vx_buffer_h hsrc_buffer, uint64_t src_offset, uint64_t size);

// Start device execution
int (*start) (vx_device_h hdevice, vx_buffer_h hkernel, vx_buffer_h harguments);

Expand Down
16 changes: 16 additions & 0 deletions runtime/common/callbacks.inc
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,22 @@ extern int vx_dev_init(callbacks_t* callbacks) {
return device->download(host_ptr, buffer->addr + src_offset, size);
};

// Device-to-device copy callback.
// Validates both buffer handles and the requested ranges, then delegates to
// the backend's vx_device::copy() using absolute device addresses.
// Returns -1 on invalid arguments, otherwise the value returned by copy().
callbacks->copy_dev_to_dev = [](vx_buffer_h hdest_buffer, uint64_t dest_offset, vx_buffer_h hsrc_buffer, uint64_t src_offset, uint64_t size) {
  if (nullptr == hdest_buffer || nullptr == hsrc_buffer)
    return -1;
  auto dest_buffer = ((vx_buffer*)hdest_buffer);
  auto src_buffer = ((vx_buffer*)hsrc_buffer);
  // Both addresses are handed to a single device below, so the two buffers
  // must belong to the same device.
  if (dest_buffer->device != src_buffer->device)
    return -1;
  auto device = ((vx_device*)dest_buffer->device);
  // Range checks are written as "offset > capacity - size" so that a huge
  // offset or size cannot wrap around and slip past the test.
  if (size > dest_buffer->size
   || dest_offset > dest_buffer->size - size
   || size > src_buffer->size
   || src_offset > src_buffer->size - size)
    return -1;
  DBGPRINT("COPY_DEV_TO_DEV: hdest_buffer=%p, dest_offset=%ld, hsrc_buffer=%p, src_offset=%ld, size=%ld\n",
           hdest_buffer, dest_offset, hsrc_buffer, src_offset, size);
  return device->copy(dest_buffer->addr + dest_offset,
                      src_buffer->addr + src_offset,
                      size);
};

callbacks->start = [](vx_device_h hdevice, vx_buffer_h hkernel, vx_buffer_h harguments) {
if (nullptr == hdevice || nullptr == hkernel || nullptr == harguments)
return -1;
Expand Down
2 changes: 2 additions & 0 deletions runtime/include/vortex.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ int vx_copy_to_dev(vx_buffer_h hbuffer, const void* host_ptr, uint64_t dst_offse
// Copy bytes from device memory to host
int vx_copy_from_dev(void* host_ptr, vx_buffer_h hbuffer, uint64_t src_offset, uint64_t size);

// Copy bytes from device memory to device memory.
// Copies `size` bytes from `hsrc_buffer` (starting at `src_offset`) into
// `hdest_buffer` (starting at `dest_offset`).
// Returns 0 on success and a non-zero value on failure.
int vx_copy_dev_to_dev(vx_buffer_h hdest_buffer, uint64_t dest_offset, vx_buffer_h hsrc_buffer, uint64_t src_offset, uint64_t size);
// Start device execution
int vx_start(vx_device_h hdevice, vx_buffer_h hkernel, vx_buffer_h harguments);

Expand Down
1 change: 1 addition & 0 deletions runtime/opae/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ int drv_init(opae_drv_api_t* opae_drv_funcs) {
SET_API (fpgaGetIOAddress);
SET_API (fpgaWriteMMIO64);
SET_API (fpgaReadMMIO64);
SET_API (fpgaCopyBuffer);
SET_API (fpgaErrStr);

return 0;
Expand Down
2 changes: 2 additions & 0 deletions runtime/opae/driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ typedef fpga_result (*pfn_fpgaReleaseBuffer)(fpga_handle handle, uint64_t wsid);
typedef fpga_result (*pfn_fpgaGetIOAddress)(fpga_handle handle, uint64_t wsid, uint64_t *ioaddr);
typedef fpga_result (*pfn_fpgaWriteMMIO64)(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t value);
typedef fpga_result (*pfn_fpgaReadMMIO64)(fpga_handle handle, uint32_t mmio_num, uint64_t offset, uint64_t *value);
typedef fpga_result (*pfn_fpgaCopyBuffer)(fpga_handle handle, uint64_t dest, uint64_t src, uint64_t size);
typedef const char *(*pfn_fpgaErrStr)(fpga_result e);

struct opae_drv_api_t {
Expand All @@ -52,6 +53,7 @@ struct opae_drv_api_t {
pfn_fpgaGetIOAddress fpgaGetIOAddress;
pfn_fpgaWriteMMIO64 fpgaWriteMMIO64;
pfn_fpgaReadMMIO64 fpgaReadMMIO64;
pfn_fpgaCopyBuffer fpgaCopyBuffer;
pfn_fpgaErrStr fpgaErrStr;
};

Expand Down
15 changes: 15 additions & 0 deletions runtime/opae/vortex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,21 @@ class vx_device {
return 0;
}

// Copy `size` bytes of device memory from `src_addr` to `dest_addr` through
// the OPAE driver's fpgaCopyBuffer entry point.
// Returns 0 on success, -1 if a range is out of bounds or the driver fails.
int copy(uint64_t dest_addr, uint64_t src_addr, uint64_t size) {
  // copying a range onto itself is a no-op
  if (dest_addr == src_addr) {
    return 0;
  }

  // bound checking, written so that "addr + size" cannot wrap around
  if (size > global_mem_size_
   || dest_addr > global_mem_size_ - size
   || src_addr > global_mem_size_ - size)
    return -1;

  CHECK_FPGA_ERR(api_.fpgaCopyBuffer(fpga_, dest_addr, src_addr, size), {
    return -1;
  });
  return 0;
}

int upload(uint64_t dev_addr, const void *host_ptr, uint64_t size) {
// check alignment
if (!is_aligned(dev_addr, CACHE_BLOCK_SIZE))
Expand Down
11 changes: 11 additions & 0 deletions runtime/rtlsim/vortex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,17 @@ class vx_device {
return 0;
}

// Copy `size` bytes of simulated device memory from `src_addr` to `dest_addr`.
// Returns 0 on success, -1 if either cache-block-aligned range falls outside
// the global memory.
int copy(uint64_t dest_addr, uint64_t src_addr, uint64_t size) {
  const uint64_t mem_size = GLOBAL_MEM_SIZE;
  uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
  // asize < size means the round-up itself wrapped around; the remaining
  // tests are written so that "addr + asize" cannot wrap either.
  if (asize < size
   || asize > mem_size
   || src_addr > mem_size - asize
   || dest_addr > mem_size - asize)
    return -1;

  // the ACL check is switched off around the copy and re-enabled afterwards
  ram_.enable_acl(false);
  ram_.copy(dest_addr, src_addr, size);
  ram_.enable_acl(true);
  return 0;
}

int start(uint64_t krnl_addr, uint64_t args_addr) {
// ensure prior run completed
if (future_.valid()) {
Expand Down
17 changes: 17 additions & 0 deletions runtime/simx/vortex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,23 @@ class vx_device {
return 0;
}

// Copy `size` bytes of simulated device memory from `src_addr` to `dest_addr`.
// With VM_ENABLE both addresses are translated with page_table_walk() before
// the copy. Returns 0 on success, -1 if either cache-block-aligned range falls
// outside the global memory.
int copy(uint64_t dest_addr, uint64_t src_addr, uint64_t size) {
  const uint64_t mem_size = GLOBAL_MEM_SIZE;
  uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
  // asize < size means the round-up itself wrapped around; the remaining
  // tests are written so that "addr + asize" cannot wrap either.
  if (asize < size
   || asize > mem_size
   || src_addr > mem_size - asize
   || dest_addr > mem_size - asize)
    return -1;
#ifdef VM_ENABLE
  // NOTE(review): only the two start addresses are translated; a copy that
  // crosses a page boundary presumably needs per-page translation - confirm.
  uint64_t pAddr_src = page_table_walk(src_addr);
  uint64_t pAddr_dest = page_table_walk(dest_addr);
  DBGPRINT(" [RT:copy] Copy data from vAddr = 0x%lx (pAddr=0x%lx) to vAddr = 0x%lx (pAddr=0x%lx)\n", src_addr, pAddr_src, dest_addr, pAddr_dest);
  src_addr = pAddr_src;
  dest_addr = pAddr_dest;
#endif
  // the ACL check is switched off around the copy and re-enabled afterwards
  ram_.enable_acl(false);
  ram_.copy(dest_addr, src_addr, size);
  ram_.enable_acl(true);
  return 0;
}

int start(uint64_t krnl_addr, uint64_t args_addr) {
// ensure prior run completed
if (future_.valid()) {
Expand Down
4 changes: 4 additions & 0 deletions runtime/stub/vortex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,10 @@ extern int vx_copy_from_dev(void* host_ptr, vx_buffer_h hbuffer, uint64_t src_of
return (g_callbacks.copy_from_dev)(host_ptr, hbuffer, src_offset, size);
}

// Public entry point for device-to-device copies: forwards the request to the
// copy_dev_to_dev callback stored in g_callbacks and returns its result.
extern int vx_copy_dev_to_dev(vx_buffer_h hdest_buffer, uint64_t dest_offset, vx_buffer_h hsrc_buffer, uint64_t src_offset, uint64_t size) {
  auto copy_fn = g_callbacks.copy_dev_to_dev;
  return copy_fn(hdest_buffer, dest_offset, hsrc_buffer, src_offset, size);
}

extern int vx_start(vx_device_h hdevice, vx_buffer_h hkernel, vx_buffer_h harguments) {
int profiling_mode = get_profiling_mode();
if (profiling_mode != 0) {
Expand Down
64 changes: 64 additions & 0 deletions runtime/xrt/vortex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,70 @@ class vx_device {
return 0;
}

// Copy `size` bytes of device memory from `src_addr` to `dest_addr`.
// The transfer is split into chunks that never cross a CACHE_BLOCK_SIZE
// boundary on either the source or the destination side; each chunk is
// resolved to its bank buffer and copied with the XRT buffer copy API.
// Returns 0 on success, -1 if a range is out of bounds, or the error code of
// the failing helper / XRT call.
// NOTE(review): chunks are always copied in ascending address order, so an
// overlapping copy with dest_addr > src_addr may read bytes that were already
// overwritten - confirm whether callers can pass overlapping ranges.
int copy(uint64_t dest_addr, uint64_t src_addr, uint64_t size) {
  // copying a range onto itself is a no-op
  if (dest_addr == src_addr) {
    return 0;
  }

  // bound checking, written so that "addr + size" cannot wrap around
  if (size > global_mem_size_
   || dest_addr > global_mem_size_ - size
   || src_addr > global_mem_size_ - size)
    return -1;

  uint64_t offset = 0;
  while (offset < size) {
    uint64_t curr_src = src_addr + offset;
    uint64_t curr_dest = dest_addr + offset;

    // bytes left before each side reaches its next cache-block boundary
    uint64_t src_rem = CACHE_BLOCK_SIZE - (curr_src % CACHE_BLOCK_SIZE);
    uint64_t dest_rem = CACHE_BLOCK_SIZE - (curr_dest % CACHE_BLOCK_SIZE);

    // chunk = min(src_rem, dest_rem, bytes still to copy)
    uint64_t chunk_size = (src_rem < dest_rem) ? src_rem : dest_rem;
    if (chunk_size > size - offset) {
      chunk_size = size - offset;
    }

    uint32_t src_bo_idx, dst_bo_idx;
    uint64_t src_bo_off, dst_bo_off;
    xrt_buffer_t src_buf, dst_buf;

    // resolve the source chunk to (bank buffer, offset inside that buffer)
    CHECK_ERR(this->get_bank_info(curr_src, &src_bo_idx, &src_bo_off), {
      return err;
    });
#ifdef BANK_INTERLEAVE
    src_bo_off += (curr_src % CACHE_BLOCK_SIZE);
#endif

    CHECK_ERR(this->get_buffer(src_bo_idx, &src_buf), {
      return err;
    });

    // resolve the destination chunk the same way
    CHECK_ERR(this->get_bank_info(curr_dest, &dst_bo_idx, &dst_bo_off), {
      return err;
    });
#ifdef BANK_INTERLEAVE
    dst_bo_off += (curr_dest % CACHE_BLOCK_SIZE);
#endif

    CHECK_ERR(this->get_buffer(dst_bo_idx, &dst_buf), {
      return err;
    });

#ifdef CPP_API
    dst_buf.copy(src_buf, chunk_size, src_bo_off, dst_bo_off);
#else
    CHECK_ERR(xrtBOCopy(dst_buf, src_buf, chunk_size, src_bo_off, dst_bo_off), {
      dump_xrt_error(xrtDevice_, err);
      return err;
    });
#endif

    offset += chunk_size;
  }

  return 0;
}

int upload(uint64_t dev_addr, const void *src, uint64_t size) {
auto host_ptr = (const uint8_t *)src;

Expand Down
18 changes: 18 additions & 0 deletions sim/common/mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,24 @@ void RAM::write(const void* data, uint64_t addr, uint64_t size) {
}
}

// Copy `size` bytes inside this RAM from `src_addr` to `dest_addr`.
// The ranges may overlap: the copy direction is chosen so that every source
// byte is read before it can be overwritten (memmove-style semantics).
// Throws BadAddress when ACL checking is enabled and either range fails the
// check (flag 0x1 for the source, 0x2 for the destination - presumably the
// read and write permission bits; confirm against the ACL manager).
void RAM::copy(uint64_t dest_addr, uint64_t src_addr, uint64_t size) {
  if (check_acl_) {
    if (acl_mngr_.check(src_addr, size, 0x1) == false ||
        acl_mngr_.check(dest_addr, size, 0x2) == false) {
      throw BadAddress();
    }
  }
  if (dest_addr < src_addr) {
    // destination starts below the source: walk upwards, so each write lands
    // on a byte that has already been read
    for (uint64_t i = 0; i < size; i++) {
      *this->get(dest_addr + i) = *this->get(src_addr + i);
    }
  } else if (dest_addr > src_addr) {
    // destination starts above the source: walk downwards, so the tail of the
    // source is consumed before the destination overwrites it
    for (uint64_t i = size; i > 0; i--) {
      *this->get(dest_addr + i - 1) = *this->get(src_addr + i - 1);
    }
  }
  // dest_addr == src_addr: nothing to move
}

void RAM::set_acl(uint64_t addr, uint64_t size, int flags) {
if (capacity_ != 0 && (addr + size)> capacity_) {
throw OutOfRange();
Expand Down
1 change: 1 addition & 0 deletions sim/common/mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ class RAM : public MemDevice {

void read(void* data, uint64_t addr, uint64_t size) override;
void write(const void* data, uint64_t addr, uint64_t size) override;
void copy (uint64_t dest_addr, uint64_t src_addr, uint64_t size);

void loadBinImage(const char* filename, uint64_t destination);
void loadHexImage(const char* filename);
Expand Down
10 changes: 10 additions & 0 deletions sim/opaesim/fpga.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,16 @@ extern fpga_result fpgaReadMMIO64(fpga_handle handle, uint32_t mmio_num, uint64_
return FPGA_OK;
}

// Simulator-side fpgaCopyBuffer: treats `handle` as an opae_sim instance and
// asks it to copy `size` bytes from `src` to `dest`.
// Returns FPGA_INVALID_PARAM for a null handle, FPGA_OK otherwise.
extern fpga_result fpgaCopyBuffer(fpga_handle handle, uint64_t dest, uint64_t src, uint64_t size) {
  if (NULL != handle) {
    reinterpret_cast<opae_sim*>(handle)->copy(dest, src, size);
    return FPGA_OK;
  }
  return FPGA_INVALID_PARAM;
}

extern const char *fpgaErrStr(fpga_result e) {
return "";
}
Expand Down
11 changes: 11 additions & 0 deletions sim/opaesim/opae_sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,13 @@ class opae_sim::Impl {
device_->vcp2af_sRxPort_c0_mmioWrValid = 0;
}

// Copy `size` bytes from `src` to `dest` in the simulated RAM while holding
// mutex_ for the duration of the call.
void copy(uint64_t dest, uint64_t src, uint64_t size) {
  std::lock_guard<std::mutex> lock(mutex_);
  auto& memory = *ram_;
  memory.copy(dest, src, size);
}

private:

void reset() {
Expand Down Expand Up @@ -565,6 +572,10 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value)
impl_->write_mmio64(mmio_num, offset, value);
}

void opae_sim::copy(uint64_t dest, uint64_t src, uint64_t size) {
impl_->copy(dest, src, size);
}

void opae_sim::read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
impl_->read_mmio64(mmio_num, offset, value);
}
2 changes: 2 additions & 0 deletions sim/opaesim/opae_sim.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class opae_sim {

void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value);

void copy(uint64_t dest, uint64_t src, uint64_t size);

private:

class Impl;
Expand Down
15 changes: 15 additions & 0 deletions sim/xrtsim/xrt_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,21 @@ extern int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t offset
return buffer->sim->mem_read(buffer->bank, buffer->addr + offset, size, dst);
}

// Simulator-side xrtBOCopy: copies `size` bytes from `src` (at `src_offset`)
// to `dst` (at `dst_offset`) through the destination buffer's simulator.
// Returns -1 if either handle is null, otherwise the result of mem_copy().
extern int xrtBOCopy(xrtBufferHandle dst, xrtBufferHandle src, size_t size, size_t src_offset, size_t dst_offset) {
  if (nullptr == dst || nullptr == src) {
    return -1;
  }
  auto to = reinterpret_cast<buffer_t*>(dst);
  auto from = reinterpret_cast<buffer_t*>(src);
  // bank-relative addresses of the first byte on each side
  const auto to_addr = to->addr + dst_offset;
  const auto from_addr = from->addr + src_offset;
  return to->sim->mem_copy(to->bank, from->bank, to_addr, from_addr, size);
}

extern int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset) {
return 0;
}
Expand Down
2 changes: 2 additions & 0 deletions sim/xrtsim/xrt_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ int xrtBOWrite(xrtBufferHandle bhdl, const void* src, size_t size, size_t offset

int xrtBORead(xrtBufferHandle bhdl, void* dst, size_t size, size_t offset);

int xrtBOCopy(xrtBufferHandle dst, xrtBufferHandle src, size_t size, size_t src_offset, size_t dst_offset);

int xrtBOSync(xrtBufferHandle bhdl, enum xclBOSyncDirection dir, size_t size, size_t offset);

int xrtKernelWriteRegister(xrtKernelHandle kernelHandle, uint32_t offset, uint32_t data);
Expand Down
14 changes: 14 additions & 0 deletions sim/xrtsim/xrt_sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,16 @@ class xrt_sim::Impl {
return 0;
}

// Copy `size` bytes between two memory banks of the simulated RAM.
// Each bank-relative address is turned into a flat RAM address by adding
// bank_id * mem_bank_size_. Holds mutex_ for the whole call.
// Returns -1 if either bank id is out of range, 0 otherwise.
int mem_copy(uint32_t bank_id_dest , uint32_t bank_id_src, uint64_t dest_addr, uint64_t src_addr, uint64_t size) {
  std::lock_guard<std::mutex> lock(mutex_);
  const bool banks_valid = (bank_id_dest < PLATFORM_MEMORY_NUM_BANKS)
                        && (bank_id_src < PLATFORM_MEMORY_NUM_BANKS);
  if (!banks_valid) {
    return -1;
  }
  uint64_t flat_dest = bank_id_dest * mem_bank_size_ + dest_addr;
  uint64_t flat_src = bank_id_src * mem_bank_size_ + src_addr;
  ram_->copy(flat_dest, flat_src, size);
  return 0;
}

int register_write(uint32_t offset, uint32_t value) {
std::lock_guard<std::mutex> guard(mutex_);

Expand Down Expand Up @@ -649,6 +659,10 @@ int xrt_sim::mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* data
return impl_->mem_read(bank_id, addr, size, data);
}

// Forwards the bank-to-bank copy to the private implementation and returns
// its status code.
int xrt_sim::mem_copy(uint32_t bank_id_dest , uint32_t bank_id_src, uint64_t dest_addr, uint64_t src_addr, uint64_t size) {
  const int status = impl_->mem_copy(bank_id_dest, bank_id_src, dest_addr, src_addr, size);
  return status;
}

int xrt_sim::register_write(uint32_t offset, uint32_t value) {
return impl_->register_write(offset, value);
}
Expand Down
2 changes: 2 additions & 0 deletions sim/xrtsim/xrt_sim.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class xrt_sim {

int mem_read(uint32_t bank_id, uint64_t addr, uint64_t size, void* value);

int mem_copy(uint32_t bank_id_dest , uint32_t bank_id_src, uint64_t dest_addr, uint64_t src_addr, uint64_t size);

int register_write(uint32_t offset, uint32_t value);

int register_read(uint32_t offset, uint32_t* value);
Expand Down
9 changes: 8 additions & 1 deletion tests/opencl/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ all:
$(MAKE) -C kmeans
$(MAKE) -C blackscholes
$(MAKE) -C bfs
$(MAKE) -C copybuf


run-simx:
$(MAKE) -C vecadd run-simx
Expand All @@ -43,6 +45,9 @@ run-simx:
$(MAKE) -C kmeans run-simx
$(MAKE) -C blackscholes run-simx
$(MAKE) -C bfs run-simx
$(MAKE) -C copybuf run-simx



run-rtlsim:
$(MAKE) -C vecadd run-rtlsim
Expand All @@ -64,6 +69,7 @@ run-rtlsim:
$(MAKE) -C kmeans run-rtlsim
$(MAKE) -C blackscholes run-rtlsim
$(MAKE) -C bfs run-rtlsim
$(MAKE) -C copybuf run-rtlsim

clean:
$(MAKE) -C vecadd clean
Expand All @@ -85,4 +91,5 @@ clean:
$(MAKE) -C guassian clean
$(MAKE) -C kmeans clean
$(MAKE) -C blackscholes clean
$(MAKE) -C bfs clean
$(MAKE) -C bfs clean
$(MAKE) -C copybuf clean
Loading