Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@
driver/xrt/src/m2m
driver/xrt/build
.vscode
coyote_build*
*xcu55c-fsvh2892-2L-e*
*_prj
*.gen
*.ip_user_files
*.cache
*.srcs
**/fpga_ips.txt
2 changes: 2 additions & 0 deletions driver/xrt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ set(ACCL_DOCS_RST
)

set(EN_COYOTE ON)
set(EN_AVX 1 CACHE STRING "AVX environment.")
if(EN_COYOTE)
message("Enable Coyote")
set(ACCL_HEADERS
Expand All @@ -88,6 +89,7 @@ if(EN_COYOTE)
file(GLOB COYOTE_SOURCE "${COYOTE_SOURCE_PATH}/*.cpp")

if(EN_AVX)
add_definitions(-DEN_AVX)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -mavx -march=native -O3")
else()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -march=native -O1")
Expand Down
251 changes: 251 additions & 0 deletions driver/xrt/CMakeLists_GPU.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
# /*******************************************************************************
# Copyright (C) 2022 Xilinx, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# *******************************************************************************/

# target_link_directories(), which this file uses to add the XRT library
# directory to the accl target, only exists from CMake 3.13 onwards, so that
# is the real minimum version this script needs.
cmake_minimum_required(VERSION 3.13)
project(accl VERSION 0.1.0 DESCRIPTION "ACCL")

# Default C++ standard for every target created after this point.
set(CMAKE_CXX_STANDARD 17)


# Flag for the GPU-enabled build. It is not read anywhere else in this file;
# presumably it is consumed by included modules or a parent project
# (TODO confirm).
set(EN_GPU 1)
# Locate the ROCm installation. Order of precedence:
#   1. an already defined ROCM_PATH variable (e.g. -DROCM_PATH=... or cached),
#   2. the ROCM_PATH environment variable,
#   3. the parent directory of the HIP_PATH environment variable,
#   4. the conventional /opt/rocm prefix.
if(NOT DEFINED ROCM_PATH)
  if(DEFINED ENV{ROCM_PATH})
    set(ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Path to which ROCM has been installed")
  elseif(DEFINED ENV{HIP_PATH})
    set(ROCM_PATH "$ENV{HIP_PATH}/.." CACHE PATH "Path to which ROCM has been installed")
  else()
    set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCM has been installed")
  endif()
endif()

# Report the installed ROCm version. file(STRINGS) raises a hard, rather
# cryptic error when the file is unreadable, so check for it first and only
# warn: the version string is informational, and a genuinely missing ROCm is
# reported by the HIP lookup that follows.
if(EXISTS "${ROCM_PATH}/.info/version")
  file(STRINGS "${ROCM_PATH}/.info/version" ROCM_VERSION)
  message(STATUS "Found ROCm: ${ROCM_VERSION}")
else()
  message(WARNING "No ROCm version file at ${ROCM_PATH}/.info/version; check that ROCM_PATH points to a ROCm installation.")
endif()

# Fall back to ROCm's hipcc wrapper when no C++ compiler variable is defined.
# NOTE(review): this check runs after project(), which normally has already
# selected a compiler and defined CMAKE_CXX_COMPILER, so this branch likely
# never fires - confirm. To build with hipcc, pass
# -DCMAKE_CXX_COMPILER=<ROCM_PATH>/bin/hipcc (or set CXX) when configuring.
if (NOT DEFINED CMAKE_CXX_COMPILER)
set(CMAKE_CXX_COMPILER ${ROCM_PATH}/bin/hipcc)
endif()

# HIP install prefix: an existing HIP_PATH variable wins, then the HIP_PATH
# environment variable, then <ROCM_PATH>/hip. The default is derived from
# ROCM_PATH (instead of a fixed /opt/rocm/hip) so that a relocated ROCm
# installation is honoured, in line with the HCC and clang defaults below.
if(NOT DEFINED HIP_PATH)
  if(DEFINED ENV{HIP_PATH})
    set(HIP_PATH "$ENV{HIP_PATH}" CACHE PATH "Path to which HIP has been installed")
  else()
    set(HIP_PATH "${ROCM_PATH}/hip" CACHE PATH "Path to which HIP has been installed")
  endif()
endif()

# HCC install prefix: existing variable, then environment, then <ROCM_PATH>/hcc.
if(NOT DEFINED HCC_PATH)
  if(DEFINED ENV{HCC_PATH})
    set(HCC_PATH "$ENV{HCC_PATH}" CACHE PATH "Path to which HCC has been installed")
  else()
    set(HCC_PATH "${ROCM_PATH}/hcc" CACHE PATH "Path to which HCC has been installed")
  endif()
endif()
# HCC_HOME mirrors HCC_PATH. HCC_PATH is cached, so on every configure run
# after the first it is already defined; setting HCC_HOME here rather than
# inside the branch above keeps it available on those later runs as well.
# An HCC_HOME supplied by the user is left untouched.
if(NOT DEFINED HCC_HOME)
  set(HCC_HOME "${HCC_PATH}")
endif()

# Directory with the HIP-compatible clang binaries: existing variable, then
# environment, then <ROCM_PATH>/llvm/bin.
if(NOT DEFINED HIP_CLANG_PATH)
  if(DEFINED ENV{HIP_CLANG_PATH})
    set(HIP_CLANG_PATH "$ENV{HIP_CLANG_PATH}" CACHE PATH "Path to which HIP compatible clang binaries have been installed")
  else()
    set(HIP_CLANG_PATH "${ROCM_PATH}/llvm/bin" CACHE PATH "Path to which HIP compatible clang binaries have been installed")
  endif()
endif()

# Probe for HIP quietly first, so that a missing installation produces the
# project-specific hint below instead of CMake's generic package error.
find_package(HIP QUIET)
if(NOT HIP_FOUND)
  message(FATAL_ERROR "Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.")
endif()
message(STATUS "Found HIP: " ${HIP_VERSION})

# Second, mandatory lookup under the lowercase package name; presumably this
# is what provides the hip::device target linked into accl later on
# (TODO confirm).
find_package(hip REQUIRED)

# Register HIP in the CYT_LANG language list.
list(APPEND CYT_LANG HIP)

# Default output locations inside the build tree. Each is applied only when
# the corresponding variable is unset or empty, so a parent project or the
# command line can still choose its own directories.
if (NOT CMAKE_LIBRARY_OUTPUT_DIRECTORY)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
endif()

if (NOT CMAKE_RUNTIME_OUTPUT_DIRECTORY)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
endif()

# Consider switching to PROJECT_IS_TOP_LEVEL from CMake 3.21 (2021)
# (https://cmake.org/cmake/help/latest/variable/PROJECT_IS_TOP_LEVEL.html)
# HAS_PARENT holds the directory that pulled this one in via
# add_subdirectory(); it is empty (false) when this is the top-level project.
get_directory_property(HAS_PARENT PARENT_DIRECTORY)

# Locations of the driver sources, headers and docs, all resolved relative to
# the directory containing this file.
set(ACCL_SOURCE_PATH ${CMAKE_CURRENT_LIST_DIR}/src)
set(ACCL_HEADER_PATH ${CMAKE_CURRENT_LIST_DIR}/include)
set(ACCL_DOCS_PATH ${CMAKE_CURRENT_LIST_DIR}/docs)
# Repository root, two levels above this directory (note the trailing slash).
set(ACCL_REPO_ROOT ${CMAKE_CURRENT_LIST_DIR}/../../)
# ZMQ interface sources (zmq_client.cpp / zmq_common.cpp are compiled into accl).
set(ZMQ_INTF_DIR ${ACCL_REPO_ROOT}/test/model/zmq)
# Software sources and headers of the Coyote checkout under test/refdesigns.
set(COYOTE_SOURCE_PATH ${CMAKE_CURRENT_LIST_DIR}/../../test/refdesigns/Coyote/sw/src)
set(COYOTE_HEADER_PATH ${CMAKE_CURRENT_LIST_DIR}/../../test/refdesigns/Coyote/sw/include)

# Public headers of the driver; also used as dependencies of the Doxygen run.
set(ACCL_HEADERS "")
foreach(accl_header_stem IN ITEMS
    accl common communicator constants simdevice simbuffer xrtdevice acclrequest)
  list(APPEND ACCL_HEADERS ${ACCL_HEADER_PATH}/${accl_header_stem}.hpp)
endforeach()

# Translation units of the driver itself, followed by the ZMQ interface
# sources that are compiled into the same library.
set(ACCL_SOURCES "")
foreach(accl_source_stem IN ITEMS
    accl common communicator constants simdevice simbuffer xrtdevice)
  list(APPEND ACCL_SOURCES ${ACCL_SOURCE_PATH}/${accl_source_stem}.cpp)
endforeach()
list(APPEND ACCL_SOURCES
  ${ZMQ_INTF_DIR}/zmq_client.cpp
  ${ZMQ_INTF_DIR}/zmq_common.cpp
)

# reStructuredText inputs of the Sphinx documentation: the top-level index
# first, then the pages of the C++ reference.
set(ACCL_DOCS_RST ${ACCL_DOCS_PATH}/index.rst)
foreach(accl_doc_page IN ITEMS index accl buffer cclo communicator misc)
  list(APPEND ACCL_DOCS_RST ${ACCL_DOCS_PATH}/Cpp_reference/${accl_doc_page}.rst)
endforeach()

# Coyote support is unconditionally enabled in this build script.
set(EN_COYOTE ON)
# AVX toggle; cached so it can be overridden with -DEN_AVX=0.
set(EN_AVX 1 CACHE STRING "AVX environment.")
if(EN_COYOTE)
  message("Enable Coyote")
  # Make the Coyote CMake modules (presumably including the find module for
  # CoyoteSW - TODO confirm) discoverable. The directory is resolved relative
  # to this file, next to the Coyote sw/src and sw/include paths used above,
  # rather than through an absolute path that only exists on one machine.
  list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/../../test/refdesigns/Coyote/cmake")
  find_package(CoyoteSW REQUIRED)

  # Coyote-specific parts of the driver.
  list(APPEND ACCL_HEADERS
    ${ACCL_HEADER_PATH}/coyotebuffer.hpp
    ${ACCL_HEADER_PATH}/coyotedevice.hpp
  )
  list(APPEND ACCL_SOURCES
    ${ACCL_SOURCE_PATH}/coyotedevice.cpp
  )
  # Every .cpp of the Coyote software library. The glob is evaluated at
  # configure time only, so re-run CMake after files are added or removed.
  file(GLOB COYOTE_SOURCE "${COYOTE_SOURCE_PATH}/*.cpp")

  # Directory-wide compiler settings for the Coyote build. -march=native
  # ties the resulting binaries to the CPU of the build machine.
  if(EN_AVX)
    add_definitions(-DEN_AVX)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -mavx -march=native -O3")
  else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -march=native -O1")
  endif()
endif()


# Include directories needed to compile against ACCL: the driver headers and
# the ZMQ interface.
set(ACCL_INCLUDE_PATH ${CMAKE_CURRENT_LIST_DIR}/include ${ZMQ_INTF_DIR})

# With Coyote enabled, its sources are compiled into the library and its
# headers become part of the include path.
if(EN_COYOTE)
  set(ACCL_SOURCES ${ACCL_SOURCES} ${COYOTE_SOURCE})
  set(ACCL_INCLUDE_PATH ${ACCL_INCLUDE_PATH} ${COYOTE_HEADER_PATH})
endif()

# Publish the include path to the including project. This is done after the
# Coyote directory has been appended, so the parent sees the same list that
# is used for the accl target instead of a stale copy without Coyote.
if(HAS_PARENT)
  set(ACCL_INCLUDE_PATH ${ACCL_INCLUDE_PATH} PARENT_SCOPE)
endif()


# The ACCL driver as a shared library. Usage requirements are PUBLIC so that
# consumers linking against accl inherit include paths and libraries.
add_library(accl SHARED ${ACCL_SOURCES})
target_include_directories(accl PUBLIC ${ACCL_INCLUDE_PATH})

# XRT
# Quoted so that an unset or empty XILINX_XRT still forms a valid EXISTS
# test (which is false for an empty path) and produces the hint below.
if(NOT EXISTS "$ENV{XILINX_XRT}")
  message(FATAL_ERROR "Xilinx XRT not found, make sure to source setup.sh")
endif()

target_link_directories(accl PUBLIC "$ENV{XILINX_XRT}/lib")
target_link_libraries(accl PUBLIC xilinxopencl xrt_coreutil xrt_core)

# ROCm / HIP
# Headers are taken from the detected ROCM_PATH rather than a fixed
# /opt/rocm, so a relocated ROCm installation works.
target_include_directories(accl PUBLIC "${ROCM_PATH}/include" "${ROCM_PATH}/include/hsa")
target_link_libraries(accl PUBLIC hip::device numa pthread drm drm_amdgpu rt dl hsa-runtime64 hsakmt)

target_include_directories(accl PUBLIC "$ENV{XILINX_XRT}/include")

# ZMQ
target_link_libraries(accl PUBLIC zmq pthread)

# Json
find_package(jsoncpp REQUIRED)
target_link_libraries(accl PUBLIC jsoncpp_lib)
# Also add the jsoncpp include directories explicitly. get_target_property()
# yields a *-NOTFOUND value when the property is not set; skip that case
# instead of adding a bogus include directory.
get_target_property(JSON_INC_PATH jsoncpp_lib INTERFACE_INCLUDE_DIRECTORIES)
if(JSON_INC_PATH)
  target_include_directories(accl PUBLIC ${JSON_INC_PATH})
endif()


# Optional debug build: exposes the ACCL_DEBUG macro to the library and to
# everything that links against it.
if(ACCL_DEBUG)
  target_compile_definitions(accl PUBLIC ACCL_DEBUG)
  message("Defining ACCL_DEBUG")
endif()

# Shared-library versioning and the header treated as the public one.
set_property(TARGET accl PROPERTY VERSION ${PROJECT_VERSION})
set_property(TARGET accl PROPERTY SOVERSION 1)
set_property(TARGET accl PROPERTY PUBLIC_HEADER include/accl.hpp)

# Documentation tooling. Both lookups are optional; the doc targets are only
# created further down when both tools were found.
find_package(Doxygen)

# The Sphinx find module lives in this directory's cmake/ folder, so put
# that folder at the front of the module search path before looking it up.
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake" ${CMAKE_MODULE_PATH})
find_package(Sphinx)

# Name of the documentation target: plain "docs" for a stand-alone build,
# "accl_docs" when this directory is included by another project.
set(DOC_TARGET docs)
if(HAS_PARENT)
  set(DOC_TARGET accl_docs)
endif()

# Documentation pipeline: Doxygen extracts the API into XML, then Sphinx
# renders the HTML pages from the .rst sources and that XML. Requires both
# tools. Note that all output is written below docs/ in the source tree.
if(DOXYGEN_FOUND AND SPHINX_FOUND)
  set(DOXYGEN_INDEX_FILE ${CMAKE_CURRENT_LIST_DIR}/docs/xml/index.xml)
  set(SPHINX_INDEX_FILE ${CMAKE_CURRENT_LIST_DIR}/docs/sphinx/index.html)

  # Step 1: Doxygen, re-run whenever the Doxyfile or a public header changes.
  add_custom_command(OUTPUT ${DOXYGEN_INDEX_FILE}
    COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/Doxyfile
    WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
    BYPRODUCTS
      ${CMAKE_CURRENT_LIST_DIR}/docs/xml
      ${CMAKE_CURRENT_LIST_DIR}/docs/latex
      ${CMAKE_CURRENT_LIST_DIR}/docs/html
    DEPENDS ${ACCL_HEADERS}
    MAIN_DEPENDENCY ${CMAKE_CURRENT_LIST_DIR}/Doxyfile
    COMMENT "Generating API documentation with Doxygen"
    VERBATIM
  )

  add_custom_target(${DOC_TARGET}_doxygen ALL DEPENDS ${DOXYGEN_INDEX_FILE})

  # Step 2: Sphinx, re-run whenever the Doxygen XML or an .rst page changes.
  add_custom_command(OUTPUT ${SPHINX_INDEX_FILE}
    COMMAND ${SPHINX_EXECUTABLE} -b html ${CMAKE_CURRENT_LIST_DIR}/docs ${CMAKE_CURRENT_LIST_DIR}/docs/sphinx
    WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
    BYPRODUCTS
      ${CMAKE_CURRENT_LIST_DIR}/docs/sphinx
    DEPENDS ${ACCL_DOCS_RST}
    MAIN_DEPENDENCY ${DOXYGEN_INDEX_FILE}
    COMMENT "Generating API documentation with Sphinx"
    VERBATIM
  )

  add_custom_target(${DOC_TARGET} ALL DEPENDS ${SPHINX_INDEX_FILE})

  # Both targets are declared with ALL above but are then marked as excluded
  # from the default build here; the intent appears to be that docs are only
  # generated when the doc target is requested explicitly (TODO confirm).
  set_target_properties(${DOC_TARGET}_doxygen ${DOC_TARGET} PROPERTIES EXCLUDE_FROM_ALL 1 EXCLUDE_FROM_DEFAULT_BUILD 1)
else()
  # The condition above needs both tools, so name both in the hint.
  message("Doxygen and Sphinx both need to be installed to generate the documentation")
endif()
4 changes: 2 additions & 2 deletions driver/xrt/include/accl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class ACCL {
*/
void initialize(const std::vector<rank_t> &ranks, int local_rank,
int n_egr_rx_bufs = 16, addr_t egr_rx_buf_size = 1024,
addr_t max_egr_size = 1024, addr_t max_rndzv_size = 32*1024);
addr_t max_egr_size = 1024, addr_t max_rndzv_size = 32*1024, bool rxEager_host = false);

/**
* Get the return code of the last ACCL call.
Expand Down Expand Up @@ -1101,7 +1101,7 @@ ACCLRequest *barrier(communicatorId comm_id = GLOBAL_COMM,
void configure_arithmetic();

void setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size,
const std::vector<int> &devicemem);
const std::vector<int> &devicemem, bool host=false);
void setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, int devicemem) {
std::vector<int> mems = {devicemem};
return setup_eager_rx_buffers(n_egr_rx_bufs, egr_rx_buf_size, mems);
Expand Down
37 changes: 27 additions & 10 deletions driver/xrt/include/accl/coyotebuffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@
#pragma once
#include "buffer.hpp"
#include "common.hpp"
#include "cProcess.hpp"
#include "cThread.hpp"
#include "cDefs.hpp"
#include "coyotedevice.hpp"
#include <cstdlib>
#include <cstring>
#include <iomanip>
#define GPU_EN 0
#if GPU_EN == 1
#include <hip/hip_runtime.h>
#define DEFAULT_GPU_ID 0
#endif

/** @file coyotebuffer.hpp */

Expand Down Expand Up @@ -57,14 +62,18 @@ template <typename dtype> class CoyoteBuffer : public Buffer<dtype> {
size_t page_size = 1ULL << 21;
this->buffer_size = length * sizeof(dtype);
this->n_pages = (buffer_size + page_size - 1) / page_size;
std::cerr << "CoyoteBuffer contructor called! page_size:"<<page_size<<", buffer_size:"<<buffer_size<<",n_pages:"<<n_pages<< std::endl;

this->aligned_buffer = (dtype *)this->device->coyote_proc->getMem({fpga::CoyoteAlloc::HUGE_2M, n_pages});
#if GPU_EN == 1
if (hipSetDevice(DEFAULT_GPU_ID)) { throw std::runtime_error("Couldn't select GPU!"); }
this->aligned_buffer = (dtype *)this->device->coyote_proc->getMem({coyote::CoyoteAllocType::GPU, static_cast<uint32_t>(this->buffer_size), true, DEFAULT_GPU_ID});
#else
this->aligned_buffer = (dtype *)this->device->coyote_proc->getMem({coyote::CoyoteAllocType::HPF, this->buffer_size, true});
#endif

this->update_buffer(this->aligned_buffer, (addr_t)this->aligned_buffer);

std::cerr << "Allocation successful! Allocated buffer: "<<std::setbase(16)<<(uint64_t)this->aligned_buffer << std::setbase(10) <<", Size: " << this->_size << std::endl;

//buffers in coyote per default on host memory
host_flag = true;


Expand Down Expand Up @@ -112,22 +121,30 @@ template <typename dtype> class CoyoteBuffer : public Buffer<dtype> {
*/
void sync_from_device() override
{
std::cerr << "calling sync: " << std::setbase(16) << (uint64_t)this->aligned_buffer << ", size: " << std::setbase(10) << this->size() << std::endl;
std::cerr << "sync_from_device at address: " << std::setbase(16) << (uint64_t)this->aligned_buffer << ", size: " << std::setbase(10) << this->size() << std::endl;

this->device->coyote_proc->invoke({fpga::CoyoteOper::SYNC, this->aligned_buffer, (uint32_t)this->_size, true, true, 0, false});
coyote::syncSg sg;
memset(&sg, 0, sizeof(coyote::syncSg));
sg.addr = this->aligned_buffer;
sg.len = this->size();
this->device->coyote_proc->invoke(coyote::CoyoteOper::LOCAL_SYNC, sg);

this->host_flag = true;
}

/**
* Sync the data from the host to the device.
*
*
*/
void sync_to_device() override
{
std::cerr << "calling offload: " << std::setbase(16) << (uint64_t)this->aligned_buffer << ", size: " << std::setbase(10) << this->size() << std::endl;
std::cerr << "sync_to_device at address: " << std::setbase(16) << (uint64_t)this->aligned_buffer << ", size: " << std::setbase(10) << this->size() << std::endl;

this->device->coyote_proc->invoke({fpga::CoyoteOper::OFFLOAD, this->aligned_buffer, (uint32_t)this->_size, true, true, 0, false});
coyote::syncSg sg;
memset(&sg, 0, sizeof(coyote::syncSg));
sg.addr = this->aligned_buffer;
sg.len = this->size();
this->device->coyote_proc->invoke(coyote::CoyoteOper::LOCAL_OFFLOAD, sg);

this->host_flag = false;
}
Expand All @@ -144,7 +161,7 @@ template <typename dtype> class CoyoteBuffer : public Buffer<dtype> {
// }
// }

std::cerr << "Free user buffer from cProc cPid:"<< std::setbase(10)<<this->device->coyote_proc->getCpid()<<", buffer_size:"<<buffer_size<<","<<std::setbase(16) << (uint64_t)this->aligned_buffer<<std::endl;
std::cerr << "Free user buffer from cProc cPid:"<< std::setbase(10)<<this->device->coyote_proc->getCtid()<<", buffer_size:"<<buffer_size<<","<<std::setbase(16) << (uint64_t)this->aligned_buffer<<std::endl;
this->device->coyote_proc->freeMem(this->aligned_buffer);
return;
}
Expand Down
Loading