Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
9cd6d57
WIP POC of dispatcher
vidyasagar-amd Nov 4, 2025
e4f160c
Dispatcher python workflow setup.
vidyasagar-amd Nov 5, 2025
9b76b3b
Dispatcher cleanup and updates.
vidyasagar-amd Nov 5, 2025
d218186
Fixes to python paths
vidyasagar-amd Nov 14, 2025
67f4f05
Cleaning up code
vidyasagar-amd Nov 14, 2025
4938c89
Improving dispatcher support for different arch
vidyasagar-amd Nov 25, 2025
eeb1289
Fix formatting errors
vidyasagar-amd Nov 25, 2025
7cc69e1
Cleaning up examples
vidyasagar-amd Nov 26, 2025
da18876
Improving codegeneration
vidyasagar-amd Nov 28, 2025
9e62753
Improving and fixing C++ examples
vidyasagar-amd Nov 28, 2025
3f70fb3
Adding conv functionality (fwd,bwd,bwdw) and examples.
vidyasagar-amd Nov 29, 2025
8817a1c
Fixes based on feedback.
vidyasagar-amd Dec 2, 2025
064e056
Further fixes based on feedback.
vidyasagar-amd Dec 2, 2025
c2c80d6
Adding stress test for autogeneration and autocorrection, and fixing …
vidyasagar-amd Dec 2, 2025
9cf9844
Another round of improvements based on feedback.
vidyasagar-amd Dec 3, 2025
fd617cf
Trimming out unnecessary code.
vidyasagar-amd Dec 3, 2025
93b66b1
Fixing the multi-D implementation.
vidyasagar-amd Dec 3, 2025
6041807
Using gpu verification for gemms and fixing convolutions tflops calcu…
vidyasagar-amd Dec 4, 2025
af839ac
Fix counter usage issue and arch filtering per ops.
vidyasagar-amd Dec 4, 2025
8b8f9f8
Adding changelog and other fixes.
vidyasagar-amd Dec 4, 2025
d8a30a4
Improve examples and resolve critical bugs.
vidyasagar-amd Dec 5, 2025
d89f06c
Reduce build time for python examples.
vidyasagar-amd Dec 5, 2025
9bb2366
Fixing minor bug.
vidyasagar-amd Dec 5, 2025
446624c
Fix compilation error.
vidyasagar-amd Dec 5, 2025
85a6bcd
Improve installation instructions for dispatcher.
vidyasagar-amd Dec 5, 2025
94cc05a
Add docker based installation instructions for dispatcher.
vidyasagar-amd Dec 5, 2025
e090841
Fixing arch-based filtering to match tile engine.
vidyasagar-amd Dec 5, 2025
148565a
Remove dead code and fix arch filtering.
vidyasagar-amd Dec 6, 2025
8497dab
Minor bugfix.
vidyasagar-amd Dec 6, 2025
bae640e
Updates after rebase.
vidyasagar-amd Jan 5, 2026
61c6826
Trimming code.
vidyasagar-amd Jan 5, 2026
53d61e0
Fix copyright headers.
vidyasagar-amd Jan 5, 2026
7a95bb6
Consolidate examples, cut down code.
vidyasagar-amd Jan 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,23 @@ CMakeUserPresets.json
# Python cache
__pycache__/

# Cache directories
.cache/
.ck_tile_cache/
ck_tile_cache/
**/kernel_cache/
**/.kernel_cache/

# Dispatcher kernel cache (user-generated, can be large)
dispatcher/**/kernel_cache/
dispatcher/**/.kernel_cache/
dispatcher/**/cached_kernels/
dispatcher/**/*.hsaco
dispatcher/**/*.co

# Dispatcher generated JSON exports
dispatcher/**/*_kernels.json
dispatcher/**/dispatcher_kernels.json

# Generated test data
test_data/*
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Documentation for Composable Kernel available at [https://rocm.docs.amd.com/proj
## Composable Kernel 1.2.0 for ROCm 7.2.0

### Added
* Added CK-Tile dispatcher - a unified kernel dispatch, code generation and architecture-based kernel filtering system with C++ and Python frontends, starting with GEMM support.
* Added support for bf16 data type to grouped_gemm and grouped_gemm_preshuffle.
* Added Col-Col-Row-Col layout support for aquant mode in blockscale GEMM.
* Added support for mixed precision fp8 x bf8 universal GEMM and weight preshuffle GEMM.
Expand Down
117 changes: 117 additions & 0 deletions dispatcher/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT

cmake_minimum_required(VERSION 3.16)

project(ck_tile_dispatcher
  VERSION 1.0.0
  LANGUAGES CXX
)

# Build everything declared from this directory downwards as strict ISO C++17:
# the standard is mandatory (no silent fallback to an older one) and compiler
# extensions are disabled.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Locate the HIP package (needed for validation kernels).
#
# A quiet first attempt uses whatever search paths the user or toolchain has
# already configured. If that fails, the search is retried with extra prefixes
# appended to CMAKE_PREFIX_PATH:
#   1. ROCM_PATH, taken from the CMake variable or else the environment, so
#      that non-default ROCm installations are honored;
#   2. the conventional /opt/rocm location.
# The retry is REQUIRED, so configuration stops here when HIP cannot be found.
find_package(hip QUIET)
if(NOT hip_FOUND)
  if(ROCM_PATH)
    list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}" "${ROCM_PATH}/hip")
  elseif(NOT "$ENV{ROCM_PATH}" STREQUAL "")
    list(APPEND CMAKE_PREFIX_PATH "$ENV{ROCM_PATH}" "$ENV{ROCM_PATH}/hip")
  endif()
  list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip)
  find_package(hip REQUIRED)
endif()

# The dispatcher library itself. No STATIC/SHARED keyword is given, so the
# library type follows BUILD_SHARED_LIBS.
add_library(ck_tile_dispatcher
  src/registry.cpp
  src/dispatcher.cpp
)

# Always compile as position-independent code so the objects can be linked
# into shared modules such as the Python bindings.
set_property(TARGET ck_tile_dispatcher PROPERTY POSITION_INDEPENDENT_CODE ON)

# Public include paths, propagated to every consumer of the target.
#
# Build tree: the dispatcher's own headers come first, followed by the
# repository-level include directory one level up (the header-only CK Tile
# headers). The order matches the original two separate calls.
#
# Install tree: a single "include" directory relative to the install prefix.
# It is listed once; the previous form added the same INSTALL_INTERFACE entry
# from two calls, duplicating it in INTERFACE_INCLUDE_DIRECTORIES.
target_include_directories(ck_tile_dispatcher
  PUBLIC
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>"
    "$<INSTALL_INTERFACE:include>"
)

# Attach the hip::host imported target as a PUBLIC dependency so that its
# usage requirements reach both the dispatcher and anything linking to it.
if(hip_FOUND)
  target_link_libraries(ck_tile_dispatcher
    PUBLIC
      hip::host
  )
endif()

# Warning flags for the dispatcher's own sources (PRIVATE: not propagated to
# consumers). The flag set is chosen from the C++ compiler ID; compilers that
# match neither pattern get no extra flags.
set(ck_tile_dispatcher_warning_flags "")
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
  set(ck_tile_dispatcher_warning_flags -Wall -Wextra -Wpedantic)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
  set(ck_tile_dispatcher_warning_flags /W4)
endif()

if(ck_tile_dispatcher_warning_flags)
  target_compile_options(ck_tile_dispatcher
    PRIVATE
      ${ck_tile_dispatcher_warning_flags}
  )
endif()
unset(ck_tile_dispatcher_warning_flags)

# ---------------------------------------------------------------------------
# Optional components. Each one is controlled by a boolean cache option that
# defaults to OFF and, when enabled, pulls in the matching sub-directory.
# The sub-directories are added in the order listed here.
# ---------------------------------------------------------------------------

# Unit tests (also turns on CTest support for this directory).
option(BUILD_DISPATCHER_TESTS "Build dispatcher unit tests" OFF)
if(BUILD_DISPATCHER_TESTS)
  enable_testing()
  add_subdirectory(tests)
endif()

# Python bindings.
option(BUILD_DISPATCHER_PYTHON "Build Python bindings for dispatcher" OFF)
if(BUILD_DISPATCHER_PYTHON)
  add_subdirectory(python)
endif()

# Wrapper code generation for tile_engine integration.
option(DISPATCHER_AUTO_GENERATE_WRAPPERS "Auto-generate wrappers from tile_engine" OFF)
if(DISPATCHER_AUTO_GENERATE_WRAPPERS)
  add_subdirectory(codegen)
endif()

# Example programs.
option(BUILD_DISPATCHER_EXAMPLES "Build dispatcher examples" OFF)
if(BUILD_DISPATCHER_EXAMPLES)
  add_subdirectory(examples)
endif()

# ctypes language bindings.
option(BUILD_DISPATCHER_BINDINGS "Build language bindings for dispatcher" OFF)
if(BUILD_DISPATCHER_BINDINGS)
  add_subdirectory(bindings/ctypes)
endif()

# With wrapper generation enabled, expose the directory holding the generated
# headers to the dispatcher and its consumers (build tree only).
# NOTE(review): DISPATCHER_GENERATED_INCLUDE_DIR is not set in this file; it is
# presumably provided by the codegen sub-directory or by the user - confirm.
if(DISPATCHER_AUTO_GENERATE_WRAPPERS)
  if(DISPATCHER_GENERATED_INCLUDE_DIR)
    target_include_directories(ck_tile_dispatcher
      PUBLIC
        $<BUILD_INTERFACE:${DISPATCHER_GENERATED_INCLUDE_DIR}>
    )
  endif()
endif()

# ---------------------------------------------------------------------------
# Installation
# ---------------------------------------------------------------------------

# Library artifacts, recorded in the ck_tile_dispatcher_targets export set.
install(
  TARGETS ck_tile_dispatcher
  EXPORT ck_tile_dispatcher_targets
  LIBRARY DESTINATION lib
  ARCHIVE DESTINATION lib
  RUNTIME DESTINATION bin
)

# Public headers: copy the include/ tree, keeping only *.hpp files.
install(
  DIRECTORY include/
  DESTINATION include
  FILES_MATCHING
    PATTERN "*.hpp"
)

# Exported target definitions; consumers see the target under the ck_tile::
# namespace.
# NOTE(review): this file installs only the targets file, no package config
# file - confirm that one is provided elsewhere if find_package() support is
# intended.
install(
  EXPORT ck_tile_dispatcher_targets
  FILE ck_tile_dispatcher_targets.cmake
  NAMESPACE ck_tile::
  DESTINATION lib/cmake/ck_tile_dispatcher
)

Loading