-
Notifications
You must be signed in to change notification settings - Fork 265
Addition of Stream-K tests using Tile Engine #3514
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,306 @@ | ||
| # Copyright (c) Advanced Micro Devices, Inc., or its affiliates. | ||
| # SPDX-License-Identifier: MIT | ||
|
|
||
| # ============================================================================ | ||
| # Stream-K GEMM Tile Engine Unit Tests | ||
| # | ||
| # This CMake file creates unit tests for tile_engine generated GEMM kernels. | ||
| # It follows the exact same build patterns as tile_engine for consistency | ||
| # and reliability. Each kernel configuration gets its own test executable. | ||
| # ============================================================================ | ||
|
|
||
# The Stream-K GEMM generator scripts are expected under
# tile_engine/ops/gemm_streamk in the project source tree.
set(TILE_ENGINE_GEMM_DIR "${PROJECT_SOURCE_DIR}/tile_engine/ops/gemm_streamk")

# Every step below invokes scripts from that directory, so when it is absent
# we warn and stop processing this CMake file.
if(NOT EXISTS "${TILE_ENGINE_GEMM_DIR}")
  message(WARNING "Tile engine directory not found: ${TILE_ENGINE_GEMM_DIR}")
  return()
endif()
|
|
||
# ============================================================================
# create_individual_gemm_test_target
#
# Creates one GTest executable for a single generated Stream-K kernel header.
# The header and the test-parameter header must already exist in the binary
# tree (they are produced at configure time by build_gemm_test_targets); if
# either is missing a warning is printed and no target is created.
#
# Parameters:
#   datatype    - Data type (fp16, bf16, fp32, etc.)
#   layout      - Matrix layout (rcr, rrr, ccr, crr)
#   config_name - Configuration file name without .json extension
#   trait       - Kernel trait combination string
#   tile_config - Tile configuration parameters
#   config_json - Full path to JSON configuration file (currently unused;
#                 kept so existing callers keep working)
#
# Reads from the calling scope:
#   GEMM_TEST_GPU_TARGETS - value assigned to the target's HIP_ARCHITECTURES
#   CK_USE_OCP_FP8        - when true, defines CK_TILE_USE_OCP_FP8
# ============================================================================
function(create_individual_gemm_test_target datatype layout config_name trait tile_config config_json)
  set(target_name "test_gemm_streamk_tile_engine_${datatype}_${layout}_${config_name}_${trait}_${tile_config}")
  set(working_path "${CMAKE_CURRENT_BINARY_DIR}/${datatype}/${layout}/${config_name}")

  # Headers generated earlier during CMake configuration.
  set(test_header "${working_path}/gemm_streamk_single_${datatype}_${layout}_${trait}_${tile_config}.hpp")
  set(test_params_header "${working_path}/test_params.hpp")

  # Without the generated kernel header there is nothing to compile.
  if(NOT EXISTS "${test_header}")
    message(WARNING "Generated header not found: ${test_header}")
    return()
  endif()

  # The test source also needs the per-config problem sizes.
  if(NOT EXISTS "${test_params_header}")
    message(WARNING "Test parameters header not found: ${test_params_header}")
    return()
  endif()

  # One GTest executable per kernel configuration.
  add_gtest_executable(${target_name}
    ${CMAKE_CURRENT_SOURCE_DIR}/test_gemm_streamk_simple.cpp
  )

  # add_gtest_executable is defined outside this file. If it declined to
  # create the target, the target_* calls below would be hard errors, so
  # stop here instead.
  if(NOT TARGET ${target_name})
    message(STATUS "  Skipped test target (not created by add_gtest_executable): ${target_name}")
    return()
  endif()

  # GPU architectures for HIP compilation.
  set_property(TARGET ${target_name} PROPERTY HIP_ARCHITECTURES ${GEMM_TEST_GPU_TARGETS})

  # Tell the test source where the generated headers live.
  target_compile_definitions(${target_name} PRIVATE
    GEMM_SINGLE_INSTANCE_HPP="${test_header}"
    GEMM_TEST_PARAMS_HPP="${test_params_header}"
  )

  # FP8 format selection, expressed as a definition rather than a raw -D flag.
  if(CK_USE_OCP_FP8)
    target_compile_definitions(${target_name} PRIVATE CK_TILE_USE_OCP_FP8)
  endif()

  # Include directories for headers and dependencies.
  target_include_directories(${target_name} PRIVATE
    ${PROJECT_SOURCE_DIR}/include
    ${PROJECT_BINARY_DIR}/include
    ${PROJECT_SOURCE_DIR}            # Root directory for tile_engine access
    ${GTEST_INCLUDE_DIRS}
  )

  # Compiler options matching tile_engine requirements.
  # "-include <file>" is passed as a SHELL: group so that CMake's option
  # de-duplication can never separate the flag from its argument.
  target_compile_options(${target_name} PRIVATE
    -Wno-undefined-func-template     # Suppress template warnings
    -Wno-float-equal                 # Allow floating point comparisons
    --offload-compress               # Enable GPU code compression
    "SHELL:-include \"${test_header}\""   # Auto-include generated header
  )

  message(STATUS "    Created test target: ${target_name}")
endfunction()
|
|
||
# ============================================================================
# build_gemm_test_targets
#
# Builds all test targets for one datatype/layout/config combination.
# Everything here runs at CMake configure time:
#   1. Ask the Stream-K instance builder to list the valid kernels.
#   2a. Extract the test parameters from the JSON config into test_params.hpp.
#   2b. Generate one header per listed kernel with --gen_single.
#   3. Create one test target per listed kernel.
# Any failing step prints a warning (or a status message when no kernels were
# listed) and returns without creating targets.
#
# Parameters:
#   datatype    - Data type (fp16, bf16, fp32, etc.)
#   layout      - Matrix layout (rcr, rrr, ccr, crr)
#   config_name - Configuration file name without .json extension
#
# Reads from the calling scope:
#   TILE_ENGINE_GEMM_DIR - directory containing the builder script
#   Python3_EXECUTABLE   - interpreter used to run the scripts
# ============================================================================
function(build_gemm_test_targets datatype layout config_name)
  set(working_path "${CMAKE_CURRENT_BINARY_DIR}/${datatype}/${layout}/${config_name}")
  set(builder_script "${TILE_ENGINE_GEMM_DIR}/gemm_streamk_instance_builder.py")
  set(kernel_list_file "${working_path}/gemm_kernel_list.txt")

  # Locate and validate configuration file.
  set(config_filename "${config_name}.json")
  set(json_blob "${CMAKE_CURRENT_SOURCE_DIR}/configs/${config_filename}")

  if(NOT EXISTS "${json_blob}")
    message(WARNING "Test config file not found: ${json_blob}")
    return()
  endif()

  # Prepare build directory for this configuration.
  file(MAKE_DIRECTORY "${working_path}")

  # The "no kernels found" branch below relies on the list file being absent,
  # so drop any list left over from a previous configure before re-listing.
  file(REMOVE "${kernel_list_file}")

  # STEP 1: Discovery phase - list all valid kernel configurations.
  execute_process(
    COMMAND ${Python3_EXECUTABLE} -u ${builder_script}
            --working_path ${working_path}
            --datatype ${datatype}
            --layout ${layout}
            --config_json ${json_blob}
            --list_kernels
    WORKING_DIRECTORY ${TILE_ENGINE_GEMM_DIR}
    RESULT_VARIABLE ret
    OUTPUT_VARIABLE list_output
    ERROR_VARIABLE list_error
  )

  if(NOT ret EQUAL 0)
    message(WARNING "Failed to list kernels for ${datatype}_${layout}_${config_name}: ${list_error}")
    return()
  endif()

  # No list file means the builder produced no kernels for this combination.
  if(NOT EXISTS "${kernel_list_file}")
    message(STATUS "No kernels found for ${datatype}_${layout}_${config_name} (validation filtered out all combinations)")
    return()
  endif()

  message(STATUS "Building tests for ${datatype}_${layout}_${config_name}")

  # STEP 2a: Extract test parameters from config.
  set(test_params_file "${working_path}/test_params.hpp")
  execute_process(
    COMMAND ${Python3_EXECUTABLE} -u ${CMAKE_CURRENT_SOURCE_DIR}/extract_test_params.py
            --config_file ${json_blob}
            --output_file ${test_params_file}
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    RESULT_VARIABLE extract_ret
    OUTPUT_VARIABLE extract_output
    ERROR_VARIABLE extract_error
  )

  if(NOT extract_ret EQUAL 0)
    message(WARNING "Failed to extract test parameters for ${datatype}_${layout}_${config_name}: ${extract_error}")
    return()
  endif()

  # The kernel list is read once and reused by both phases below.
  file(STRINGS "${kernel_list_file}" kernel_lines)

  # STEP 2b: Header generation phase - generate headers using --gen_single.
  message(STATUS "  Generating headers using --gen_single...")
  set(gen_count 0)

  foreach(line IN LISTS kernel_lines)
    # Kernel specification format: kernel_name|tile_config|trait_combo
    # Lines that do not split into exactly three fields are ignored.
    string(REPLACE "|" ";" parts "${line}")
    list(LENGTH parts parts_len)
    if(parts_len EQUAL 3)
      list(GET parts 0 kernel_name)
      list(GET parts 1 tile_config)
      list(GET parts 2 trait_combo)

      execute_process(
        COMMAND ${Python3_EXECUTABLE} -u ${builder_script}
                --working_path ${working_path}
                --datatype ${datatype}
                --layout ${layout}
                --config_json ${json_blob}
                --gen_single
                --kernel_name "${kernel_name}"
                --tile_config "${tile_config}"
                --trait_combo "${trait_combo}"
        WORKING_DIRECTORY ${TILE_ENGINE_GEMM_DIR}
        RESULT_VARIABLE gen_ret
        OUTPUT_VARIABLE gen_output
        ERROR_VARIABLE gen_error
      )

      if(NOT gen_ret EQUAL 0)
        message(WARNING "Failed to generate header for ${kernel_name}: ${gen_error}")
      else()
        math(EXPR gen_count "${gen_count} + 1")
      endif()
    endif()
  endforeach()

  message(STATUS "  Generated ${gen_count} headers for ${datatype}_${layout}")

  # STEP 3: Target creation phase - create test targets.
  message(STATUS "  Creating test targets...")
  set(test_count 0)

  foreach(line IN LISTS kernel_lines)
    string(REPLACE "|" ";" parts "${line}")
    list(LENGTH parts parts_len)
    if(parts_len EQUAL 3)
      list(GET parts 0 kernel_name)
      list(GET parts 1 tile_config)
      list(GET parts 2 trait_combo)

      create_individual_gemm_test_target("${datatype}" "${layout}" "${config_name}" "${trait_combo}" "${tile_config}" "${json_blob}")

      # create_individual_gemm_test_target returns early when a header is
      # missing, so count only targets that really exist. The name below
      # mirrors the naming scheme used inside that function.
      set(created_target "test_gemm_streamk_tile_engine_${datatype}_${layout}_${config_name}_${trait_combo}_${tile_config}")
      if(TARGET ${created_target})
        math(EXPR test_count "${test_count} + 1")
      endif()
    endif()
  endforeach()

  message(STATUS "  Created ${test_count} test targets for ${datatype}_${layout}")
endfunction()

# ============================================================================
# MAIN EXECUTION - Test Target Generation
# ============================================================================
|
|
||
message(STATUS "=== Starting StreamK GEMM Tile Engine Test Configuration ===")
message(STATUS "SUPPORTED_GPU_TARGETS: ${SUPPORTED_GPU_TARGETS}")

# Architectures these tests are built for. GEMM_TEST_GPU_TARGETS collects the
# entries of SUPPORTED_GPU_TARGETS that appear in this list.
set(DESIRED_TARGETS "gfx908;gfx90a;gfx942;gfx950;gfx1201")
set(GEMM_TEST_GPU_TARGETS "")

foreach(gpu_arch IN LISTS SUPPORTED_GPU_TARGETS)
  if(NOT gpu_arch IN_LIST DESIRED_TARGETS)
    continue()
  endif()
  list(APPEND GEMM_TEST_GPU_TARGETS ${gpu_arch})
  message(STATUS "  Adding GPU target for tests: ${gpu_arch}")
endforeach()

# With no matching architecture there is nothing to build: warn and stop
# processing this CMake file.
if(NOT GEMM_TEST_GPU_TARGETS)
  message(WARNING "Skipping StreamK GEMM Tile Engine tests: No supported GPU targets (gfx908, gfx90a, gfx942, gfx950, gfx1201) found in SUPPORTED_GPU_TARGETS: ${SUPPORTED_GPU_TARGETS}")
  return()
endif()

message(STATUS "Building StreamK GEMM tile engine tests for GPU targets: ${GEMM_TEST_GPU_TARGETS}")
|
|
||
# Job pools for controlling compile parallelism:
#   compile_heavy  - at most 4 concurrent jobs (meant to prevent OOM)
#   compile_normal - at most 16 concurrent jobs
# NOTE(review): nothing in the visible part of this file assigns a target to
# either pool (no JOB_POOL_COMPILE / JOB_POOL_LINK is set), so they appear to
# have no effect here - confirm whether they are consumed elsewhere.
set_property(GLOBAL PROPERTY JOB_POOLS compile_heavy=4 compile_normal=16)

# Optional compiler cache for the test builds. Off by default because of
# permission issues in CI environments; opt in with -DENABLE_CCACHE_TESTS=ON.
option(ENABLE_CCACHE_TESTS "Enable ccache for test compilation" OFF)
if(NOT ENABLE_CCACHE_TESTS)
  message(STATUS "ccache disabled for tests (use -DENABLE_CCACHE_TESTS=ON to enable)")
else()
  find_program(CCACHE_PROGRAM ccache)
  if(NOT CCACHE_PROGRAM)
    message(WARNING "ccache requested but not found")
  else()
    set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_PROGRAM})
    message(STATUS "Using ccache for faster test compilation")
  endif()
endif()
|
|
||
# ============================================================================
# Test Configuration Matrix
# ============================================================================

# Data types and layouts exercised by these tests.
# Note: fp64 is not exercised here.
set(TEST_DATATYPES "fp16;bf16")
set(TEST_LAYOUTS "rcr;rrr;ccr;crr")

# ============================================================================
# Test Target Generation - Datatype-Specific Categories
# ============================================================================

# 1. SIMPLE TEST: basic functionality for every data type in TEST_DATATYPES,
#    across every layout in TEST_LAYOUTS.
set(SIMPLE_TEST_CONFIG "simple_test_config")
set(SIMPLE_TEST_CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/configs/${SIMPLE_TEST_CONFIG}.json")
# Derived from TEST_DATATYPES so the list of data types is written only once.
set(SIMPLE_DATATYPES "${TEST_DATATYPES}")

if(EXISTS "${SIMPLE_TEST_CONFIG_FILE}")
  message(STATUS "Processing simple test config: ${SIMPLE_TEST_CONFIG} (fp16, bf16)")
  foreach(datatype IN LISTS SIMPLE_DATATYPES)
    foreach(layout IN LISTS TEST_LAYOUTS)
      build_gemm_test_targets("${datatype}" "${layout}" "${SIMPLE_TEST_CONFIG}")
    endforeach()
  endforeach()

  # Print the summary only when the config was actually processed.
  message(STATUS "StreamK GEMM tile engine tests configured with datatype-specific design:")
  message(STATUS "  - Simple test: fp16/bf16 (all layouts)")
else()
  message(WARNING "Simple test config file not found: ${SIMPLE_TEST_CONFIG_FILE}")
endif()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| # Stream-K GEMM Tile Engine Unit Tests | ||
|
|
||
| ## How It Works | ||
|
|
||
| This unit test system integrates **tile_engine's kernel generation** into automated testing: | ||
|
|
||
| 1. **Uses tile_engine scripts directly**: Same Python scripts that generate tile_engine kernels | ||
| 2. **JSON-based configuration**: Define test parameters in JSON files (like tile_engine) | ||
| 3. **Configure-time generation**: CMake calls tile_engine scripts (via `execute_process`) to generate kernel headers while the build is being configured | ||
| 4. **Individual test executables**: Each kernel configuration becomes a separate test | ||
| 5. **Tile_engine verification**: Uses exact same error thresholds and validation as tile_engine | ||
|
|
||
| ## Tile Engine Integration | ||
|
|
||
| ``` | ||
| JSON Config → tile_engine Python scripts → Generated Headers → Test Executables | ||
| ``` | ||
|
|
||
| - **`--list_kernels`**: Get available kernel configurations from JSON | ||
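| - **`--gen_individual`**: Generate all kernel headers in parallel during CMake configuration (a builder option; the test CMake file does not invoke it and uses `--gen_single` instead) | ||
| - **`--gen_single`**: Generate the header for exactly one kernel, selected by `--kernel_name`, `--tile_config` and `--trait_combo`; the test CMake file calls it once per entry returned by `--list_kernels` | ||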
|
Comment on lines
+20
to
+21
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The difference between these two is not very clear for me |
||
| - **Same verification**: Uses tile_engine's adaptive error thresholds and reference calculations | ||
| - **Same patterns**: Follows tile_engine's tensor initialization, stride calculation, and kernel launching | ||
|
|
||
| ### Config-Specific Test Parameters | ||
|
|
||
| Each test configuration can specify optimized problem sizes in its JSON file: | ||
| - **`test_params.problem_sizes`**: Array of `{m, n, k, split_k}` configurations | ||
| - **CMake extraction**: `extract_test_params.py` generates config-specific test parameter files | ||
| - **Build integration**: Each test target uses parameters appropriate for its kernel configuration | ||
| - **Optimized testing**: Different configs test different problem sizes that showcase their strengths | ||
|
|
||
|
|
||
| The key idea: **Unit tests that use tile_engine's exact kernel generation and verification methodology** instead of creating separate test infrastructure. | ||
|
|
||
| ## Test Configurations | ||
|
|
||
| ### 1. **Simple Test** (`simple_test_config.json`) | ||
arai713 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| - **Purpose**: Basic functionality validation for fp16/bf16 data types | ||
| - **Config**: 128x128x32, warp 2x2x1, warp_tile 32x32x16 | ||
| - **Traits**: compv3 pipeline only | ||
| - **Coverage**: All 4 layouts (rcr, rrr, ccr, crr) for fp16, bf16 | ||
|
|
||
| ## Data Type Support | ||
| - ✅ **fp16, bf16**: Fully supported - all layouts (rcr, rrr, ccr, crr) | ||
| - ❌ **fp64**: Not covered by these tests (fp64 MFMA is missing on gfx908 but present on other gfx9 targets, so this is not purely a hardware limitation) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fp64 mfma is supported on gfx9 (minus gfx908). Is this more of a CK limitation? |
||
| - ⏳ **fp32, bf8, pk-int4-t**: Not yet supported by gemm_streamk_instance_builder (will be added later) | ||
|
|
||
| ## Test Result Behavior | ||
|
|
||
| Tests automatically handle unsupported configurations through runtime validation: | ||
| - **PASSED**: Kernel executed correctly with results within error thresholds ✅ | ||
| - **SKIPPED**: Kernel validation returned "Arguments not supported" (expected for certain problem sizes/configurations) ⚠️ | ||
| - **FAILED**: Actual error or incorrect computation results ❌ | ||
|
|
||
| When a kernel's `IsSupportedArgument()` check fails (e.g., due to vector alignment requirements, dimension constraints, or padding limitations), the test is automatically skipped rather than failed. This allows comprehensive testing across various problem sizes while gracefully handling configurations that don't meet specific kernel requirements. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My opinion is there should never be a set of arguments passed to the test suite that are not supported. Because in a scenario, where IsSupportedArgument()'s implementation is changed, for instance someone unintentionally reduces the vector alignment requirements, it will result in valid kernels just being skipped rather than failing. So, we might miss this regression. IsSupportedArgument() is pretty fragile in a sense because it calls a lot of getter functions from all over the place. |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tabbed in.
Also there are job pools set up here. Where are they used?