4 changes: 4 additions & 0 deletions .gitignore
@@ -92,3 +92,7 @@ test_data/*
# The experimental/builder directory should be tracked despite matching build*
!experimental/builder
!experimental/builder/**
experimental/builder/src/instances/*
!experimental/builder/src/instances/*.in
!experimental/builder/src/instances/*.inc
experimental/builder/src/*.inc
8 changes: 4 additions & 4 deletions CMakeLists.txt
@@ -704,6 +704,10 @@ option(BUILD_MHA_LIB "Build the static library for flash attention" OFF)

add_subdirectory(library)

if (CK_EXPERIMENTAL_BUILDER)
add_subdirectory(experimental/builder)
endif()

if(NOT GPU_ARCHS AND USER_GPU_TARGETS AND NOT MIOPEN_REQ_LIBS_ONLY)
rocm_package_setup_component(tests
LIBRARY_NAME composablekernel
@@ -735,10 +739,6 @@ if (NOT MIOPEN_REQ_LIBS_ONLY)
add_subdirectory(profiler)
endif()

if (CK_EXPERIMENTAL_BUILDER)
add_subdirectory(experimental/builder)
endif()

if(CK_USE_CODEGEN AND (SUPPORTED_GPU_TARGETS MATCHES "gfx9" OR GPU_ARCHS))
add_subdirectory(codegen)
endif()
37 changes: 35 additions & 2 deletions Jenkinsfile
@@ -580,7 +580,7 @@ def cmake_build(Map conf=[:]){
if (params.NINJA_BUILD_TRACE) {
echo "running ninja build trace"
}
if (params.RUN_BUILDER_TESTS && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
if ((params.RUN_BUILDER_TESTS || params.RUN_FULL_CONV_TILE_TESTS) && !setup_args.contains("-DCK_CXX_STANDARD=") && !setup_args.contains("gfx10") && !setup_args.contains("gfx11")) {
setup_args = " -D CK_EXPERIMENTAL_BUILDER=ON " + setup_args
}
setup_cmd = conf.get(
@@ -1091,7 +1091,7 @@ CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;RUN_
0 19 * * * % BUILD_DOCKER=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false;NINJA_BUILD_TRACE=true;RUN_ALL_UNIT_TESTS=true;FORCE_CI=true
0 17 * * * % BUILD_DOCKER=true;COMPILER_VERSION=amd-mainline;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false;NINJA_BUILD_TRACE=true;RUN_ALL_UNIT_TESTS=true;FORCE_CI=true
0 15 * * * % BUILD_INSTANCES_ONLY=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true;FORCE_CI=true
0 13 * * * % RUN_AITER_TESTS=true;BUILD_LEGACY_OS=true;USE_SCCACHE=false;RUN_PERFORMANCE_TESTS=false;FORCE_CI=true
0 13 * * * % RUN_FULL_CONV_TILE_TESTS=true;RUN_AITER_TESTS=true;BUILD_LEGACY_OS=true;USE_SCCACHE=false;RUN_PERFORMANCE_TESTS=false;FORCE_CI=true
0 11 * * * % RUN_PYTORCH_TESTS=true;RUN_CODEGEN_TESTS=false;USE_SCCACHE=false;RUN_PERFORMANCE_TESTS=false;BUILD_GFX101=false;BUILD_GFX103=false;BUILD_GFX11=false;BUILD_GFX12=false;BUILD_GFX90A=false;FORCE_CI=true''' : ""

pipeline {
@@ -1255,6 +1255,10 @@ pipeline {
name: "RUN_AITER_TESTS",
defaultValue: false,
description: "Run AITER tests with latest CK develop branch (default: OFF)")
booleanParam(
name: "RUN_FULL_CONV_TILE_TESTS",
defaultValue: false,
description: "Run AITER tests with latest CK develop branch (default: OFF)")
string(
name: 'aiter_branch',
defaultValue: 'main',
@@ -1423,6 +1427,35 @@ pipeline {
}
}
}
stage("Run Full Grouped Conv Tile Tests")
{
when {
beforeAgent true
expression { env.SHOULD_RUN_CI.toBoolean() }
}
parallel
{
stage("Run Full Grouped Conv Tile Tests on gfx90a")
{
when {
beforeAgent true
expression { params.RUN_FULL_CONV_TILE_TESTS.toBoolean() }
}
agent{ label rocmnode("gfx90a")}
environment{
setup_args = "NO_CK_BUILD"
execute_args = """ python3 ../experimental/builder/src/generate_instances.py --mode=profiler && \
../script/cmake-ck-dev.sh ../ gfx90a && \
make -j64 test_grouped_convnd_fwd_tile && \
./bin/test_grouped_convnd_fwd_tile"""
}
steps{
buildHipClangJobAndReboot(setup_args:setup_args, build_type: 'Release', execute_cmd: execute_args)
cleanWs()
}
}
}
}
stage("Run Grouped Conv Large Case Tests")
{
when {
2 changes: 2 additions & 0 deletions experimental/builder/CMakeLists.txt
@@ -4,3 +4,5 @@
if(BUILD_TESTING)
add_subdirectory(test)
endif()

add_subdirectory(src)
Contributor:
Do we want to make the instance generation code part of the CK Builder code? This is not really a builder implementation but rather an application of the builder. I think the more natural place would be under /library/src/ck_tile, since we probably want to share this code between the CK Tile Profiler and the CK Tile tests.

Contributor:
Alternatively, we could consider placing the source code under tile_engine if we'll unify the code generation approaches.

Contributor (Author):
Good idea. I would like to do this in the next PR: move this script under tile_engine and align it with the tile engine implementation.
@@ -98,27 +98,26 @@ struct ConvTileFactory
using GemmPipeline = typename internal::TilePipelineType<
BLOCK_GEMM.pipeline_version>::template GemmPipeline<UniversalGemmProblem>;

using ConvEpilogue = ck_tile::CShuffleEpilogue<ck_tile::CShuffleEpilogueProblem<
typename Types::ADataType,
typename Types::BDataType,
typename Types::DsDataTypes,
typename Types::AccDataType,
typename Types::EDataType,
typename GroupedConvTraitsType::ImplicitGemmDsLayout,
typename GroupedConvTraitsType::FixedGemmParams::ELayout,
typename Ops::CDEElementwiseOp,
BLOCK.per_block.m,
BLOCK.per_block.n,
BLOCK_GEMM.warps.m,
BLOCK_GEMM.warps.n,
BLOCK_GEMM.warp_tile.m,
BLOCK_GEMM.warp_tile.n,
BLOCK_GEMM.warp_tile.k,
GroupedConvTraitsType::FixedGemmParams::TransposeC,
// TODO:: This template parameter will be moved inside the kernel
BLOCK_GEMM.num_wave_groups,
GroupedConvTraitsType::FixedGemmParams::FixedVectorSize,
SCALAR_PER_VECTOR.c>>;
using ConvEpilogue = ck_tile::CShuffleEpilogue<
ck_tile::CShuffleEpilogueProblem<typename Types::ADataType,
typename Types::BDataType,
typename Types::DsDataTypes,
typename Types::AccDataType,
typename Types::EDataType,
typename GroupedConvTraitsType::ImplicitGemmDsLayout,
typename GroupedConvTraitsType::FixedGemmParams::ELayout,
typename Ops::CDEElementwiseOp,
BLOCK.per_block.m,
BLOCK.per_block.n,
BLOCK_GEMM.warps.m,
BLOCK_GEMM.warps.n,
BLOCK_GEMM.warp_tile.m,
BLOCK_GEMM.warp_tile.n,
BLOCK_GEMM.warp_tile.k,
GroupedConvTraitsType::FixedGemmParams::TransposeC,
BLOCK_GEMM.num_wave_groups,
GroupedConvTraitsType::FixedGemmParams::FixedVectorSize,
SCALAR_PER_VECTOR.c>>;

using Instance = typename internal::GroupedConvolutionTileKernel<SIGNATURE,
GroupedConvTraitsType,
@@ -10,6 +10,7 @@
#include "ck_tile/builder/testing/testing_reflect.hpp"
#include "ck_tile/builder/testing/filter_extent.hpp"
#include "ck_tile/builder/testing/tensor_buffer.hpp"
#include "ck_tile/host/convolution_parameter.hpp"
#include "ck_tile/builder/testing/tensor_initialization.hpp"
#include "ck_tile/builder/testing/tensor_descriptor.hpp"
#include "ck_tile/builder/testing/validation.hpp"
@@ -93,6 +94,8 @@ struct Args<SIGNATURE>
Ops::WeiElementwiseOp b_elementwise_op;
Ops::OutElementwiseOp cde_elementwise_op;

int k_batch = 1;

/// This function returns the `TensorDescriptor` corresponding to
/// the input-tensor of the convolution problem. This can then
/// be used to, for example, allocate memory.
@@ -169,6 +172,36 @@
to_vector(this->input_left_pad),
to_vector(this->input_right_pad));
}

/// Convert the Args structure into a CK Tile conv_param structure.
/// It mainly exists so that the existing CK Tile functionality for
/// obtaining tensor descriptors can be reused.
ck_tile::conv::ConvParam to_ck_tile_conv_param() const
{
const auto to_vector = [](const auto& extent) {
if constexpr(SPATIAL_DIM == 1)
return std::vector<ck::index_t>{ck::index_t(extent.width)};
else if constexpr(SPATIAL_DIM == 2)
return std::vector<ck::index_t>{ck::index_t(extent.height),
ck::index_t(extent.width)};
else
return std::vector<ck::index_t>{ck::index_t(extent.depth),
ck::index_t(extent.height),
ck::index_t(extent.width)};
};

return ck_tile::conv::ConvParam(SPATIAL_DIM,
this->lengths.groups,
this->lengths.batch_size,
this->lengths.output_channels,
this->lengths.input_channels,
to_vector(this->lengths.filter),
to_vector(this->lengths.image),
to_vector(this->filter_strides),
to_vector(this->filter_dilation),
to_vector(this->input_left_pad),
to_vector(this->input_right_pad));
}
};

/// @brief `Inputs` specialization for forward convolution.
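A minimal sketch (not part of this PR) of how a test might use the additions above: the new k_batch member defaults to 1, and to_ck_tile_conv_param() hands the problem description to CK Tile host utilities. Only Args, k_batch, ConvParam and the conversion itself come from the diff; the helper name below is made up.

// Hypothetical helper, assuming the builder testing headers shown above are included.
template <auto SIGNATURE>
ck_tile::conv::ConvParam describe_problem(ck_tile::builder::test::Args<SIGNATURE>& args)
{
    args.k_batch = 1; // keep the default (presumably the split-K factor)
    // Reuse the existing CK Tile host-side ConvParam representation.
    return args.to_ck_tile_conv_param();
}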
@@ -4,6 +4,7 @@
#pragma once

#include "ck_tile/builder/testing/conv_fwd.hpp"
#include "ck_tile/host/kernel_launch.hpp"
#include "ck_tile/builder/factory/helpers/ck/conv_elementwise_op.hpp"
#include <type_traits>
#include <array>
@@ -93,10 +94,11 @@ concept CkConvInstance = detail::CkConvInstance<Conv, SIGNATURE>;
/// @see run()
template <auto SIGNATURE>
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsForward<SIGNATURE>
void run(CkConvInstance<SIGNATURE> auto& conv,
const Args<SIGNATURE>& args,
const Inputs<SIGNATURE>& inputs,
const Outputs<SIGNATURE>& outputs)
float run(CkConvInstance<SIGNATURE> auto& conv,
const Args<SIGNATURE>& args,
const Inputs<SIGNATURE>& inputs,
const Outputs<SIGNATURE>& outputs,
const StreamConfig s_conf = {})
{
constexpr auto spatial_dim = SIGNATURE.spatial_dim;

@@ -147,7 +149,7 @@ void run(CkConvInstance<SIGNATURE> auto& conv,
throw std::runtime_error("invalid argument");
}

conv.MakeInvoker().Run(ck_args, {});
return conv.MakeInvoker().Run(ck_args, s_conf);
}

} // namespace ck_tile::builder::test
@@ -0,0 +1,85 @@
// Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT

#pragma once

#include "ck_tile/builder/testing/conv_fwd.hpp"
#include "ck_tile/host/kernel_launch.hpp"
#include "ck_tile/builder/factory/helpers/ck/conv_elementwise_op.hpp"
#include <type_traits>
#include <array>

/// This file contains the implementation details for invoking/testing
/// grouped convolution operations in CK Tile. The main item is the
/// `run()` function, which is used to invoke CK Tile grouped forward
/// convolution kernels.

namespace ck_tile::builder::test {

namespace detail {

/// @brief Concept for checking whether this is the CK Tile convolution
/// implementation.
///
/// This is the same as `::ck_tile::builder::test::CkConvInstance`, except
/// with some utility aliases. For that reason, it's moved to this detail
/// namespace.
template <typename Conv, auto SIGNATURE>
concept CkTileConvInstance = requires(Conv&) {
{ Conv::BlockSize() };
};

} // namespace detail

/// @brief Concept for checking whether a convolution is invoked like CK Tile.
///
/// This concept is used to tell whether a convolution implementation is
/// likely to be an "CK Tile" implementation - that is, whether we should
/// invoke it as an CK Tile kernel. This is mainly used with `run()` to
/// differentiate which implementation that should be invoked.
///
/// - SIGNATURE is the operation signature.
/// - Conv is a convolution instance created by the CK Builder API.
template <typename Conv, auto SIGNATURE>
concept CkTileConvInstance = detail::CkTileConvInstance<Conv, SIGNATURE>;

/// @brief `run()` specialization for forward convolution and CK Tile.
///
/// @tparam SIGNATURE Forward convolution signature.
/// @throws std::runtime_error if the arguments weren't actually valid for the
/// operation. This should be caught and reported by the testing framework.
///
/// @see run()
template <auto SIGNATURE>
requires ValidConvSignature<SIGNATURE> && ConvDirectionIsForward<SIGNATURE>
float run(CkTileConvInstance<SIGNATURE> auto& conv,
const Args<SIGNATURE>& args,
const Inputs<SIGNATURE>& inputs,
const Outputs<SIGNATURE>& outputs,
const ck_tile::stream_config s_conf = {})
{
using Conv = std::remove_reference_t<decltype(conv)>;
const auto param = args.to_ck_tile_conv_param();

ck_tile::GroupedConvFwdHostArgs<> host_args(
param, inputs.input, inputs.weight, {}, outputs.output, args.k_batch);

auto kargs = Conv::MakeKernelArgs(host_args);

const dim3 grids = Conv::GridSize(kargs);
const dim3 blocks = Conv::BlockSize();

if(!Conv::IsSupportedArgument(kargs))
{
std::cout << "Not supported!";
return 0.f;
}

constexpr index_t minimum_occupancy =
Conv::GemmPipeline::Scheduler == ck_tile::GemmPipelineScheduler::Intrawave ? 1 : 2;

return ck_tile::launch_kernel(
s_conf, ck_tile::make_kernel<minimum_occupancy>(conv, grids, blocks, 0, kargs));
}

} // namespace ck_tile::builder::test
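A hedged usage sketch (not part of this PR) of the overload above: it returns the value from launch_kernel(), so a test can time the kernel via the stream_config. The stream_config initialization below (null stream plus a time-kernel flag) is an assumption about the CK Tile host API; run(), Args, Inputs and Outputs come from the code above.

// Hypothetical wrapper around the run() overload defined in this file.
template <auto SIGNATURE>
float time_tile_conv(auto& conv,
                     const ck_tile::builder::test::Args<SIGNATURE>& args,
                     const ck_tile::builder::test::Inputs<SIGNATURE>& inputs,
                     const ck_tile::builder::test::Outputs<SIGNATURE>& outputs)
{
    // Ask launch_kernel() to time the kernel; run() forwards this config
    // and returns the measured time (0.f when the arguments are unsupported).
    const ck_tile::stream_config s_conf{nullptr, /*time_kernel=*/true};
    return ck_tile::builder::test::run<SIGNATURE>(conv, args, inputs, outputs, s_conf);
}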
@@ -69,10 +69,10 @@ template <auto SIGNATURE>
// for now, just concern ourselves with reference and see when the
// rest of the bwd/weight plumbing is there.
ConvDirectionIsForward<SIGNATURE>
void run(RefConvInstance<SIGNATURE> auto& conv,
const Args<SIGNATURE>& args,
const Inputs<SIGNATURE>& inputs,
const Outputs<SIGNATURE>& outputs)
float run(RefConvInstance<SIGNATURE> auto& conv,
const Args<SIGNATURE>& args,
const Inputs<SIGNATURE>& inputs,
const Outputs<SIGNATURE>& outputs)
{
// We don't want to compute the output dims manually, just get
// them via the existing infrastructure
@@ -109,6 +109,7 @@ void run(RefConvInstance<SIGNATURE> auto& conv,
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_);
return 0.f;
}

} // namespace ck_tile::builder::test
@@ -34,4 +34,25 @@ struct FilterExtent<3>
size_t depth = 1;
};

template <int SPATIAL_DIM>
inline FilterExtent<SPATIAL_DIM> filter_extent_from_vector(const std::vector<std::size_t>& vec);

template <>
inline FilterExtent<1> filter_extent_from_vector<1>(const std::vector<std::size_t>& vec)
{
return FilterExtent<1>{.width = vec[0]};
}

template <>
inline FilterExtent<2> filter_extent_from_vector<2>(const std::vector<std::size_t>& vec)
{
return FilterExtent<2>{.width = vec[1], .height = vec[0]};
}

template <>
inline FilterExtent<3> filter_extent_from_vector<3>(const std::vector<std::size_t>& vec)
{
return FilterExtent<3>{.width = vec[2], .height = vec[1], .depth = vec[0]};
}

} // namespace ck_tile::builder::test
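A small illustration (not part of this PR) of the ordering these helpers assume: the vector form lists the outermost spatial dimension first, mirroring the to_vector lambdas earlier in this diff. The literal sizes are made up; the snippet assumes "ck_tile/builder/testing/filter_extent.hpp" is included.

#include <cassert>
#include <vector>

void filter_extent_ordering_example()
{
    using namespace ck_tile::builder::test;

    // 2-D: vec[0] is height, vec[1] is width.
    const FilterExtent<2> f2 = filter_extent_from_vector<2>({5, 3});
    assert(f2.height == 5 && f2.width == 3);

    // 3-D: vec[0] is depth, vec[1] is height, vec[2] is width.
    const FilterExtent<3> f3 = filter_extent_from_vector<3>({2, 5, 3});
    assert(f3.depth == 2 && f3.height == 5 && f3.width == 3);
}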
@@ -418,6 +418,10 @@ struct TensorDescriptor
size_t x = 1;
for(size_t i = 0; i < RANK; ++i)
{
if(lengths[indices[i]] == 1)
{
continue;
}
if(strides[indices[i]] != x)
return false;

@@ -443,6 +447,15 @@
return TensorDescriptor<DT, 1>(lengths, strides);
}

/// @brief Print tensor descriptor details.
///
/// Streams the descriptor's lengths and strides.
friend std::ostream& operator<<(std::ostream& os, const TensorDescriptor<DT, RANK>& tensor_desc)
{
os << tensor_desc.inner_descriptor_;
return os;
}

private:
ck_tile::HostTensorDescriptor inner_descriptor_;
};
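To clarify the new length-1 skip above, a standalone sketch (not the PR's actual implementation; the accumulation of x is an assumption about the part of the loop elided in the diff): a dimension of extent 1 may carry any stride without breaking the packed-layout check.

#include <array>
#include <cstddef>

// Simplified contiguity check, fastest-varying dimension last.
template <std::size_t RANK>
bool is_packed(const std::array<std::size_t, RANK>& lengths,
               const std::array<std::size_t, RANK>& strides)
{
    std::size_t x = 1;
    for(std::size_t i = RANK; i-- > 0;)
    {
        if(lengths[i] == 1)
            continue; // extent-1 dimensions may have arbitrary strides
        if(strides[i] != x)
            return false;
        x *= lengths[i]; // assumed accumulation, elided in the diff above
    }
    return true;
}

// Example: lengths {4, 1, 8} with strides {8, 999, 1} now count as packed;
// without the skip, the arbitrary stride on the extent-1 dimension fails the check.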