Skip to content

Commit 60b8083

Browse files
committed
Added the EntryOp for cuda tile IR
1 parent fd70514 commit 60b8083

File tree

7 files changed

+169
-7
lines changed

7 files changed

+169
-7
lines changed

mlir/cuda-tile/CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,17 @@ message(STATUS "MLIR_INCLUDE_DIR include dir: ${MLIR_INCLUDE_DIR}")
2020
# This is for non-conda users.
2121
find_package(LLVM CONFIG PATHS ${CMAKE_CURRENT_SOURCE_DIR}/third_party/lib/cmake/llvm)
2222
find_package(MLIR CONFIG PATHS ${CMAKE_CURRENT_SOURCE_DIR}/third_party/lib/cmake/mlir)
23-
23+
find_package(CUDAToolkit REQUIRED)
2424
# set(MLIR_TABLEGEN_EXE ${CMAKE_CURRENT_SOURCE_DIR}/third_party/bin/mlir-tblgen)
25+
message(STATUS "CUDA Toolkit found: ${CUDAToolkit_INCLUDE_DIRS}")
26+
message(STATUS "CUDA_TILE_SOURCE_DIR include dir: ${CUDA_TILE_SOURCE_DIR}")
27+
message(STATUS "CUDA_TILE_BINARY_DIR include dir: ${CUDA_TILE_BINARY_DIR}")
2528

2629
include_directories(${LLVM_INCLUDE_DIR})
2730
include_directories(${MLIR_INCLUDE_DIR})
31+
include_directories(${CUDAToolkit_INCLUDE_DIRS})
32+
include_directories(${CUDA_TILE_SOURCE_DIR}/include)
33+
include_directories(${CUDA_TILE_BINARY_DIR}/include)
2834

2935
include(LLVMDistributionSupport)
3036
include(TableGen)

mlir/cuda-tile/Toy/CMakeLists.txt

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
find_package(CUDAToolkit REQUIRED)
2-
3-
message(STATUS "CUDA Toolkit found: ${CUDAToolkit_INCLUDE_DIRS}")
4-
51
# For a better template to copy, see examples/standalone
62
include_directories(include)
73
add_subdirectory(include)
@@ -30,6 +26,7 @@ add_executable(
3026
mlir/ShapeInferencePass.cpp
3127
mlir/ToyCombine.cpp
3228
mlir/LowerToGpu.cpp
29+
mlir/LowerToCudaTile.cpp
3330
)
3431

3532
add_dependencies(toy-cuda
@@ -62,5 +59,6 @@ target_link_libraries(toy-cuda
6259
MLIRSupport
6360
MLIRTargetLLVMIRExport
6461
MLIRTransforms
62+
CudaTileDialect
6563
cuda_shim
6664
)

mlir/cuda-tile/Toy/include/toy/Passes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ std::unique_ptr<mlir::Pass> createLowerToLLVMPass();
3232

3333
std::unique_ptr<mlir::Pass> createGpuOutlinePass(std::string grid="1,1,1");
3434

35+
std::unique_ptr<mlir::Pass> createCudaTileLoweringPass();
36+
3537
} // namespace toy
3638
} // namespace mlir
3739

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#include "cuda_tile/Dialect/CudaTile/IR/Types.h"
2+
#include "mlir/IR/Attributes.h"
3+
#include "mlir/IR/Block.h"
4+
#include "mlir/IR/Builders.h"
5+
#include "mlir/IR/BuiltinOps.h"
6+
#include "mlir/IR/BuiltinTypes.h"
7+
#include "mlir/IR/DialectRegistry.h"
8+
#include "mlir/IR/IRMapping.h"
9+
#include "mlir/IR/Operation.h"
10+
#include "mlir/IR/SymbolTable.h"
11+
#include "mlir/IR/Types.h"
12+
#include "mlir/IR/Value.h"
13+
#include "mlir/Pass/Pass.h"
14+
#include "mlir/Support/LLVM.h"
15+
#include "mlir/Support/TypeID.h"
16+
#include "toy/Dialect.h"
17+
#include "toy/Passes.h"
18+
#include "llvm/ADT/STLExtras.h"
19+
#include "llvm/ADT/SmallPtrSet.h"
20+
#include "llvm/ADT/SmallSet.h"
21+
#include "llvm/ADT/SmallVector.h"
22+
#include "llvm/ADT/StringExtras.h"
23+
#include "llvm/ADT/StringRef.h"
24+
#include "llvm/Support/Casting.h"
25+
#include "llvm/Support/DebugLog.h"
26+
27+
#include "cuda_tile/Dialect/CudaTile/IR/Dialect.h"
28+
#include "cuda_tile/Dialect/CudaTile/IR/Ops.h"
29+
30+
#include <memory>
31+
#include <string>
32+
33+
#define DEBUG_TYPE "toy-to-cuda-tile"
34+
35+
//===----------------------------------------------------------------------===//
36+
// ToyToCudaTileLoweringPass
37+
//===----------------------------------------------------------------------===//
38+
39+
namespace {
40+
struct ToyToCudaTileLoweringPass
41+
: public mlir::PassWrapper<ToyToCudaTileLoweringPass,
42+
mlir::OperationPass<mlir::ModuleOp>> {
43+
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ToyToCudaTileLoweringPass)
44+
45+
llvm::StringRef getArgument() const override { return "toy-to-cuda-tile"; }
46+
47+
void getDependentDialects(mlir::DialectRegistry &registry) const override {
48+
registry.insert<mlir::cuda_tile::CudaTileDialect>();
49+
}
50+
51+
void runOnOperation() final;
52+
};
53+
}; // namespace
54+
55+
/// Creates a `mlir::cuda_tile::ModuleOp` as the last operation in the body of
/// `moduleOp` and returns it.
///
/// The new op is created at `moduleOp`'s location with the string
/// "cuda_tile_module". The insertion point of `builder` is saved on entry and
/// restored when this function returns.
mlir::cuda_tile::ModuleOp createCudaModuleOp(mlir::OpBuilder &builder,
                                             mlir::ModuleOp &moduleOp) {
  mlir::OpBuilder::InsertionGuard restoreInsertionPoint(builder);

  // Append after everything that already lives in the host module.
  mlir::Block *hostBody = moduleOp.getBody();
  builder.setInsertionPointToEnd(hostBody);

  mlir::cuda_tile::ModuleOp tileModule = mlir::cuda_tile::ModuleOp::create(
      builder, moduleOp.getLoc(), "cuda_tile_module");

  LDBG() << "Created CudaTile Module: \n" << tileModule << "\n";
  return tileModule;
}
66+
67+
void ToyToCudaTileLoweringPass::runOnOperation() {
68+
auto moduleOp = getOperation();
69+
70+
// Here we would implement the actual lowering logic from Toy GPUFuncOp
71+
// to CudaTile operations. For now, we just log that the pass is running.
72+
// LDBG() << "Running Toy to CudaTile lowering on GPUFuncOp: " << moduleOp
73+
// << "\n";
74+
75+
mlir::OpBuilder builder(moduleOp.getContext());
76+
// 1. Create new cuda_tile.module Op in the last section.
77+
auto cudaTileModuleOp = createCudaModuleOp(builder, moduleOp);
78+
// mlir::SymbolTable cudaTileSymbolTable(cudaTileModuleOp);
79+
80+
moduleOp->walk([&](mlir::toy::GPUFuncOp gfunOp) {
81+
mlir::OpBuilder::InsertionGuard guard(builder);
82+
// setInsertionPointToEnd expects a Block*, so take the address of the
83+
// single block inside the cuda_tile.module region.
84+
builder.setInsertionPointToEnd(&cudaTileModuleOp.getBodyRegion().front());
85+
auto gfunc_name =
86+
gfunOp->getAttrOfType<mlir::StringAttr>("sym_name").getValue();
87+
llvm::SmallVector<mlir::Type, 8> newArgTypes;
88+
89+
LDBG() << "Lowering GPU function: " << gfunc_name << "\n";
90+
LDBG() << "Converting input type into cuda tile type" << "\n";
91+
92+
for (mlir::Type t : gfunOp.getFunctionType().getInputs()) {
93+
LDBG() << "Original arg type: " << t << "\n";
94+
auto tt = llvm::dyn_cast<mlir::TensorType>(t);
95+
auto elemType = tt.getElementType();
96+
auto ptrElem = mlir::cuda_tile::PointerType::get(elemType);
97+
auto newType = mlir::cuda_tile::TileType::get({}, ptrElem);
98+
LDBG() << "The new arg type for cuda tile: " << newType << "\n";
99+
newArgTypes.push_back(newType);
100+
}
101+
102+
LDBG() << "Converting result type into cuda tile type" << "\n";
103+
for (mlir::Type t : gfunOp.getFunctionType().getResults()) {
104+
LDBG() << "Original result type: " << t << "\n";
105+
auto tt = llvm::dyn_cast<mlir::TensorType>(t);
106+
auto elemType = tt.getElementType();
107+
auto ptrElem = mlir::cuda_tile::PointerType::get(elemType);
108+
auto newType = mlir::cuda_tile::TileType::get({}, ptrElem);
109+
LDBG() << "The new arg type for cuda tile: " << newType << "\n";
110+
newArgTypes.push_back(newType);
111+
}
112+
113+
auto newFnType = builder.getFunctionType(newArgTypes, {});
114+
auto fname = builder.getStringAttr(gfunc_name);
115+
auto argTypes = builder.getTypeArrayAttr(newArgTypes);
116+
auto cudaEntryOp = mlir::cuda_tile::EntryOp::create(
117+
builder, gfunOp.getLoc(), fname, newFnType,
118+
/*arg_attrs=*/{}, /*res_attrs=*/{}, {});
119+
auto bb = cudaEntryOp.addEntryBlock();
120+
builder.setInsertionPointToStart(bb);
121+
auto retOp = mlir::cuda_tile::ReturnOp::create(builder, gfunOp.getLoc());
122+
123+
LDBG() << "Created CudaTile Entry Op: \n" << cudaEntryOp << "\n";
124+
});
125+
}
126+
127+
namespace mlir::toy {
128+
129+
std::unique_ptr<mlir::Pass> createCudaTileLoweringPass() {
130+
return std::make_unique<ToyToCudaTileLoweringPass>();
131+
};
132+
133+
}; // namespace mlir::toy

mlir/cuda-tile/Toy/toyc.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,8 @@ static int loadAndProcessMLIRGPU(mlir::MLIRContext &context,
333333

334334
// Now process the toy mlir with gpu outline pass.
335335
optPM.addPass(mlir::toy::createGpuOutlinePass(assignGrid));
336-
// pm.addPass(mlir::toy::createCudaTileLoweringPass());
336+
// mlir::OpPassManager &gpuOptPM = pm.nest<mlir::toy::GPUFuncOp>();
337+
pm.addPass(mlir::toy::createCudaTileLoweringPass());
337338
// pm.addPass(mlir::toy::createLowerGpuHostToLLVMPass());
338339
// bool isLoweringToAffine = emitAction >= Action::DumpGpuAffine;
339340
// if (isLoweringToAffine) {
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
module {
2+
toy.func @main() {
3+
%0 = toy.constant dense<[[1.000000e+00, 2.000000e+00, 3.000000e+00], [4.000000e+00, 5.000000e+00, 6.000000e+00]]> : tensor<2x3xf32>
4+
%1 = toy.constant dense<[[1.100000e+01, 1.200000e+01, 1.300000e+01], [1.400000e+01, 1.500000e+01, 1.600000e+01]]> : tensor<2x3xf32>
5+
%2 = toy.launch_gpu @outlined_gpu_kernel_0(%1, %0) {grid = array<i64: 4, 2, 1>} : (tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x2xf32>
6+
toy.print %2 : tensor<2x2xf32>
7+
%3 = toy.constant dense<[[7.000000e+00, 8.000000e+00, 9.000000e+00], [1.000000e+01, 1.100000e+01, 1.200000e+01]]> : tensor<2x3xf32>
8+
%4 = toy.launch_gpu @outlined_gpu_kernel_1(%0, %3, %1) {grid = array<i64: 4, 2, 1>} : (tensor<2x3xf32>, tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32>
9+
toy.print %4 : tensor<2x3xf32>
10+
toy.return
11+
}
12+
toy.gpu_func @outlined_gpu_kernel_0(%arg0: tensor<2x3xf32>, %arg1: tensor<2x3xf32>) -> tensor<2x2xf32> {
13+
%0 = toy.transpose(%arg0 : tensor<2x3xf32>) to tensor<3x2xf32>
14+
%1 = toy.matmul(%arg1 : tensor<2x3xf32>, %0 : tensor<3x2xf32>) to tensor<2x2xf32>
15+
toy.return %1 : tensor<2x2xf32>
16+
}
17+
toy.gpu_func @outlined_gpu_kernel_1(%arg0: tensor<2x3xf32>, %arg1: tensor<2x3xf32>, %arg2: tensor<2x3xf32>) -> tensor<2x3xf32> {
18+
%0 = toy.mul %arg0, %arg1 : tensor<2x3xf32>
19+
%1 = toy.add %0, %arg2 : tensor<2x3xf32>
20+
toy.return %1 : tensor<2x3xf32>
21+
}
22+
}

mlir/cuda-tile/scripts/build_cuda_tile.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ cmake -G Ninja -S ${WORKSPACEROOT}/third_party/cuda-tile -B build \
1717
-DLLVM_ENABLE_ASSERTIONS=OFF \
1818
-DCUDA_TILE_ENABLE_BINDINGS_PYTHON=OFF \
1919
-DCUDA_TILE_ENABLE_TESTING=OFF \
20-
-DCUDA_TILE_INSTALL_DIR=${WORKSPACEROOT}/third_party/cuda \
20+
-DCMAKE_INSTALL_PREFIX=${WORKSPACEROOT}/third_party/cuda \
2121
-DCUDA_TILE_USE_LLVM_INSTALL_DIR=${WORKSPACEROOT}/third_party/llvm
2222

2323
cmake --build build

0 commit comments

Comments
 (0)