Skip to content

Commit 8a0ba30

Browse files
committed
Change float type to FP32
1 parent 88ce121 commit 8a0ba30

File tree

4 files changed

+56
-44
lines changed

4 files changed

+56
-44
lines changed

mlir/cuda-tile/Toy/include/toy/Ops.td

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ include "mlir/Interfaces/CastInterfaces.td"
2020
include "mlir/Interfaces/SideEffectInterfaces.td"
2121
include "toy/ShapeInferenceInterface.td"
2222

23+
def F32ElementsAttr : FloatElementsAttr<32>;
24+
2325
// Provide a definition of the 'toy' dialect in the ODS framework so that we
2426
// can define our operations.
2527
def Toy_Dialect : Dialect {
@@ -57,15 +59,15 @@ def ConstantOp : Toy_Op<"constant", [Pure]> {
5759

5860
```mlir
5961
%0 = toy.constant dense<[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]>
60-
: tensor<2x3xf64>
62+
: tensor<2x3xf32>
6163
```
6264
}];
6365

6466
// The constant operation takes an attribute as the only input.
65-
let arguments = (ins F64ElementsAttr:$value);
67+
let arguments = (ins F32ElementsAttr:$value);
6668

6769
// The constant operation returns a single value of TensorType.
68-
let results = (outs F64Tensor);
70+
let results = (outs F32Tensor);
6971

7072
// Indicate that the operation has a custom parser and printer method.
7173
let hasCustomAssemblyFormat = 1;
@@ -80,7 +82,7 @@ def ConstantOp : Toy_Op<"constant", [Pure]> {
8082
}]>,
8183

8284
// Build a constant with a given constant floating-point value.
83-
OpBuilder<(ins "double":$value)>
85+
OpBuilder<(ins "float":$value)>
8486
];
8587

8688
// Indicate that additional verification for this operation is necessary.
@@ -99,8 +101,8 @@ def AddOp : Toy_Op<"add",
99101
The shapes of the tensor operands are expected to match.
100102
}];
101103

102-
let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
103-
let results = (outs F64Tensor);
104+
let arguments = (ins F32Tensor:$lhs, F32Tensor:$rhs);
105+
let results = (outs F32Tensor);
104106

105107
// Indicate that the operation has a custom parser and printer method.
106108
let hasCustomAssemblyFormat = 1;
@@ -130,8 +132,8 @@ def CastOp : Toy_Op<"cast", [
130132
mismatching constant dimension.
131133
}];
132134

133-
let arguments = (ins F64Tensor:$input);
134-
let results = (outs F64Tensor:$output);
135+
let arguments = (ins F32Tensor:$input);
136+
let results = (outs F32Tensor:$output);
135137

136138
let assemblyFormat = "$input attr-dict `:` type($input) `to` type($output)";
137139
}
@@ -152,9 +154,9 @@ def FuncOp : Toy_Op<"func", [
152154

153155
```mlir
154156
toy.func @main() {
155-
%0 = toy.constant dense<5.500000e+00> : tensor<f64>
156-
%1 = toy.reshape(%0 : tensor<f64>) to tensor<2x2xf64>
157-
toy.print %1 : tensor<2x2xf64>
157+
%0 = toy.constant dense<5.500000e+00> : tensor<f32>
158+
%1 = toy.reshape(%0 : tensor<f32>) to tensor<2x2xf32>
159+
toy.print %1 : tensor<2x2xf32>
158160
toy.return
159161
}
160162
```
@@ -205,7 +207,7 @@ def GenericCallOp : Toy_Op<"generic_call",
205207

206208
```mlir
207209
%4 = toy.generic_call @my_func(%1, %3)
208-
: (tensor<2x3xf64>, tensor<2x3xf64>) -> tensor<*xf64>
210+
: (tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<*xf32>
209211
```
210212

211213
This is only valid if a function named "my_func" exists and takes two
@@ -216,13 +218,13 @@ def GenericCallOp : Toy_Op<"generic_call",
216218
// callee, and inputs for the call.
217219
let arguments = (ins
218220
FlatSymbolRefAttr:$callee,
219-
Variadic<F64Tensor>:$inputs,
221+
Variadic<F32Tensor>:$inputs,
220222
OptionalAttr<DictArrayAttr>:$arg_attrs,
221223
OptionalAttr<DictArrayAttr>:$res_attrs
222224
);
223225

224226
// The generic call operation returns a single value of TensorType.
225-
let results = (outs F64Tensor);
227+
let results = (outs F32Tensor);
226228

227229
// Specialize assembly printing and parsing using a declarative format.
228230
let assemblyFormat = [{
@@ -247,8 +249,8 @@ def MulOp : Toy_Op<"mul",
247249
tensors. The shapes of the tensor operands are expected to match.
248250
}];
249251

250-
let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
251-
let results = (outs F64Tensor);
252+
let arguments = (ins F32Tensor:$lhs, F32Tensor:$rhs);
253+
let results = (outs F32Tensor);
252254

253255
// Indicate that the operation has a custom parser and printer method.
254256
let hasCustomAssemblyFormat = 1;
@@ -271,8 +273,8 @@ def PrintOp : Toy_Op<"print"> {
271273
}];
272274

273275
// The print operation takes an input tensor to print.
274-
// We also allow a F64MemRef to enable interop during partial lowering.
275-
let arguments = (ins AnyTypeOf<[F64Tensor, F64MemRef]>:$input);
276+
// We also allow a F32MemRef to enable interop during partial lowering.
277+
let arguments = (ins AnyTypeOf<[F32Tensor, F32MemRef]>:$input);
276278

277279
let assemblyFormat = "$input attr-dict `:` type($input)";
278280
}
@@ -288,11 +290,11 @@ def ReshapeOp : Toy_Op<"reshape", [Pure]> {
288290
the same number of elements but different shapes. For example:
289291

290292
```mlir
291-
%0 = toy.reshape (%arg1 : tensor<10xf64>) to tensor<5x2xf64>
293+
%0 = toy.reshape (%arg1 : tensor<10xf32>) to tensor<5x2xf32>
292294
```
293295
}];
294296

295-
let arguments = (ins F64Tensor:$input);
297+
let arguments = (ins F32Tensor:$input);
296298

297299
let assemblyFormat = [{
298300
`(` $input `:` type($input) `)` attr-dict `to` type(results)
@@ -302,7 +304,7 @@ def ReshapeOp : Toy_Op<"reshape", [Pure]> {
302304
let hasCanonicalizer = 1;
303305

304306
// We expect that the reshape operation returns a statically shaped tensor.
305-
let results = (outs StaticShapeTensorOf<[F64]>);
307+
let results = (outs StaticShapeTensorOf<[F32]>);
306308
}
307309

308310
//===----------------------------------------------------------------------===//
@@ -319,16 +321,16 @@ def ReturnOp : Toy_Op<"return", [Pure, HasParent<"FuncOp">,
319321
the operation. For example:
320322

321323
```mlir
322-
toy.func @foo() -> tensor<2xf64> {
324+
toy.func @foo() -> tensor<2xf32> {
323325
...
324-
toy.return %0 : tensor<2xf64>
326+
toy.return %0 : tensor<2xf32>
325327
}
326328
```
327329
}];
328330

329331
// The return operation takes an optional input operand to return. This
330332
// value must match the return type of the enclosing function.
331-
let arguments = (ins Variadic<F64Tensor>:$input);
333+
let arguments = (ins Variadic<F32Tensor>:$input);
332334

333335
// The return operation only emits the input in the format if it is present.
334336
let assemblyFormat = "($input^ `:` type($input))? attr-dict ";
@@ -355,8 +357,8 @@ def TransposeOp : Toy_Op<"transpose",
355357
[Pure, DeclareOpInterfaceMethods<ShapeInferenceOpInterface>]> {
356358
let summary = "transpose operation";
357359

358-
let arguments = (ins F64Tensor:$input);
359-
let results = (outs F64Tensor);
360+
let arguments = (ins F32Tensor:$input);
361+
let results = (outs F32Tensor);
360362

361363
let assemblyFormat = [{
362364
`(` $input `:` type($input) `)` attr-dict `to` type(results)
@@ -386,8 +388,8 @@ def MatMulOp : Toy_Op<"matmul",
386388
tensors. The shapes of the tensor operands are expected to match.
387389
}];
388390

389-
let arguments = (ins F64Tensor:$lhs, F64Tensor:$rhs);
390-
let results = (outs Res<F64Tensor, "",
391+
let arguments = (ins F32Tensor:$lhs, F32Tensor:$rhs);
392+
let results = (outs Res<F32Tensor, "",
391393
[MemWrite<DefaultResource>,
392394
MemAlloc<DefaultResource>]>:$output);
393395

mlir/cuda-tile/Toy/mlir/Dialect.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,8 @@ static void printBinaryOp(mlir::OpAsmPrinter &printer, mlir::Operation *op) {
169169
/// The builder is passed as an argument, so is the state that this method is
170170
/// expected to fill in order to build the operation.
171171
void ConstantOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
172-
double value) {
173-
auto dataType = RankedTensorType::get({}, builder.getF64Type());
172+
float value) {
173+
auto dataType = RankedTensorType::get({}, builder.getF32Type());
174174
auto dataAttribute = DenseElementsAttr::get(dataType, value);
175175
ConstantOp::build(builder, state, dataType, dataAttribute);
176176
}
@@ -238,7 +238,7 @@ llvm::LogicalResult ConstantOp::verify() {
238238

239239
void AddOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
240240
mlir::Value lhs, mlir::Value rhs) {
241-
state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
241+
state.addTypes(UnrankedTensorType::get(builder.getF32Type()));
242242
state.addOperands({lhs, rhs});
243243
}
244244

@@ -319,7 +319,7 @@ void FuncOp::print(mlir::OpAsmPrinter &p) {
319319
void GenericCallOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
320320
StringRef callee, ArrayRef<mlir::Value> arguments) {
321321
// Generic call always returns an unranked Tensor initially.
322-
state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
322+
state.addTypes(UnrankedTensorType::get(builder.getF32Type()));
323323
state.addOperands(arguments);
324324
state.addAttribute("callee",
325325
mlir::SymbolRefAttr::get(builder.getContext(), callee));
@@ -353,7 +353,7 @@ MutableOperandRange GenericCallOp::getArgOperandsMutable() {
353353

354354
void MulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
355355
mlir::Value lhs, mlir::Value rhs) {
356-
state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
356+
state.addTypes(UnrankedTensorType::get(builder.getF32Type()));
357357
state.addOperands({lhs, rhs});
358358
}
359359

@@ -412,7 +412,7 @@ llvm::LogicalResult ReturnOp::verify() {
412412

413413
void TransposeOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
414414
mlir::Value value) {
415-
state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
415+
state.addTypes(UnrankedTensorType::get(builder.getF32Type()));
416416
state.addOperands(value);
417417
}
418418

@@ -443,7 +443,7 @@ llvm::LogicalResult TransposeOp::verify() {
443443

444444
void MatMulOp::build(mlir::OpBuilder &builder, mlir::OperationState &state,
445445
mlir::Value lhs, mlir::Value rhs) {
446-
state.addTypes(UnrankedTensorType::get(builder.getF64Type()));
446+
state.addTypes(UnrankedTensorType::get(builder.getF32Type()));
447447
state.addOperands({lhs, rhs});
448448
}
449449

mlir/cuda-tile/Toy/mlir/LowerToLLVM.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,18 @@ class PrintOpLowering : public OpConversionPattern<toy::PrintOp> {
109109
// Generate a call to printf for the current element of the loop.
110110
auto elementLoad =
111111
memref::LoadOp::create(rewriter, loc, op.getInput(), loopIvs);
112+
113+
// Varargs promotion: float -> double
114+
Value arg = elementLoad;
115+
Type t = elementLoad.getType();
116+
if (t.isF32()) {
117+
arg = arith::ExtFOp::create(rewriter, loc, rewriter.getF64Type(), arg);
118+
} else if (!t.isF64()) {
119+
return rewriter.notifyMatchFailure(op, "toy.print only supports f32/f64");
120+
}
121+
112122
LLVM::CallOp::create(rewriter, loc, getPrintfType(context), printfRef,
113-
ArrayRef<Value>({formatSpecifierCst, elementLoad}));
123+
ArrayRef<Value>({formatSpecifierCst, arg}));
114124

115125
// Notify the rewriter that this operation has been removed.
116126
rewriter.eraseOp(op);

mlir/cuda-tile/Toy/mlir/MLIRGen.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -258,22 +258,22 @@ class MLIRGenImpl {
258258
/// Example, the source level statement:
259259
/// var a<2, 3> = [[1, 2, 3], [4, 5, 6]];
260260
/// will be converted to:
261-
/// %0 = "toy.constant"() {value: dense<tensor<2x3xf64>,
261+
/// %0 = "toy.constant"() {value: dense<tensor<2x3xf32>,
262262
/// [[1.000000e+00, 2.000000e+00, 3.000000e+00],
263-
/// [4.000000e+00, 5.000000e+00, 6.000000e+00]]>} : () -> tensor<2x3xf64>
263+
/// [4.000000e+00, 5.000000e+00, 6.000000e+00]]>} : () -> tensor<2x3xf32>
264264
///
265265
mlir::Value mlirGen(LiteralExprAST &lit) {
266266
auto type = getType(lit.getDims());
267267

268268
// The attribute is a vector with a floating point value per element
269269
// (number) in the array, see `collectData()` below for more details.
270-
std::vector<double> data;
270+
std::vector<float> data;
271271
data.reserve(llvm::product_of(lit.getDims()));
272272
collectData(lit, data);
273273

274274
// The type of this attribute is tensor of 32-bit floating-point with the
275275
// shape of the literal.
276-
mlir::Type elementType = builder.getF64Type();
276+
mlir::Type elementType = builder.getF32Type();
277277
auto dataType = mlir::RankedTensorType::get(lit.getDims(), elementType);
278278

279279
// This is the actual attribute that holds the list of values for this
@@ -292,9 +292,9 @@ class MLIRGenImpl {
292292
/// [[1, 2], [3, 4]]
293293
/// we will generate:
294294
/// [ 1, 2, 3, 4 ]
295-
/// Individual numbers are represented as doubles.
295+
/// Individual numbers are represented as floats.
296296
/// Attributes are the way MLIR attaches constant to operations.
297-
void collectData(ExprAST &expr, std::vector<double> &data) {
297+
void collectData(ExprAST &expr, std::vector<float> &data) {
298298
if (auto *lit = dyn_cast<LiteralExprAST>(&expr)) {
299299
for (auto &value : lit->getValues())
300300
collectData(*value, data);
@@ -444,10 +444,10 @@ class MLIRGenImpl {
444444
mlir::Type getType(ArrayRef<int64_t> shape) {
445445
// If the shape is empty, then this type is unranked.
446446
if (shape.empty())
447-
return mlir::UnrankedTensorType::get(builder.getF64Type());
447+
return mlir::UnrankedTensorType::get(builder.getF32Type());
448448

449449
// Otherwise, we use the given shape.
450-
return mlir::RankedTensorType::get(shape, builder.getF64Type());
450+
return mlir::RankedTensorType::get(shape, builder.getF32Type());
451451
}
452452

453453
/// Build an MLIR type from a Toy AST variable type (forward to the generic

0 commit comments

Comments
 (0)