# Optimization Overview

This tutorial series walks through key optimization techniques in ML compilers using MLIR, ordered by pedagogical progression. Each stage builds on concepts from the previous one.

## Environment Setup

### Environment Preparation with conda (Optional)

- The OS must be newer than Ubuntu 22.04, so that gcc-13 is available from the default archives.
- Install gcc-13 and g++-13:

```bash
apt update -y && \
apt install -yq gcc-13 g++-13
# On older releases, the toolchain PPA provides newer compilers instead:
# apt install -yq software-properties-common
# add-apt-repository -y ppa:ubuntu-toolchain-r/test
# apt update -y
# apt install -yq gcc-11 g++-11
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-13 20
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 20
```
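
To confirm the alternatives are active, check which versions are now the defaults:

```bash
gcc --version   # should report gcc 13.x
g++ --version   # should report g++ 13.x
```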

- Install cmake and ninja. Any installation method works; conda is the most convenient for me:

```bash
conda create -n mlir -y
conda activate mlir
# conda install cmake ninja clang-format clang lld ncurses mlir llvm -c conda-forge
conda install cmake ninja clang-format clang clang-tools mlir zlib spdlog fmt lit llvm=19.* -c conda-forge -y
# Or create the environment and install everything in one step:
# conda create -n mlir cmake ninja clang-format clang mlir zlib spdlog fmt lit llvm -c conda-forge -y
```
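
A quick sanity check that the toolchain landed in the environment (this assumes the conda-forge packages put `mlir-opt` and friends on the PATH):

```bash
which cmake ninja clang mlir-opt
mlir-opt --version
```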

- Build the examples with conda:

```bash
cd example
bash build_with_conda.sh all
```

### Environment Preparation with Dev Containers

In VS Code, run the `Dev Containers: Open Folder in Container...` command from the command palette.

- Build the examples inside the dev container:

```bash
cd example
bash scripts/sync_deps.sh
bash scripts/build_deps.sh
bash build.sh all
```

## Configure clangd

```bash
cd example
# After configuring the project with CMake, generate the compilation database for clangd:
compdb -p build list > compile_commands.json
```
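
`compdb` is a separate helper (not part of CMake) that extends the CMake-generated compilation database with entries for header files; if it is not already installed, it is available from PyPI:

```bash
pip install compdb
```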

## Plan

### Phase 1: MatMul (Foundation)

**Goal:** Establish core optimization vocabulary and mechanics.

| Topic                | Description |
| -------------------- | ----------- |
| Structured Op        | Define and lower a matmul via `linalg.generic` / named ops; understand the iteration domain, indexing maps, and payload (see the sketch after this table). |
| Tiling               | Apply `scf.forall` / `scf.for` tile-and-fuse to decompose the M×N×K loop nest; explore tile-size trade-offs (a transform-dialect sketch follows below). |
| Locality             | Demonstrate cache-friendly access via loop permutation (MKN vs MNK), packing, and micro-kernel promotion to registers. |
| Simple Cost Model    | Introduce a basic analytical model (FLOPs, memory traffic, arithmetic intensity) to guide tile-size selection. |
| Pipeline Abstraction | Compose the above into a reusable pass pipeline: tile → promote → vectorize → lower, showing how MLIR pass infrastructure orchestrates transformations. |

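As a concrete anchor for the structured-op row, here is a minimal sketch of the matmul written as a `linalg.generic` with explicit indexing maps. The named op `linalg.matmul` expresses the same computation; the generic form just makes the iteration domain and payload visible (shapes are illustrative, not part of the tutorial code):

```mlir
#map_a = affine_map<(m, n, k) -> (m, k)>
#map_b = affine_map<(m, n, k) -> (k, n)>
#map_c = affine_map<(m, n, k) -> (m, n)>

func.func @matmul(%A: tensor<128x256xf32>, %B: tensor<256x64xf32>,
                  %C: tensor<128x64xf32>) -> tensor<128x64xf32> {
  // Iteration domain (m, n, k): m and n are parallel, k is the reduction.
  %0 = linalg.generic
      {indexing_maps = [#map_a, #map_b, #map_c],
       iterator_types = ["parallel", "parallel", "reduction"]}
      ins(%A, %B : tensor<128x256xf32>, tensor<256x64xf32>)
      outs(%C : tensor<128x64xf32>) {
  ^bb0(%a: f32, %b: f32, %c: f32):
    // Payload: one multiply-accumulate per point of the iteration domain.
    %mul = arith.mulf %a, %b : f32
    %add = arith.addf %mul, %c : f32
    linalg.yield %add : f32
  } -> tensor<128x64xf32>
  return %0 : tensor<128x64xf32>
}
```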
**Deliverable:** An end-to-end optimized matmul that is competitive with a naive BLAS call, with clear before/after IR at every stage.
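
To make the tiling and pipeline steps concrete, here is a sketch of a transform-dialect script that performs the tiling, assuming the LLVM 19 transform dialect (op names and syntax have shifted across LLVM releases, so treat this as indicative rather than exact):

```mlir
// Run with: mlir-opt --transform-interpreter (flag name per recent LLVM; may vary).
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
    // Find the matmul payload op (the linalg.generic from the sketch above).
    %mm = transform.structured.match ops{["linalg.generic"]} in %root
        : (!transform.any_op) -> !transform.any_op
    // Tile M, N, K by 32x32x8: produces the tiled op plus one scf.for per tile size.
    %tiled, %m_loop, %n_loop, %k_loop =
        transform.structured.tile_using_for %mm tile_sizes [32, 32, 8]
        : (!transform.any_op)
        -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}
```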

---

### Phase 2: Conv2D + Activation Fusion (Spatial & Fusion)

**Goal:** Extend tiling to spatial dimensions and introduce operator fusion.

| Topic          | Description |
| -------------- | ----------- |
| Fusion         | Fuse an elementwise activation (ReLU, GELU) into the convolution producer-consumer pair; understand producer-consumer analysis and the legality of fusion. |
| Spatial Tiling | Tile the output height and width dimensions; manage the resulting input-tile expansion due to the kernel window (halo). |
| Layout         | Explore NHWC vs NCHW (and packed variants like NCHWc); understand how data layout affects vectorization and memory access patterns. |
| Halo / Reuse   | Handle overlapping input regions across tiles; compute the halo size from kernel size, stride, and dilation; demonstrate data reuse (see the formula and sketch after this table). |

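For the halo computation: an output tile of extent `T` along a spatial dimension needs an input extent of `(T - 1) * stride + (K - 1) * dilation + 1` for a kernel of size `K`, and the overlap between the input regions of adjacent tiles is the halo. As an anchor for the fusion topic, here is a minimal sketch of the unfused conv + ReLU pair using a named convolution op (shapes are illustrative):

```mlir
#id4 = affine_map<(n, h, w, c) -> (n, h, w, c)>

func.func @conv_relu(%in: tensor<1x58x58x64xf32>, %filter: tensor<3x3x64x128xf32>,
                     %init: tensor<1x56x56x128xf32>) -> tensor<1x56x56x128xf32> {
  // 3x3 convolution, stride 1, dilation 1: 58x58 input -> 56x56 output.
  %conv = linalg.conv_2d_nhwc_hwcf
      {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
      ins(%in, %filter : tensor<1x58x58x64xf32>, tensor<3x3x64x128xf32>)
      outs(%init : tensor<1x56x56x128xf32>) -> tensor<1x56x56x128xf32>
  // Elementwise ReLU: the classic candidate for fusion into the conv's loop nest.
  %zero = arith.constant 0.0 : f32
  %relu = linalg.generic
      {indexing_maps = [#id4, #id4],
       iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
      ins(%conv : tensor<1x56x56x128xf32>)
      outs(%init : tensor<1x56x56x128xf32>) {
  ^bb0(%v: f32, %out: f32):
    %r = arith.maximumf %v, %zero : f32
    linalg.yield %r : f32
  } -> tensor<1x56x56x128xf32>
  return %relu : tensor<1x56x56x128xf32>
}
```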
**Deliverable:** A fused conv2d + activation kernel with explicit spatial tiling, demonstrating measurable speedup from fusion and layout selection.

---

### Phase 3: LayerNorm / Softmax (Reduction Scheduling)

**Goal:** Tackle reduction-heavy operations where numerical stability and scheduling are tightly coupled.

| Topic                      | Description |
| -------------------------- | ----------- |
| Reduction Scheduling       | Implement multi-pass (mean → variance → normalize) vs single-pass (Welford) reduction strategies; tile reductions across threads. |
| Scratch Buffer             | Allocate and manage intermediate buffers (`memref.alloca` / workspace) for partial results; understand buffer lifetime and placement. |
| Numerics–Schedule Coupling | Show how the softmax "max-subtract" trick and log-sum-exp rewriting are not just numerical choices but directly constrain the legal schedules (see the sketch after this table). |

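To ground the numerics-schedule coupling: the max-subtract trick keeps every exponential bounded, but it also means the row-max reduction must complete before any `exp` tile can run, which is exactly the scheduling constraint the table refers to. A minimal sketch of that stabilizing first pass (shapes are illustrative; `%init` is assumed pre-filled with `-inf`, e.g. via `linalg.fill`):

```mlir
#map_in  = affine_map<(r, c) -> (r, c)>
#map_out = affine_map<(r, c) -> (r)>

// Row-wise max over a 64x512 input: the first pass of a numerically safe softmax.
func.func @row_max(%x: tensor<64x512xf32>, %init: tensor<64xf32>) -> tensor<64xf32> {
  %m = linalg.generic
      {indexing_maps = [#map_in, #map_out],
       iterator_types = ["parallel", "reduction"]}
      ins(%x : tensor<64x512xf32>)
      outs(%init : tensor<64xf32>) {
  ^bb0(%xi: f32, %acc: f32):
    %mx = arith.maximumf %xi, %acc : f32
    linalg.yield %mx : f32
  } -> tensor<64xf32>
  return %m : tensor<64xf32>
}
```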
**Deliverable:** A numerically stable, tiled LayerNorm/Softmax implementation with clear discussion of how algorithmic rewrites enable (or block) specific schedules.

---

### Phase 4: Subgraph Fusion & Memory Planning (Graph Level)

**Goal:** Move from single-op to multi-op / graph-level optimization.

| Topic                    | Description |
| ------------------------ | ----------- |
| Graph Scheduling         | Decide fusion groups and execution order across a small subgraph (e.g., matmul → bias → layernorm); model the trade-off between parallelism and memory pressure (see the sketch after this table). |
| Peak Memory Optimization | Apply operator reordering, in-place updates, and buffer sharing (liveness analysis) to minimize peak memory; visualize the memory waterline before/after. |

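For reference, a minimal sketch of the kind of subgraph this phase schedules (layernorm elided; shapes illustrative). The broadcast bias add is written as a separate `linalg.generic` so that it is an obvious epilogue-fusion candidate and its intermediate tensor is a target for memory planning:

```mlir
#id2   = affine_map<(i, j) -> (i, j)>
#bcast = affine_map<(i, j) -> (j)>

func.func @subgraph(%A: tensor<64x128xf32>, %B: tensor<128x256xf32>,
                    %bias: tensor<256xf32>) -> tensor<64x256xf32> {
  %zero = arith.constant 0.0 : f32
  %empty = tensor.empty() : tensor<64x256xf32>
  %init = linalg.fill ins(%zero : f32) outs(%empty : tensor<64x256xf32>) -> tensor<64x256xf32>
  %mm = linalg.matmul ins(%A, %B : tensor<64x128xf32>, tensor<128x256xf32>)
                      outs(%init : tensor<64x256xf32>) -> tensor<64x256xf32>
  // Broadcast bias add: fusing this into the matmul's epilogue removes one
  // 64x256 intermediate from the live set, lowering the memory waterline.
  %out = linalg.generic
      {indexing_maps = [#id2, #bcast, #id2],
       iterator_types = ["parallel", "parallel"]}
      ins(%mm, %bias : tensor<64x256xf32>, tensor<256xf32>)
      outs(%empty : tensor<64x256xf32>) {
  ^bb0(%m: f32, %b: f32, %o: f32):
    %s = arith.addf %m, %b : f32
    linalg.yield %s : f32
  } -> tensor<64x256xf32>
  return %out : tensor<64x256xf32>
}
```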
**Deliverable:** A small end-to-end subgraph whose peak memory and kernel count are jointly optimized, with tooling to visualize the memory timeline.

---

### Suggested Timeline

| Week | Phase                         | Key Milestone                                |
| ---- | ----------------------------- | -------------------------------------------- |
| 1–3  | Phase 1 – MatMul              | Tiled + vectorized matmul with pass pipeline |
| 4–5  | Phase 2 – Conv2D + Activation | Fused conv2d-relu with spatial tiling        |
| 6–7  | Phase 3 – LayerNorm / Softmax | Numerically stable tiled reduction           |
| 8–9  | Phase 4 – Subgraph Fusion     | Graph-level fusion with memory planning      |
| 10   | Wrap-up                       | Benchmarking, profiling, and write-up        |