35 changes: 35 additions & 0 deletions .github/workflows/docker-tutorial-image.yml
@@ -0,0 +1,35 @@
name: Docker image for tutorial

on:
  push:
    branches: [ "tutorial" ]

jobs:
  build:
    runs-on: self-hosted

    permissions:
      contents: read
      packages: write

    steps:
      # Step 1: Checkout the repository
      - name: Checkout Code
        uses: actions/checkout@v4

      # Step 2: Log in to GitHub Container Registry
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Step 3: Build and Push Docker Image
      - name: Build and Push Docker Image
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./Dockerfile.ksc2025
          push: true
          tags: ghcr.io/psal-postech/torchsim_ksc2025:latest
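With this workflow in place, every push to the tutorial branch rebuilds the image from Dockerfile.ksc2025 on the self-hosted runner and publishes it to GitHub Container Registry; assuming the package is made public, the tutorial image can then be fetched with "docker pull ghcr.io/psal-postech/torchsim_ksc2025:latest".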
144 changes: 72 additions & 72 deletions .github/workflows/pytorchsim_test.yml

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions .gitignore
@@ -1,3 +1,9 @@
 __pycache__/
 TOGSim/build/
 .vscode
+*.txt
+*.ipynb_checkpoints
+output
+togsim_results/*
+outputs/*
+experiments/artifact/logs/*
48 changes: 16 additions & 32 deletions PyTorchSimFrontend/extension_codecache.py
@@ -15,7 +15,7 @@ def hash_prefix(hash_value):
     return hash_value[1:12]

 def get_write_path(src_code):
-    return os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "tmp", hash_prefix(get_hash(src_code.strip())))
+    return os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "outputs", hash_prefix(get_hash(src_code.strip())))

 def dump_metadata(args, arg_attributes, path):
     meta_path = os.path.join(path, "meta.txt")
@@ -27,19 +27,6 @@ def dump_metadata(args, arg_attributes, path):
             file.write(f'{arg_name}=({arg_attribute[0]}, {arg.dtype}, {arg.shape})\n')
     return

-def llvm_compile_command(input, output):
-    opt_output = f"{input[:-3]}_opt.ll"
-    return [re.sub(r"[ \n]+", " ",
-        f"""
-        {extension_config.CONFIG_TORCHSIM_LLVM_PATH}/opt --load-pass-plugin={extension_config.CONFIG_TORCHSIM_CUSTOM_PASS_PATH}/libLowerGemminiPass.so -S -march=riscv64 --passes=LowerGemminiPass {input} -o {opt_output}
-        """,
-    ).strip(),
-    re.sub(r"[ \n]+", " ",
-        f"""
-        {extension_config.CONFIG_TORCHSIM_LLVM_PATH}/llc -march=riscv64 -mattr=+m,+f,+d,+a,+c,+v -O2 {opt_output} -o {output}
-        """,
-    ).strip()]
-
 def mlir_compile_command(filename, vectorlane_size, vlen=256):
     return [re.sub(r"[ \n]+", " ",
         f"""
@@ -165,7 +152,7 @@ def load(cls, source_code,
         else:
             link_option = ""
         # Generate LLVM kernel calller and binary for validation
-        if extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE:
+        if extension_config.pytorchsim_functional_mode:
             # Use custom malloc to avoid size error
             new_link_option = link_option + " -Wl,--wrap=malloc -Wl,--wrap=free"
             cmds = mlir_compile_command(new_input_path, vectorlane_size, vlen=vlen)
@@ -182,7 +169,7 @@ def load(cls, source_code,
                 print("Error output:", e.output)
                 assert(0)

-        val_llvm_caller = MLIRKernelCallerCodeGen(extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE, arg_attributes)
+        val_llvm_caller = MLIRKernelCallerCodeGen(extension_config.pytorchsim_functional_mode, arg_attributes)
         val_llvm_caller.generate_wrapper_file(write_path, validation_wrapper_name)
         val_llvm_caller.compile_wih_kernel(write_path, key, validation_wrapper_name,
                                            validation_binary_name, new_link_option)
@@ -213,7 +200,7 @@ def load(cls, source_code,
             print("Error output:", e.output)
             assert(0)

-        if not extension_config.CONFIG_TORCHSIM_TIMING_MODE:
+        if not extension_config.pytorchsim_timing_mode:
             return key

         # Generate MLIR kernel calller and binary for cycle calculation
@@ -280,26 +267,26 @@ def dummy_simulator(*args, **kwargs):
            lock = FileLock(os.path.join(lock_dir, key + ".lock"), timeout=LOCK_TIMEOUT)
            with lock:
                # Run simulator pass
-                result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "tmp", hash_prefix(key))
+                result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "outputs", hash_prefix(key))
                # Dump arguments and meta data
                dump_metadata(args, arg_attributes, result_path)
                runtime_path = FunctionalSimulator.get_runtime_dump_path(result_path)
-                if not autotune and (extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE or validate):
+                if not autotune and (extension_config.pytorchsim_functional_mode or validate):
                    funcsim = FunctionalSimulator(result_path, key)
                    funcsim.run_spike(args, arg_attributes,
                                      runtime_path, self.validation_binary_name,
                                      vectorlane_size=vectorlane_size, spad_info=spad_info,
-                                      cleanup=extension_config.CONFIG_CLEANUP_DUMP_ARGS, silent_mode=silent_mode)
-                if not extension_config.CONFIG_TORCHSIM_TIMING_MODE:
+                                      silent_mode=silent_mode)
+                if not extension_config.pytorchsim_timing_mode:
                    return

                onnx_path = os.path.join(result_path, "tile_graph.onnx")
                attribute_path = os.path.join(runtime_path, "attribute")
                togsim_path = os.path.join(extension_config.CONFIG_TORCHSIM_DIR, "TOGSim")
-                backsim = TOGSimulator(togsim_path, extension_config.CONFIG_TOGSIM_CONFIG)
-                backsim.vectorlane_size = vectorlane_size
-                attribute_path = backsim.create_attribute_file(attribute_path, args, loop_size=loop_size)
-                result_path = backsim.simulation(onnx_path, attribute_path, silent_mode=silent_mode)
+                TOGSim = TOGSimulator(togsim_path, extension_config.CONFIG_TOGSIM_CONFIG)
+                TOGSim.vectorlane_size = vectorlane_size
+                attribute_path = TOGSim.create_attribute_file(attribute_path, args, loop_size=loop_size)
+                result_path = TOGSim.simulation(onnx_path, attribute_path, silent_mode=silent_mode)
                result = TOGSimulator.get_result_from_file(result_path)
                return result

@@ -310,23 +297,20 @@ def dryrun_simulator(*args, **kwargs):
            lock = FileLock(os.path.join(lock_dir, key + ".lock"), timeout=LOCK_TIMEOUT)
            with lock:
                # Run simulator pass
-                result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "tmp", hash_prefix(key))
+                result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "outputs", hash_prefix(key))
                # Dump arguments and meta data
                dump_metadata(args, arg_attributes, result_path)
                runtime_path = FunctionalSimulator.get_runtime_dump_path(result_path)
                if not extension_config.CONFIG_TORCHSIM_TIMING_MODE:
                    return

                # Todo. Support valude dependent mode for graph mode
-                if False: # extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE:
+                if False: # extension_config.pytorchsim_functional_mode:
                    funcsim = FunctionalSimulator(result_path, key)
                    funcsim.run_spike(args, arg_attributes,
                                      runtime_path, self.validation_binary_name,
-                                      vectorlane_size=vectorlane_size, spad_info=spad_info,
-                                      cleanup=extension_config.CONFIG_CLEANUP_DUMP_ARGS)
+                                      vectorlane_size=vectorlane_size, spad_info=spad_info)
                return result_path, runtime_path, None

-        is_dryrun = int(os.environ.get('TOGSIM_DRYRUN', default=False)) and not autotune
+        is_dryrun = int(os.environ.get('TOGSIM_EAGER_MODE', default=False)) and not autotune
        target_simulator = dryrun_simulator if is_dryrun else dummy_simulator
        target_simulator.arg_attributes = arg_attributes
        target_simulator.future = future
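Both dummy_simulator and dryrun_simulator above guard their work with the same per-kernel file lock. A minimal, self-contained sketch of that pattern, with lock_dir and key named as in the diff; the timeout value and the with_kernel_lock helper are assumptions for illustration:

import os
from filelock import FileLock

LOCK_TIMEOUT = 600  # seconds; placeholder, the real value lives elsewhere in the module

def with_kernel_lock(lock_dir, key, fn):
    # One lock file per kernel hash: concurrent runs of the same kernel
    # queue up behind the lock instead of racing on the shared dump directory.
    os.makedirs(lock_dir, exist_ok=True)
    lock = FileLock(os.path.join(lock_dir, key + ".lock"), timeout=LOCK_TIMEOUT)
    with lock:
        return fn()

This is why moving the dump directory from tmp/<hash> to outputs/<hash> is safe for concurrent processes: every reader and writer of that directory serializes on the same key-derived lock file first.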
163 changes: 63 additions & 100 deletions PyTorchSimFrontend/extension_config.py
@@ -1,126 +1,89 @@
 import os
 import sys
 import tempfile
 import importlib
 import json

+CONFIG_TORCHSIM_DIR = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')
+CONFIG_GEM5_PATH = os.environ.get('GEM5_PATH', default="/workspace/gem5/build/RISCV/gem5.opt")
+CONFIG_TORCHSIM_LLVM_PATH = os.environ.get('TORCHSIM_LLVM_PATH', default="/usr/bin")
+
+CONFIG_TORCHSIM_DUMP_MLIR_IR = int(os.environ.get("TORCHSIM_DUMP_MLIR_IR", default=False))
+CONFIG_TORCHSIM_DUMP_LLVM_IR = int(os.environ.get("TORCHSIM_DUMP_LLVM_IR", default=False))
+
 def __getattr__(name):
+    # TOGSim config
+    config_path = os.environ.get('TOGSIM_CONFIG',
+        default=f"{CONFIG_TORCHSIM_DIR}/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json")
+    if name == "CONFIG_TOGSIM_CONFIG":
+        return config_path
+    config_json = json.load(open(config_path, 'r'))
+
+    # Hardware info config
     if name == "CONFIG_VECTOR_LANE":
         return int(os.environ.get("TORCHSIM_VECTOR_LANE", default=128))
     if name == "CONFIG_VECTOR_LANE_STRIDE":
         return int(os.environ.get("TORCHSIM_VECTOR_LANE_STRIDE", default=2))
+    if name == "vpu_num_lanes":
+        return config_json["vpu_num_lanes"]
     if name == "CONFIG_SPAD_INFO":
         return {
             "spad_vaddr" : 0xD0000000,
             "spad_paddr" : 0x2000000000,
-            "spad_size" : int(os.environ.get("TORCHSIM_SPAD_SIZE", default=128)) << 10 # Note: spad size per lane
+            "spad_size" : config_json["vpu_spad_size_kb_per_lane"] << 10 # Note: spad size per lane
         }

     if name == "CONFIG_PRECISION":
         return 4 # 32bit
     if name == "CONFIG_NUM_CORES":
-        return 1
-    if name == "CONFIG_VLEN":
-        return 256 # 256bits / 32bits = 8 [elements]
-
-    # Tile size config
-    if name == "CONFIG_TORCHSIM_DIR":
-        return os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')
-
-    if name == "CONFIG_TORCHSIM_DUMP_PATH":
-        return os.environ.get('TORCHSIM_DUMP_PATH', default = f"{tempfile.gettempdir()}/torchinductor")
-    if name == "CONFIG_TORCHSIM_DUMP_FILE":
-        return int(os.environ.get('TORCHSIM_DUMP_FILE', default=True))
-    if name == "CONFIG_TORCHSIM_FUNCTIONAL_MODE":
-        return int(os.environ.get('TORCHSIM_FUNCTIONAL_MODE', default=True))
-    if name == "CONFIG_TORCHSIM_TIMING_MODE":
-        return int(os.environ.get("TORCHSIM_TIMING_MODE", True))
-    if name == "CONFIG_CLEANUP_DUMP_ARGS":
-        return int(os.environ.get('CLEANUP_DUMP_ARGS', default=False))
-
-    # LLVM PATH
-    if name == "CONFIG_TORCHSIM_LLVM_PATH":
-        return os.environ.get('TORCHSIM_LLVM_PATH', default="/usr/bin")
-    if name == "CONFIG_TORCHSIM_CUSTOM_PASS_PATH":
-        return os.environ.get('TORCHSIM_CUSTOM_PASS_PATH',
-            default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/GemminiLowerPass/build")
-    if name == "CONFIG_TORCHSIM_DUMP_MLIR_IR":
-        return int(os.environ.get("TORCHSIM_DUMP_MLIR_IR", default=False))
-    if name == "CONFIG_TORCHSIM_DUMP_LLVM_IR":
-        return int(os.environ.get("TORCHSIM_DUMP_LLVM_IR", default=False))
-
-    # TOGSim config
-    if name == "CONFIG_TOGSIM_CONFIG":
-        return os.environ.get('TORCHSIM_CONFIG',
-            default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json")
-    if name == "CONFIG_TOGSIM_EAGER_MODE":
-        return int(os.environ.get("TOGSIM_EAGER_MODE", default=False))
-    if name == "CONFIG_TOGSIM_DRYRUN":
-        return int(os.environ.get('TOGSIM_DRYRUN', default=False))
-    if name == "CONFIG_TOGSIM_DEBUG_LEVEL":
-        return os.environ.get("TOGSIM_DEBUG_LEVEL", "")
+        return config_json["num_cores"]
+    if name == "vpu_vector_length_bits":
+        return config_json["vpu_vector_length_bits"]
+
+    if name == "pytorchsim_functional_mode":
+        return config_json['pytorchsim_functional_mode']
+    if name == "pytorchsim_timing_mode":
+        return config_json['pytorchsim_timing_mode']
+
+    # Mapping strategy
+    if name == "codegen_mapping_strategy":
+        codegen_mapping_strategy = config_json["codegen_mapping_strategy"]
+        assert(codegen_mapping_strategy in ["heuristic", "autotune", "external-then-heuristic", "external-then-autotune"]), "Invalid mapping strategy!"
+        return codegen_mapping_strategy
+
+    if name == "codegen_external_mapping_file":
+        return config_json["codegen_external_mapping_file"]
+
+    # Autotune config
+    if name == "codegen_autotune_max_retry":
+        return config_json["codegen_autotune_max_retry"]
+    if name == "codegen_autotune_template_topk":
+        return config_json["codegen_autotune_template_topk"]

     # GEM5 config
-    if name == "CONFIG_GEM5_PATH":
-        return os.environ.get('GEM5_PATH', default="/workspace/gem5/build/RISCV/gem5.opt")
     if name == "CONFIG_GEM5_SCRIPT_PATH":
         return os.environ.get('GEM5_SCRIPT_PATH',
             default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/gem5_script/script_systolic.py")

-    # AUTOTUNE config
-    if name == "CONFIG_AUTOTUNE":
-        return int(os.environ.get('AUTOTUNE', default=False))
-    if name == "CONFIG_AUTOTUNE_TEMPLATE":
-        return int(os.environ.get('AUTOTUNE_TEMPLATE', default=False))
-    if name == "CONFIG_MAX_AUTOTUNE_TRY":
-        return int(os.environ.get('MAX_AUTOTUNE_TRY', default=10))
-    if name == "CONFIG_AUTOTUNE_TEMPLATE_TOPK":
-        return int(os.environ.get('AUTOTUNE_TEMPLATE_TOPK', default=4))
-
     # For block sparse
     if name == "CONFIG_BLOCK_SPARSE":
         return int(os.environ.get('BLOCK_SPARSE', default=0))

-    # For GEMM tile size
-    if name == "CONFIG_MANUAL_TILE_SIZE":
-        return int(os.environ.get('TORCHSIM_MANUAL_TILE_SIZE', default=False))
-    if name == "CONFIG_TILE_M":
-        return int(os.getenv("TORCHSIM_TILE_M", __getattr__("CONFIG_VECTOR_LANE")))
-    if name == "CONFIG_TILE_N":
-        return int(os.getenv("TORCHSIM_TILE_N", __getattr__("CONFIG_VECTOR_LANE")))
-    if name == "CONFIG_TILE_K":
-        return int(os.getenv("TORCHSIM_TILE_K", __getattr__("CONFIG_VECTOR_LANE")))
-
-    if name == "CONFIG_SUBTILE":
-        return int(os.environ.get('TORCHSIM_SUBTILE', default=True))
-    if name == "CONFIG_MANUAL_SUBTILE_SIZE":
-        return int(os.environ.get('TORCHSIM_MANUAL_SUBTILE_SIZE', default=False))
-    if name == "CONFIG_SUBTILE_M":
-        return int(os.environ.get('TORCHSIM_SUBTILE_M', default=__getattr__("CONFIG_VECTOR_LANE")))
-    if name == "CONFIG_SUBTILE_N":
-        return int(os.environ.get('TORCHSIM_SUBTILE_N', default=__getattr__("CONFIG_VECTOR_LANE")))
-    if name == "CONFIG_SUBTILE_K":
-        return int(os.environ.get('TORCHSIM_SUBTILE_K', default=__getattr__("CONFIG_VECTOR_LANE")))
-
     if name == "CONFIG_GEMM_CHEATSHEET_PATH":
         return os.environ.get('TORCHSIM_GEMM_CHEATSHEET_PATH',
             default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/validation/gemm_tpuv3_cheatsheet.json")
     # Compiler Optimization
-    if name == "CONFIG_COMPILER_OPTIMIZATION":
-        return os.environ.get('TORCHSIM_COMPILER_OPTIMIZATION', default="all") # options: all, none, custom
+    if name == "codegen_compiler_optimization":
+        return config_json["codegen_compiler_optimization"]

     # Advanced fusion options
     if name == "CONFIG_FUSION":
-        return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "fusion" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "fusion" in __getattr__("codegen_compiler_optimization")) else False
     if name == "CONFIG_FUSION_REDUCTION_EPILOGUE":
-        return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "reduction_epliogue" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "reduction_epliogue" in __getattr__("codegen_compiler_optimization")) else False
     if name == "CONFIG_FUSION_REDUCTION_REDUCTION":
-        return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "reduction_reduction" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "reduction_reduction" in __getattr__("codegen_compiler_optimization")) else False
     if name == "CONFIG_FUSION_PROLOGUE":
-        return True if ((__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all") or ("prologue" in __getattr__("CONFIG_COMPILER_OPTIMIZATION"))) else False
+        return True if ((__getattr__("codegen_compiler_optimization") == "all") or ("prologue" in __getattr__("codegen_compiler_optimization"))) else False
     if name == "CONFIG_SINGLE_BATCH_CONV":
-        return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "single_batch_conv" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "single_batch_conv" in __getattr__("codegen_compiler_optimization")) else False
     if name == "CONFIG_MULTI_TILE_CONV":
-        return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "multi_tile_conv" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "multi_tile_conv" in __getattr__("codegen_compiler_optimization")) else False
+    if name == "CONFIG_SUBTILE":
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "subtile" in __getattr__("codegen_compiler_optimization")) else False
+
+    if name == "CONFIG_TOGSIM_DEBUG_LEVEL":
+        return os.environ.get("TOGSIM_DEBUG_LEVEL", "")
+    if name == "CONFIG_TORCHSIM_DUMP_PATH":
+        return os.environ.get('TORCHSIM_DUMP_PATH', default = CONFIG_TORCHSIM_DIR)
+    if name == "CONFIG_TORCHSIM_LOG_PATH":
+        return os.environ.get('TORCHSIM_DUMP_LOG_PATH', default = os.path.join(CONFIG_TORCHSIM_DIR, "togsim_results"))
+
+    if name == "CONFIG_TOGSIM_EAGER_MODE":
+        return int(os.environ.get("TOGSIM_EAGER_MODE", default=False))

 # SRAM Buffer allocation plan
 def load_plan_from_module(module_path):
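The config rewrite above leans on the module-level __getattr__ hook (PEP 562): any extension_config.<name> lookup that misses a real module attribute falls into this function, which resolves the name from the TOGSim JSON config on the fly. A stripped-down sketch of the mechanism, with an illustrative file path and key set; note that the diff re-reads the JSON on every access, whereas this sketch caches the parsed file:

# extension_config_sketch.py -- illustrative only
import json
import os

_config_cache = None

def _config():
    # Load the JSON config once and memoize it (the real module reloads per access).
    global _config_cache
    if _config_cache is None:
        path = os.environ.get("TOGSIM_CONFIG", "configs/example.json")
        with open(path) as f:
            _config_cache = json.load(f)
    return _config_cache

def __getattr__(name):
    # Invoked only when normal module attribute lookup fails (PEP 562).
    if name in ("pytorchsim_functional_mode", "pytorchsim_timing_mode"):
        return _config()[name]
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

Callers then simply write extension_config.pytorchsim_timing_mode, and flipping a mode means editing one JSON file instead of juggling a dozen environment variables.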
8 changes: 4 additions & 4 deletions PyTorchSimFrontend/extension_op.py
@@ -46,7 +46,7 @@

 class MLIRExternKernelChoice(ExternKernelChoice):
     def call_name(self):
-        is_dryrun = int(os.environ.get('TOGSIM_DRYRUN', default=False))
+        is_dryrun = int(os.environ.get('TOGSIM_EAGER_MODE', default=False))
         if is_dryrun:
             return f"yield from sparse_mm_dummy_stonne_outer"
         return f"torch.ops.extension_op.{self.name}"
@@ -276,9 +276,9 @@ def sparse_mm_stonne_outer(a, b, out):
     onnx_path, attribute_path, c_result_path = prepare_outer_product_matrix(a, b, out)

     togsim_path = os.path.join(extension_config.CONFIG_TORCHSIM_DIR, "TOGSim")
-    stonne_config_path = f'{extension_config.CONFIG_TORCHSIM_DIR}/TOGSim/configs/stonne_single_c1_simple_noc.json'
-    backsim = TOGSimulator(togsim_path, stonne_config_path)
-    result_path = backsim.simulation(onnx_path)
+    stonne_config_path = f'{extension_config.CONFIG_TORCHSIM_DIR}/configs/stonne_single_c1_simple_noc.json'
+    TOGSim = TOGSimulator(togsim_path, stonne_config_path)
+    result_path = TOGSim.simulation(onnx_path)
     TOGSimulator.get_result_from_file(result_path)

     # Load result data
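For reference, the renamed TOGSim driver calls in both files follow one shape; a condensed sketch, where the class and method names come from the diffs above, the import path is hypothetical, and the defaults are placeholders:

import os

from togsim_interface import TOGSimulator  # hypothetical import path

def run_togsim(extension_config, onnx_path, attribute_path, args,
               vectorlane_size=128, loop_size=None, silent_mode=True):
    # Point the simulator at the TOGSim checkout and a hardware config JSON.
    togsim_path = os.path.join(extension_config.CONFIG_TORCHSIM_DIR, "TOGSim")
    sim = TOGSimulator(togsim_path, extension_config.CONFIG_TOGSIM_CONFIG)
    sim.vectorlane_size = vectorlane_size
    # Serialize kernel arguments into the attribute file TOGSim consumes,
    # then run the timing simulation on the tile graph.
    attribute_path = sim.create_attribute_file(attribute_path, args, loop_size=loop_size)
    result_path = sim.simulation(onnx_path, attribute_path, silent_mode=silent_mode)
    return TOGSimulator.get_result_from_file(result_path)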