35 changes: 35 additions & 0 deletions .github/workflows/docker-tutorial-image.yml
@@ -0,0 +1,35 @@
name: Docker image for tutorial

on:
  push:
    branches: [ "tutorial" ]

jobs:
  build:
    runs-on: self-hosted

    permissions:
      contents: read
      packages: write

    steps:
      # Step 1: Checkout the repository
      - name: Checkout Code
        uses: actions/checkout@v4

      # Step 2: Log in to GitHub Container Registry
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Step 3: Build and Push Docker Image
      - name: Build and Push Docker Image
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./Dockerfile.ksc2025
          push: true
          tags: ghcr.io/psal-postech/torchsim_ksc2025:latest
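With this workflow in place, every push to the tutorial branch rebuilds the image from Dockerfile.ksc2025 on the self-hosted runner and publishes it to GitHub Container Registry; assuming the package is made public, the tutorial image can then be fetched with "docker pull ghcr.io/psal-postech/torchsim_ksc2025:latest".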
144 changes: 72 additions & 72 deletions .github/workflows/pytorchsim_test.yml

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions .gitignore
@@ -1,3 +1,9 @@
 __pycache__/
 TOGSim/build/
 .vscode
+*.txt
+*.ipynb_checkpoints
+output
+togsim_results/*
+outputs/*
+experiments/artifact/logs/*
48 changes: 16 additions & 32 deletions PyTorchSimFrontend/extension_codecache.py
@@ -15,7 +15,7 @@ def hash_prefix(hash_value):
     return hash_value[1:12]

 def get_write_path(src_code):
-    return os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "tmp", hash_prefix(get_hash(src_code.strip())))
+    return os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "outputs", hash_prefix(get_hash(src_code.strip())))

 def dump_metadata(args, arg_attributes, path):
     meta_path = os.path.join(path, "meta.txt")
@@ -27,19 +27,6 @@ def dump_metadata(args, arg_attributes, path):
             file.write(f'{arg_name}=({arg_attribute[0]}, {arg.dtype}, {arg.shape})\n')
     return

-def llvm_compile_command(input, output):
-    opt_output = f"{input[:-3]}_opt.ll"
-    return [re.sub(r"[ \n]+", " ",
-        f"""
-        {extension_config.CONFIG_TORCHSIM_LLVM_PATH}/opt --load-pass-plugin={extension_config.CONFIG_TORCHSIM_CUSTOM_PASS_PATH}/libLowerGemminiPass.so -S -march=riscv64 --passes=LowerGemminiPass {input} -o {opt_output}
-        """,
-    ).strip(),
-    re.sub(r"[ \n]+", " ",
-        f"""
-        {extension_config.CONFIG_TORCHSIM_LLVM_PATH}/llc -march=riscv64 -mattr=+m,+f,+d,+a,+c,+v -O2 {opt_output} -o {output}
-        """,
-    ).strip()]
-
 def mlir_compile_command(filename, vectorlane_size, vlen=256):
     return [re.sub(r"[ \n]+", " ",
         f"""
@@ -165,7 +152,7 @@ def load(cls, source_code,
         else:
             link_option = ""
         # Generate LLVM kernel calller and binary for validation
-        if extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE:
+        if extension_config.pytorchsim_functional_mode:
             # Use custom malloc to avoid size error
             new_link_option = link_option + " -Wl,--wrap=malloc -Wl,--wrap=free"
             cmds = mlir_compile_command(new_input_path, vectorlane_size, vlen=vlen)
@@ -182,7 +169,7 @@ def load(cls, source_code,
                 print("Error output:", e.output)
                 assert(0)

-        val_llvm_caller = MLIRKernelCallerCodeGen(extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE, arg_attributes)
+        val_llvm_caller = MLIRKernelCallerCodeGen(extension_config.pytorchsim_functional_mode, arg_attributes)
         val_llvm_caller.generate_wrapper_file(write_path, validation_wrapper_name)
         val_llvm_caller.compile_wih_kernel(write_path, key, validation_wrapper_name,
                                            validation_binary_name, new_link_option)
@@ -213,7 +200,7 @@ def load(cls, source_code,
             print("Error output:", e.output)
             assert(0)

-        if not extension_config.CONFIG_TORCHSIM_TIMING_MODE:
+        if not extension_config.pytorchsim_timing_mode:
             return key

         # Generate MLIR kernel calller and binary for cycle calculation
@@ -280,26 +267,26 @@ def dummy_simulator(*args, **kwargs):
            lock = FileLock(os.path.join(lock_dir, key + ".lock"), timeout=LOCK_TIMEOUT)
            with lock:
                # Run simulator pass
-                result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "tmp", hash_prefix(key))
+                result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "outputs", hash_prefix(key))
                # Dump arguments and meta data
                dump_metadata(args, arg_attributes, result_path)
                runtime_path = FunctionalSimulator.get_runtime_dump_path(result_path)
-                if not autotune and (extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE or validate):
+                if not autotune and (extension_config.pytorchsim_functional_mode or validate):
                    funcsim = FunctionalSimulator(result_path, key)
                    funcsim.run_spike(args, arg_attributes,
                                      runtime_path, self.validation_binary_name,
                                      vectorlane_size=vectorlane_size, spad_info=spad_info,
-                                      cleanup=extension_config.CONFIG_CLEANUP_DUMP_ARGS, silent_mode=silent_mode)
-                if not extension_config.CONFIG_TORCHSIM_TIMING_MODE:
+                                      silent_mode=silent_mode)
+                if not extension_config.pytorchsim_timing_mode:
                    return

                onnx_path = os.path.join(result_path, "tile_graph.onnx")
                attribute_path = os.path.join(runtime_path, "attribute")
                togsim_path = os.path.join(extension_config.CONFIG_TORCHSIM_DIR, "TOGSim")
-                backsim = TOGSimulator(togsim_path, extension_config.CONFIG_TOGSIM_CONFIG)
-                backsim.vectorlane_size = vectorlane_size
-                attribute_path = backsim.create_attribute_file(attribute_path, args, loop_size=loop_size)
-                result_path = backsim.simulation(onnx_path, attribute_path, silent_mode=silent_mode)
+                TOGSim = TOGSimulator(togsim_path, extension_config.CONFIG_TOGSIM_CONFIG)
+                TOGSim.vectorlane_size = vectorlane_size
+                attribute_path = TOGSim.create_attribute_file(attribute_path, args, loop_size=loop_size)
+                result_path = TOGSim.simulation(onnx_path, attribute_path, silent_mode=silent_mode)
                result = TOGSimulator.get_result_from_file(result_path)
                return result

@@ -310,23 +297,20 @@ def dryrun_simulator(*args, **kwargs):
            lock = FileLock(os.path.join(lock_dir, key + ".lock"), timeout=LOCK_TIMEOUT)
            with lock:
                # Run simulator pass
-                result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "tmp", hash_prefix(key))
+                result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "outputs", hash_prefix(key))
                # Dump arguments and meta data
                dump_metadata(args, arg_attributes, result_path)
                runtime_path = FunctionalSimulator.get_runtime_dump_path(result_path)
                if not extension_config.CONFIG_TORCHSIM_TIMING_MODE:
                    return

                # Todo. Support valude dependent mode for graph mode
-                if False: # extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE:
+                if False: # extension_config.pytorchsim_functional_mode:
                    funcsim = FunctionalSimulator(result_path, key)
                    funcsim.run_spike(args, arg_attributes,
                                      runtime_path, self.validation_binary_name,
-                                      vectorlane_size=vectorlane_size, spad_info=spad_info,
-                                      cleanup=extension_config.CONFIG_CLEANUP_DUMP_ARGS)
+                                      vectorlane_size=vectorlane_size, spad_info=spad_info)
                return result_path, runtime_path, None

-        is_dryrun = int(os.environ.get('TOGSIM_DRYRUN', default=False)) and not autotune
+        is_dryrun = int(os.environ.get('TOGSIM_EAGER_MODE', default=False)) and not autotune
        target_simulator = dryrun_simulator if is_dryrun else dummy_simulator
        target_simulator.arg_attributes = arg_attributes
        target_simulator.future = future
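Both dummy_simulator and dryrun_simulator above guard their work with the same per-kernel file lock. A minimal, self-contained sketch of that pattern, with lock_dir and key named as in the diff; the timeout value and the with_kernel_lock helper are assumptions for illustration:

import os
from filelock import FileLock

LOCK_TIMEOUT = 600  # seconds; placeholder, the real value lives elsewhere in the module

def with_kernel_lock(lock_dir, key, fn):
    # One lock file per kernel hash: concurrent runs of the same kernel
    # queue up behind the lock instead of racing on the shared dump directory.
    os.makedirs(lock_dir, exist_ok=True)
    lock = FileLock(os.path.join(lock_dir, key + ".lock"), timeout=LOCK_TIMEOUT)
    with lock:
        return fn()

This is why moving the dump directory from tmp/<hash> to outputs/<hash> is safe for concurrent processes: every reader and writer of that directory serializes on the same key-derived lock file first.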
163 changes: 63 additions & 100 deletions PyTorchSimFrontend/extension_config.py
@@ -1,126 +1,89 @@
 import os
 import sys
 import tempfile
 import importlib
 import json

+CONFIG_TORCHSIM_DIR = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')
+CONFIG_GEM5_PATH = os.environ.get('GEM5_PATH', default="/workspace/gem5/build/RISCV/gem5.opt")
+CONFIG_TORCHSIM_LLVM_PATH = os.environ.get('TORCHSIM_LLVM_PATH', default="/usr/bin")
+
+CONFIG_TORCHSIM_DUMP_MLIR_IR = int(os.environ.get("TORCHSIM_DUMP_MLIR_IR", default=False))
+CONFIG_TORCHSIM_DUMP_LLVM_IR = int(os.environ.get("TORCHSIM_DUMP_LLVM_IR", default=False))
+
 def __getattr__(name):
+    # TOGSim config
+    config_path = os.environ.get('TOGSIM_CONFIG',
+        default=f"{CONFIG_TORCHSIM_DIR}/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json")
+    if name == "CONFIG_TOGSIM_CONFIG":
+        return config_path
+    config_json = json.load(open(config_path, 'r'))
+
+    # Hardware info config
     if name == "CONFIG_VECTOR_LANE":
         return int(os.environ.get("TORCHSIM_VECTOR_LANE", default=128))
     if name == "CONFIG_VECTOR_LANE_STRIDE":
         return int(os.environ.get("TORCHSIM_VECTOR_LANE_STRIDE", default=2))
+    if name == "vpu_num_lanes":
+        return config_json["vpu_num_lanes"]
     if name == "CONFIG_SPAD_INFO":
         return {
             "spad_vaddr" : 0xD0000000,
             "spad_paddr" : 0x2000000000,
-            "spad_size" : int(os.environ.get("TORCHSIM_SPAD_SIZE", default=128)) << 10 # Note: spad size per lane
+            "spad_size" : config_json["vpu_spad_size_kb_per_lane"] << 10 # Note: spad size per lane
         }

     if name == "CONFIG_PRECISION":
         return 4 # 32bit
     if name == "CONFIG_NUM_CORES":
-        return 1
-    if name == "CONFIG_VLEN":
-        return 256 # 256bits / 32bits = 8 [elements]
-
-    # Tile size config
-    if name == "CONFIG_TORCHSIM_DIR":
-        return os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')
-
-    if name == "CONFIG_TORCHSIM_DUMP_PATH":
-        return os.environ.get('TORCHSIM_DUMP_PATH', default = f"{tempfile.gettempdir()}/torchinductor")
-    if name == "CONFIG_TORCHSIM_DUMP_FILE":
-        return int(os.environ.get('TORCHSIM_DUMP_FILE', default=True))
-    if name == "CONFIG_TORCHSIM_FUNCTIONAL_MODE":
-        return int(os.environ.get('TORCHSIM_FUNCTIONAL_MODE', default=True))
-    if name == "CONFIG_TORCHSIM_TIMING_MODE":
-        return int(os.environ.get("TORCHSIM_TIMING_MODE", True))
-    if name == "CONFIG_CLEANUP_DUMP_ARGS":
-        return int(os.environ.get('CLEANUP_DUMP_ARGS', default=False))
-
-    # LLVM PATH
-    if name == "CONFIG_TORCHSIM_LLVM_PATH":
-        return os.environ.get('TORCHSIM_LLVM_PATH', default="/usr/bin")
-    if name == "CONFIG_TORCHSIM_CUSTOM_PASS_PATH":
-        return os.environ.get('TORCHSIM_CUSTOM_PASS_PATH',
-            default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/GemminiLowerPass/build")
-    if name == "CONFIG_TORCHSIM_DUMP_MLIR_IR":
-        return int(os.environ.get("TORCHSIM_DUMP_MLIR_IR", default=False))
-    if name == "CONFIG_TORCHSIM_DUMP_LLVM_IR":
-        return int(os.environ.get("TORCHSIM_DUMP_LLVM_IR", default=False))
-
-    # TOGSim config
-    if name == "CONFIG_TOGSIM_CONFIG":
-        return os.environ.get('TORCHSIM_CONFIG',
-            default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json")
-    if name == "CONFIG_TOGSIM_EAGER_MODE":
-        return int(os.environ.get("TOGSIM_EAGER_MODE", default=False))
-    if name == "CONFIG_TOGSIM_DRYRUN":
-        return int(os.environ.get('TOGSIM_DRYRUN', default=False))
-    if name == "CONFIG_TOGSIM_DEBUG_LEVEL":
-        return os.environ.get("TOGSIM_DEBUG_LEVEL", "")
+        return config_json["num_cores"]
+    if name == "vpu_vector_length_bits":
+        return config_json["vpu_vector_length_bits"]
+
+    if name == "pytorchsim_functional_mode":
+        return config_json['pytorchsim_functional_mode']
+    if name == "pytorchsim_timing_mode":
+        return config_json['pytorchsim_timing_mode']
+
+    # Mapping strategy
+    if name == "codegen_mapping_strategy":
+        codegen_mapping_strategy = config_json["codegen_mapping_strategy"]
+        assert(codegen_mapping_strategy in ["heuristic", "autotune", "external-then-heuristic", "external-then-autotune"]), "Invalid mapping strategy!"
+        return codegen_mapping_strategy
+
+    if name == "codegen_external_mapping_file":
+        return config_json["codegen_external_mapping_file"]
+
+    # Autotune config
+    if name == "codegen_autotune_max_retry":
+        return config_json["codegen_autotune_max_retry"]
+    if name == "codegen_autotune_template_topk":
+        return config_json["codegen_autotune_template_topk"]

     # GEM5 config
-    if name == "CONFIG_GEM5_PATH":
-        return os.environ.get('GEM5_PATH', default="/workspace/gem5/build/RISCV/gem5.opt")
     if name == "CONFIG_GEM5_SCRIPT_PATH":
         return os.environ.get('GEM5_SCRIPT_PATH',
             default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/gem5_script/script_systolic.py")

-    # AUTOTUNE config
-    if name == "CONFIG_AUTOTUNE":
-        return int(os.environ.get('AUTOTUNE', default=False))
-    if name == "CONFIG_AUTOTUNE_TEMPLATE":
-        return int(os.environ.get('AUTOTUNE_TEMPLATE', default=False))
-    if name == "CONFIG_MAX_AUTOTUNE_TRY":
-        return int(os.environ.get('MAX_AUTOTUNE_TRY', default=10))
-    if name == "CONFIG_AUTOTUNE_TEMPLATE_TOPK":
-        return int(os.environ.get('AUTOTUNE_TEMPLATE_TOPK', default=4))
-
     # For block sparse
     if name == "CONFIG_BLOCK_SPARSE":
         return int(os.environ.get('BLOCK_SPARSE', default=0))

-    # For GEMM tile size
-    if name == "CONFIG_MANUAL_TILE_SIZE":
-        return int(os.environ.get('TORCHSIM_MANUAL_TILE_SIZE', default=False))
-    if name == "CONFIG_TILE_M":
-        return int(os.getenv("TORCHSIM_TILE_M", __getattr__("CONFIG_VECTOR_LANE")))
-    if name == "CONFIG_TILE_N":
-        return int(os.getenv("TORCHSIM_TILE_N", __getattr__("CONFIG_VECTOR_LANE")))
-    if name == "CONFIG_TILE_K":
-        return int(os.getenv("TORCHSIM_TILE_K", __getattr__("CONFIG_VECTOR_LANE")))
-
-    if name == "CONFIG_SUBTILE":
-        return int(os.environ.get('TORCHSIM_SUBTILE', default=True))
-    if name == "CONFIG_MANUAL_SUBTILE_SIZE":
-        return int(os.environ.get('TORCHSIM_MANUAL_SUBTILE_SIZE', default=False))
-    if name == "CONFIG_SUBTILE_M":
-        return int(os.environ.get('TORCHSIM_SUBTILE_M', default=__getattr__("CONFIG_VECTOR_LANE")))
-    if name == "CONFIG_SUBTILE_N":
-        return int(os.environ.get('TORCHSIM_SUBTILE_N', default=__getattr__("CONFIG_VECTOR_LANE")))
-    if name == "CONFIG_SUBTILE_K":
-        return int(os.environ.get('TORCHSIM_SUBTILE_K', default=__getattr__("CONFIG_VECTOR_LANE")))
-
     if name == "CONFIG_GEMM_CHEATSHEET_PATH":
         return os.environ.get('TORCHSIM_GEMM_CHEATSHEET_PATH',
             default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/validation/gemm_tpuv3_cheatsheet.json")
     # Compiler Optimization
-    if name == "CONFIG_COMPILER_OPTIMIZATION":
-        return os.environ.get('TORCHSIM_COMPILER_OPTIMIZATION', default="all") # options: all, none, custom
+    if name == "codegen_compiler_optimization":
+        return config_json["codegen_compiler_optimization"]

     # Advanced fusion options
     if name == "CONFIG_FUSION":
-        return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "fusion" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "fusion" in __getattr__("codegen_compiler_optimization")) else False
     if name == "CONFIG_FUSION_REDUCTION_EPILOGUE":
-        return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "reduction_epliogue" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "reduction_epliogue" in __getattr__("codegen_compiler_optimization")) else False
     if name == "CONFIG_FUSION_REDUCTION_REDUCTION":
-        return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "reduction_reduction" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "reduction_reduction" in __getattr__("codegen_compiler_optimization")) else False
     if name == "CONFIG_FUSION_PROLOGUE":
-        return True if ((__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all") or ("prologue" in __getattr__("CONFIG_COMPILER_OPTIMIZATION"))) else False
+        return True if ((__getattr__("codegen_compiler_optimization") == "all") or ("prologue" in __getattr__("codegen_compiler_optimization"))) else False
     if name == "CONFIG_SINGLE_BATCH_CONV":
-        return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "single_batch_conv" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "single_batch_conv" in __getattr__("codegen_compiler_optimization")) else False
     if name == "CONFIG_MULTI_TILE_CONV":
-        return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "multi_tile_conv" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "multi_tile_conv" in __getattr__("codegen_compiler_optimization")) else False
+    if name == "CONFIG_SUBTILE":
+        return True if (__getattr__("codegen_compiler_optimization") == "all" or "subtile" in __getattr__("codegen_compiler_optimization")) else False
+
+    if name == "CONFIG_TOGSIM_DEBUG_LEVEL":
+        return os.environ.get("TOGSIM_DEBUG_LEVEL", "")
+    if name == "CONFIG_TORCHSIM_DUMP_PATH":
+        return os.environ.get('TORCHSIM_DUMP_PATH', default = CONFIG_TORCHSIM_DIR)
+    if name == "CONFIG_TORCHSIM_LOG_PATH":
+        return os.environ.get('TORCHSIM_DUMP_LOG_PATH', default = os.path.join(CONFIG_TORCHSIM_DIR, "togsim_results"))
+
+    if name == "CONFIG_TOGSIM_EAGER_MODE":
+        return int(os.environ.get("TOGSIM_EAGER_MODE", default=False))

 # SRAM Buffer allocation plan
 def load_plan_from_module(module_path):
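The config rewrite above leans on the module-level __getattr__ hook (PEP 562): any extension_config.<name> lookup that misses a real module attribute falls into this function, which resolves the name from the TOGSim JSON config on the fly. A stripped-down sketch of the mechanism, with an illustrative file path and key set; note that the diff re-reads the JSON on every access, whereas this sketch caches the parsed file:

# extension_config_sketch.py -- illustrative only
import json
import os

_config_cache = None

def _config():
    # Load the JSON config once and memoize it (the real module reloads per access).
    global _config_cache
    if _config_cache is None:
        path = os.environ.get("TOGSIM_CONFIG", "configs/example.json")
        with open(path) as f:
            _config_cache = json.load(f)
    return _config_cache

def __getattr__(name):
    # Invoked only when normal module attribute lookup fails (PEP 562).
    if name in ("pytorchsim_functional_mode", "pytorchsim_timing_mode"):
        return _config()[name]
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

Callers then simply write extension_config.pytorchsim_timing_mode, and flipping a mode means editing one JSON file instead of juggling a dozen environment variables.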
8 changes: 4 additions & 4 deletions PyTorchSimFrontend/extension_op.py
@@ -46,7 +46,7 @@

 class MLIRExternKernelChoice(ExternKernelChoice):
     def call_name(self):
-        is_dryrun = int(os.environ.get('TOGSIM_DRYRUN', default=False))
+        is_dryrun = int(os.environ.get('TOGSIM_EAGER_MODE', default=False))
         if is_dryrun:
             return f"yield from sparse_mm_dummy_stonne_outer"
         return f"torch.ops.extension_op.{self.name}"
@@ -276,9 +276,9 @@ def sparse_mm_stonne_outer(a, b, out):
     onnx_path, attribute_path, c_result_path = prepare_outer_product_matrix(a, b, out)

     togsim_path = os.path.join(extension_config.CONFIG_TORCHSIM_DIR, "TOGSim")
-    stonne_config_path = f'{extension_config.CONFIG_TORCHSIM_DIR}/TOGSim/configs/stonne_single_c1_simple_noc.json'
-    backsim = TOGSimulator(togsim_path, stonne_config_path)
-    result_path = backsim.simulation(onnx_path)
+    stonne_config_path = f'{extension_config.CONFIG_TORCHSIM_DIR}/configs/stonne_single_c1_simple_noc.json'
+    TOGSim = TOGSimulator(togsim_path, stonne_config_path)
+    result_path = TOGSim.simulation(onnx_path)
     TOGSimulator.get_result_from_file(result_path)

     # Load result data
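For reference, the renamed TOGSim driver calls in both files follow one shape; a condensed sketch, where the class and method names come from the diffs above, the import path is hypothetical, and the defaults are placeholders:

import os

from togsim_interface import TOGSimulator  # hypothetical import path

def run_togsim(extension_config, onnx_path, attribute_path, args,
               vectorlane_size=128, loop_size=None, silent_mode=True):
    # Point the simulator at the TOGSim checkout and a hardware config JSON.
    togsim_path = os.path.join(extension_config.CONFIG_TORCHSIM_DIR, "TOGSim")
    sim = TOGSimulator(togsim_path, extension_config.CONFIG_TOGSIM_CONFIG)
    sim.vectorlane_size = vectorlane_size
    # Serialize kernel arguments into the attribute file TOGSim consumes,
    # then run the timing simulation on the tile graph.
    attribute_path = sim.create_attribute_file(attribute_path, args, loop_size=loop_size)
    result_path = sim.simulation(onnx_path, attribute_path, silent_mode=silent_mode)
    return TOGSimulator.get_result_from_file(result_path)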