Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@
sys.path.insert(0, str(d))

from simpler.task_interface import ( # noqa: E402 # type: ignore[import-not-found]
CallConfig, # pyright: ignore[reportAttributeAccessIssue]
ChipCallable, # pyright: ignore[reportAttributeAccessIssue]
ChipCallConfig, # pyright: ignore[reportAttributeAccessIssue]
ChipStorageTaskArgs, # pyright: ignore[reportAttributeAccessIssue]
ChipWorker, # pyright: ignore[reportAttributeAccessIssue]
CoreCallable, # pyright: ignore[reportAttributeAccessIssue]
Expand Down Expand Up @@ -458,7 +458,7 @@ def run_single_task(
golden_mod.compute_golden(golden_with_inputs, params)

# Run on device
config = CallConfig()
config = ChipCallConfig()
config.block_dim = runtime_config.get("block_dim", 24)
config.aicpu_thread_num = runtime_config.get("aicpu_thread_num", 3)

Expand Down
23 changes: 20 additions & 3 deletions docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,19 +118,36 @@ destroy_device_context(ctx);
### Layer 3: Python API (`python/bindings/task_interface.cpp` via nanobind)

```python
from simpler.task_interface import ChipWorker, ChipCallable, ChipStorageTaskArgs, CallConfig
from simpler.task_interface import ChipWorker, ChipCallable, ChipStorageTaskArgs, ChipCallConfig

worker = ChipWorker()
worker.init(host_lib_path, aicpu_path, aicore_path, sim_context_lib_path="")
worker.set_device(device_id)

config = CallConfig()
config = ChipCallConfig()
config.block_dim = 24
config.aicpu_thread_num = 3
worker.run(callable, args, config)
worker.finalize()
```

### Python Type Naming Convention

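Layer 3 Python types use a **level-prefixed naming convention** that mirrors the
level model (see [Distributed Level Runtime](distributed_level_runtime.md)).
"Layer 3" here means the Python API layer described above, whereas `L2` and `L3+`
in the table are levels of that model, so a Layer 3 type such as `ChipWorker` is
an L2 type: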

| Concept | L2 (Chip) type | L3+ (Distributed) type | Unified factory |
| ------- | -------------- | ---------------------- | --------------- |
| Worker | `ChipWorker` | `DistWorker` | `Worker(level=N)` |
| Callable | `ChipCallable` | *(planned)* | — |
| TaskArgs | `ChipStorageTaskArgs` | *(planned)* | — |
| Config | `ChipCallConfig` | *(planned)* | — |

The unified `Worker(level=N)` factory already routes to the correct backend.
When new level-specific types are added (e.g. `DistCallConfig`), each concept
should follow the same pattern: a `Chip*` concrete type for L2, a `Dist*`
concrete type for L3+, and optionally a factory function that routes by level.

## Execution Flow

### 1. Python Setup Phase
Expand Down Expand Up @@ -161,7 +178,7 @@ worker.set_device(device_id)
### 3. Execution Phase

```text
worker.run(callable, args, CallConfig(block_dim, aicpu_thread_num))
worker.run(callable, args, ChipCallConfig(block_dim, aicpu_thread_num))
└─→ run_runtime(ctx, runtime, callable, args, ...)
Expand Down
6 changes: 2 additions & 4 deletions docs/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ TEST PASSED
### Python API Example

```python
from simpler.task_interface import ChipWorker, CallConfig
from simpler.task_interface import ChipWorker
from runtime_builder import RuntimeBuilder

# Build or locate pre-built runtime binaries
Expand All @@ -155,9 +155,7 @@ worker.init(host_path=str(binaries.host_path),
worker.set_device(device_id=0)

# Execute callable on device
config = CallConfig()
config.block_dim = 24
worker.run(chip_callable, orch_args, config)
worker.run(chip_callable, orch_args, block_dim=24)

# Cleanup
worker.reset_device()
Expand Down
4 changes: 2 additions & 2 deletions examples/scripts/code_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ def compute_golden(tensors: dict, params: dict) -> None:
# Argument construction — uses nanobind bindings from task_interface
# =============================================================================
from simpler.task_interface import ( # type: ignore[import-not-found]
CallConfig, # pyright: ignore[reportAttributeAccessIssue]
ChipCallable, # pyright: ignore[reportAttributeAccessIssue]
ChipCallConfig, # pyright: ignore[reportAttributeAccessIssue]
ChipStorageTaskArgs, # pyright: ignore[reportAttributeAccessIssue]
ChipWorker, # pyright: ignore[reportAttributeAccessIssue]
CoreCallable, # pyright: ignore[reportAttributeAccessIssue]
Expand Down Expand Up @@ -884,7 +884,7 @@ def _compile_one_kernel(kernel):
for k, v in initial_outputs.items():
outputs[k].copy_(v)

config = CallConfig()
config = ChipCallConfig()
config.block_dim = self.block_dim
config.aicpu_thread_num = self.aicpu_thread_num
if self.enable_profiling and round_idx == 0:
Expand Down
19 changes: 10 additions & 9 deletions python/bindings/task_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -566,15 +566,15 @@ NB_MODULE(_task_interface, m) {
return os.str();
});

// --- CallConfig ---
nb::class_<CallConfig>(m, "CallConfig")
// --- ChipCallConfig ---
nb::class_<ChipCallConfig>(m, "ChipCallConfig")
.def(nb::init<>())
.def_rw("block_dim", &CallConfig::block_dim)
.def_rw("aicpu_thread_num", &CallConfig::aicpu_thread_num)
.def_rw("enable_profiling", &CallConfig::enable_profiling)
.def("__repr__", [](const CallConfig &self) -> std::string {
.def_rw("block_dim", &ChipCallConfig::block_dim)
.def_rw("aicpu_thread_num", &ChipCallConfig::aicpu_thread_num)
.def_rw("enable_profiling", &ChipCallConfig::enable_profiling)
.def("__repr__", [](const ChipCallConfig &self) -> std::string {
std::ostringstream os;
os << "CallConfig(block_dim=" << self.block_dim << ", aicpu_thread_num=" << self.aicpu_thread_num
os << "ChipCallConfig(block_dim=" << self.block_dim << ", aicpu_thread_num=" << self.aicpu_thread_num
<< ", enable_profiling=" << (self.enable_profiling ? "True" : "False") << ")";
return os.str();
});
Expand All @@ -591,7 +591,8 @@ NB_MODULE(_task_interface, m) {
.def("finalize", &ChipWorker::finalize)
.def(
"run",
[](ChipWorker &self, const PyChipCallable &callable, ChipStorageTaskArgs &args, const CallConfig &config) {
[](ChipWorker &self, const PyChipCallable &callable, ChipStorageTaskArgs &args,
const ChipCallConfig &config) {
self.run(callable.buffer_.data(), &args, config);
},
nb::arg("callable"), nb::arg("args"), nb::arg("config")
Expand All @@ -600,7 +601,7 @@ NB_MODULE(_task_interface, m) {
"run_raw",
[](ChipWorker &self, uint64_t callable, uint64_t args, int block_dim, int aicpu_thread_num,
bool enable_profiling) {
CallConfig config;
ChipCallConfig config;
config.block_dim = block_dim;
config.aicpu_thread_num = aicpu_thread_num;
config.enable_profiling = enable_profiling;
Expand Down
8 changes: 4 additions & 4 deletions python/simpler/task_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
DIST_CHIP_MAILBOX_SIZE,
DIST_SUB_MAILBOX_SIZE,
ArgDirection,
CallConfig,
ChipCallable,
ChipCallConfig,
ChipStorageTaskArgs,
ContinuousTensor,
CoreCallable,
Expand Down Expand Up @@ -59,7 +59,7 @@
"ArgDirection",
"CoreCallable",
"ChipCallable",
"CallConfig",
"ChipCallConfig",
"ChipWorker",
"arg_direction_name",
"torch_dtype_to_datatype",
Expand Down Expand Up @@ -212,11 +212,11 @@ def run(self, callable, args, config=None, **kwargs):
Args:
callable: ChipCallable built from orchestration + kernel binaries.
args: ChipStorageTaskArgs for this invocation.
config: Optional CallConfig. If None, a default is created.
config: Optional ChipCallConfig. If None, a default is created.
**kwargs: Overrides applied to config (e.g. block_dim=24).
"""
if config is None:
config = CallConfig()
config = ChipCallConfig()
for k, v in kwargs.items():
setattr(config, k, v)
self._impl.run(callable, args, config)
Expand Down
4 changes: 2 additions & 2 deletions python/simpler/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,9 @@ def run(self, task_or_payload, args=None, **kwargs) -> None:
if self.level == 2:
assert self._chip_worker is not None
if isinstance(task_or_payload, WorkerPayload):
from .task_interface import CallConfig # noqa: PLC0415
from .task_interface import ChipCallConfig # noqa: PLC0415

config = CallConfig()
config = ChipCallConfig()
config.block_dim = task_or_payload.block_dim
config.aicpu_thread_num = task_or_payload.aicpu_thread_num
config.enable_profiling = task_or_payload.enable_profiling
Expand Down
4 changes: 2 additions & 2 deletions simpler_setup/scene_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,9 +353,9 @@ def build_callable(self, platform):

def _build_config(self, config_dict):
ensure_python_path()
from simpler.task_interface import CallConfig # noqa: PLC0415
from simpler.task_interface import ChipCallConfig # noqa: PLC0415

config = CallConfig()
config = ChipCallConfig()
config.block_dim = config_dict.get("block_dim", 1)
config.aicpu_thread_num = config_dict.get("aicpu_thread_num", 3)
return config
Expand Down
4 changes: 2 additions & 2 deletions src/common/worker/chip_worker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,14 @@ void ChipWorker::finalize() {
}

void ChipWorker::run(const WorkerPayload &payload) {
CallConfig config;
ChipCallConfig config;
config.block_dim = payload.block_dim;
config.aicpu_thread_num = payload.aicpu_thread_num;
config.enable_profiling = payload.enable_profiling;
run(payload.callable, payload.args, config);
}

void ChipWorker::run(const void *callable, const void *args, const CallConfig &config) {
void ChipWorker::run(const void *callable, const void *args, const ChipCallConfig &config) {
if (!device_set_) {
throw std::runtime_error("ChipWorker device not set; call set_device() first");
}
Expand Down
4 changes: 2 additions & 2 deletions src/common/worker/chip_worker.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include "dist_types.h"

struct CallConfig {
struct ChipCallConfig {
int block_dim = 24;
int aicpu_thread_num = 3;
bool enable_profiling = false;
Expand Down Expand Up @@ -55,7 +55,7 @@ class ChipWorker : public IWorker {
void run(const WorkerPayload &payload) override;

// Direct invocation (used by Python wrapper and internal tests).
void run(const void *callable, const void *args, const CallConfig &config);
void run(const void *callable, const void *args, const ChipCallConfig &config);

int device_id() const { return device_id_; }
bool initialized() const { return initialized_; }
Expand Down
20 changes: 10 additions & 10 deletions tests/ut/py/test_chip_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# See LICENSE in the root of the software repository for the full text of the License.
# -----------------------------------------------------------------------------------------------------------
# ruff: noqa: E402
"""Tests for CallConfig and ChipWorker state machine."""
"""Tests for ChipCallConfig and ChipWorker state machine."""

import sys
from pathlib import Path
Expand All @@ -19,22 +19,22 @@
if _python_dir not in sys.path:
sys.path.insert(0, _python_dir)

from _task_interface import CallConfig, _ChipWorker # pyright: ignore[reportMissingImports]
from _task_interface import ChipCallConfig, _ChipWorker # pyright: ignore[reportMissingImports]

# ============================================================================
# CallConfig tests
# ChipCallConfig tests
# ============================================================================


class TestCallConfig:
class TestChipCallConfig:
def test_defaults(self):
config = CallConfig()
config = ChipCallConfig()
assert config.block_dim == 24
assert config.aicpu_thread_num == 3
assert config.enable_profiling is False

def test_setters(self):
config = CallConfig()
config = ChipCallConfig()
config.block_dim = 32
config.aicpu_thread_num = 4
config.enable_profiling = True
Expand All @@ -43,7 +43,7 @@ def test_setters(self):
assert config.enable_profiling is True

def test_repr(self):
config = CallConfig()
config = ChipCallConfig()
r = repr(config)
assert "block_dim=24" in r
assert "enable_profiling=False" in r
Expand All @@ -65,7 +65,7 @@ def test_run_before_set_device_raises(self):
from _task_interface import ChipCallable, ChipStorageTaskArgs # noqa: PLC0415

worker = _ChipWorker()
config = CallConfig()
config = ChipCallConfig()
args = ChipStorageTaskArgs()

# Build a minimal ChipCallable for the test
Expand Down Expand Up @@ -112,11 +112,11 @@ def test_init_with_nonexistent_lib_raises(self):
class TestChipWorkerPython:
def test_import(self):
from simpler.task_interface import ( # noqa: PLC0415
CallConfig as PyCallConfig, # pyright: ignore[reportAttributeAccessIssue]
ChipCallConfig as PyChipCallConfig, # pyright: ignore[reportAttributeAccessIssue]
)
from simpler.task_interface import ChipWorker # noqa: PLC0415 # pyright: ignore[reportAttributeAccessIssue]

worker = ChipWorker()
assert worker.initialized is False
assert worker.device_set is False
assert isinstance(PyCallConfig(), CallConfig)
assert isinstance(PyChipCallConfig(), ChipCallConfig)
Loading