Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ src/cpp/build
AIEDebugLibrary.dll
*.lst
*.Identifier
ext/tests
10 changes: 7 additions & 3 deletions src/mldebug/aie_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,12 +243,16 @@ def _filter_tiles(self, tile_type):

def read_control_instr(self):
    """
    Read and return the value of the SPARE_REG control instruction from all memory tiles.

    The tiles are taken from self._filter_tiles(self.aie_iface.MEM_TILE_T) and
    each one is read through self.impl.read_register.

    Returns:
        dict[str, int]: Mapping of "MEM_TILE_{col}" to the SPARE_REG value for each memory tile.
    """
    # Look the register up once; it is the same for every tile.
    spare_reg = self.aie_iface.Memory_tile_registers["SPARE_REG"]
    # NOTE(review): keys carry only the column, so two memory tiles in the
    # same column would share a key and the later read would win - confirm
    # there is a single memory-tile row per column.
    return {
        f"MEM_TILE_{c}": self.impl.read_register(c, r, spare_reg)
        for c, r in self._filter_tiles(self.aie_iface.MEM_TILE_T)
    }

def initialize_stamp(self):
"""
Expand Down
30 changes: 25 additions & 5 deletions src/mldebug/batch_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
InteractiveController builds on this for interactive stepping.
"""

import dataclasses
import json
import pathlib
import sys
import time
Expand Down Expand Up @@ -242,13 +244,14 @@ def schedule_layer_start(self, next_layer):
be.continue_aie()

# Poll stamps until breakpoint is hit
max_attempts = 1200
while max_attempts > 0:
if all(be.poll_core_status() for be in bes_to_poll):
break
timeout = 10
start_time = time.time()
while time.time() - start_time < timeout:
if self.args.backend == "test":
break
max_attempts -= 1
time.sleep(0.1)
if all(be.poll_core_status() for be in bes_to_poll):
break

# When combo events are used, it takes a few cycles to
# hit the breakpoint, so pc might have moved
Expand Down Expand Up @@ -298,6 +301,7 @@ def _process_err(self):
self.status_handle.get(p + "/" + "aie_status_error.txt")
else:
self.status_handle.get("aie_status_error.txt")
self._write_run_summary("FAIL")
sys.exit(1)

def _process_end_breakpoint(self, layer, it, sid):
Expand Down Expand Up @@ -341,6 +345,7 @@ def _process_start_breakpoint(self, layer, it, sid=0):

if self.args.exit_at_layer and layer.layer_order >= self.args.exit_at_layer:
LOGGER.log(f"[INFO] Exiting debugger at Layer: {layer.layer_order}")
self._write_run_summary("SUCCESS")
sys.exit(0)

if self.args.run_flags.layer_status and first_it:
Expand Down Expand Up @@ -470,6 +475,7 @@ def execute_and_dump(self):
self.impls[sid].continue_aie()
LOGGER.log("\nFinished Execution")
self._handle_fsp()
self._write_run_summary("SUCCESS")

def _handle_fsp(self):
"""Handle end-of-run logic for VAIML Failsafe Partition mode."""
Expand All @@ -487,3 +493,17 @@ def _handle_fsp(self):
"to load the next Failsafe Partition and wait for "
"`waiting for user input`. Then press Enter here."
)

def _write_run_summary(self, status):
    """
    Record run state to run_summary.json.

    The file is written into self.args.top_output_dir and holds the given
    status together with the run flags converted to a plain dictionary.
    An OSError raised while writing is reported on stdout instead of being
    propagated.

    Args:
        status: Outcome label stored under the "status" key.
    """
    summary_path = pathlib.Path(self.args.top_output_dir) / "run_summary.json"
    summary = {
        "status": status,
        "run_flags": dataclasses.asdict(self.args.run_flags),
    }

    try:
        with open(summary_path, "w", encoding="utf-8") as fh:
            # default=str stringifies any flag value JSON cannot encode natively.
            json.dump(summary, fh, indent=2, default=str)
    except OSError as e:
        # IOError is an alias of OSError, so one clause covers both.
        print(f"Unable to write run summary file. {e}")
13 changes: 13 additions & 0 deletions src/mldebug/client_debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,19 @@ def read_all_core_pc(self):
print(f"\n=== Stamp {sid} Core PC ===")
impl.read_all_core_pc()

def read_control_instr(self):
    """
    Collect the SPARE_REG control instruction reported by every entry of self.aie_utls.

    Each utility is asked for its own mapping and the results are folded into
    a single dictionary in iteration order; if two utilities report the same
    key, the value from the later one is kept.

    Returns:
        dict[str, int]: Combined "MEM_TILE_{col}" to SPARE_REG value mapping.
    """
    return {
        tile: value
        for utl in self.aie_utls
        for tile, value in utl.read_control_instr().items()
    }

#
# START Advanced Mode Specific functionality
#
Expand Down
4 changes: 2 additions & 2 deletions src/mldebug/input_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class RunFlags:
mock_hang: bool
dump_temps: bool
multistamp: bool
enable_tg: bool
disable_tg: bool


@dataclass
Expand Down Expand Up @@ -121,7 +121,7 @@ def get_flag(s, default=False):
get_flag("mock_hang"),
get_flag("dump_temps"),
get_flag("multistamp"),
get_flag("enable_tg", default=True)
get_flag("disable_tg")
)


Expand Down
2 changes: 1 addition & 1 deletion src/mldebug/interactive_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def _build_shell_namespace(self):
rreg = h.impl.read_register
preg = h.impl.print_register
wreg = h.impl.write_register
control_instr = h.aie_utls[0].read_control_instr
control_instr = h.read_control_instr
add_brkpt = h.add_breakpoint
status = h.status_handle.get
uc_status = h.status_handle.get_uc_status
Expand Down
16 changes: 10 additions & 6 deletions src/mldebug/layer_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ def __init__(self, args):
self.mladf_report = None

has_bi = args.buffer_info and Path(args.buffer_info).is_file()
use_mladf = args.mladf_report and Path(args.mladf_report).is_file() and args.run_flags.enable_tg
use_mladf = args.mladf_report and Path(args.mladf_report).is_file() and not args.run_flags.disable_tg
data = None
# 1. Parse the buffer info to get Layout
if has_bi:
Expand Down Expand Up @@ -877,14 +877,18 @@ def _initialize_layers_from_workdir(self, args):
stamp.end_pc = f.final_lock_release_pc

# Under right conditions, we don't even go through iterations
if args.run_flags.skip_iter and args.run_flags.enable_tg:
if args.run_flags.skip_iter:
for idx, layer in enumerate(self.layers):
if idx >= len(self.layers) - 1:
layer.lcp.num_iter = 1
break
next_layer_stamps = self.layers[idx+1].stamps
if (layer.stamps[0].name != next_layer_stamps[0].name
and len(layer.stamps) == len(next_layer_stamps)
and all(layer.stamps[i].elf_name == next_layer_stamps[i].elf_name for i in range(len(layer.stamps)))
):
if args.run_flags.multistamp:
if (layer.stamps[0].name != next_layer_stamps[0].name
and len(layer.stamps) == len(next_layer_stamps)
and all(layer.stamps[i].elf_name == next_layer_stamps[i].elf_name for i in range(len(layer.stamps)))
):
layer.lcp.num_iter = 1
elif (layer.stamps[0].name != next_layer_stamps[0].name
and layer.stamps[0].elf_name == next_layer_stamps[0].elf_name ):
layer.lcp.num_iter = 1
6 changes: 6 additions & 0 deletions src/mldebug/memory_dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ def get_output_path(self, buffer=None, col=None, row=None, layer_order=None, bat
self._dir_cache.add(p)
return p

def get_base_output_dir(self):
    """
    Return the base output directory held in self.output_dir. Used by run summary
    """
    base_dir = self.output_dir
    return base_dir

def write_data_to_file(self, data, fname):
"""
Write an array of data to file in text or binary format.
Expand Down
7 changes: 5 additions & 2 deletions src/mldebug/mldebug_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,14 @@ def debug(args, timestamp, subgraph_name=None, fsp="0", folder_name=None):
print(f"Debugging New Failsafe Partition: {fsp}\n")
output_dir = f"{folder_name}_{timestamp}/{subgraph_name}/{fsp}"
args.subgraph_name = subgraph_name
args.top_output_dir = f"{folder_name}_{timestamp}"
else:
output_dir = f"output_{time.strftime('%m%d%H%M%S')}"
args.top_output_dir = output_dir

if args.output_dir is not None:
output_dir = args.output_dir + "/" + output_dir
args.top_output_dir = args.output_dir + "/" + args.top_output_dir
launch_debug(args, output_dir)


Expand Down Expand Up @@ -329,7 +332,7 @@ def app():
"skip_iter",
"dump_temps",
"multistamp",
"enable_tg"
"disable_tg"
],
help="Specify one or more runtime flags:\n"
"skip_dump : Do not dump memory\n"
Expand All @@ -341,7 +344,7 @@ def app():
"skip_iter : Skip iterations in batch mode when possible\n"
#"dump_temps : Write intermediate (.lst) files to disk\n"
"multistamp : Enable N Stamp/Batch mode\n",
#"enable_tg : Enable Step to TG layers\n",
#"disable_tg : Disable Step to TG layers\n",
# 'mock_hang' : Simulate hang at one of the layers in test mode
metavar="<flag1> <flag2>",
)
Expand Down
Loading