Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.10.3] - 2026-01-07

### Bug Fixes

- Fix self-test creating a pending session on macOS (avoid `uname -p` subprocess)
- Fix self-test output showing summary message instead of script output

## [0.10.2] - 2026-01-07

### Bug Fixes
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "shannot"
version = "0.10.2"
version = "0.10.3"
description = "Sandboxed system administration for LLM agents"
readme = "README.md"
license = {text = "Apache-2.0"}
Expand Down
6 changes: 4 additions & 2 deletions shannot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,10 +766,12 @@ def cmd_status(args: argparse.Namespace) -> int:
result = run_local_self_test()
if result.success:
print(f" ✓ Self-test: passed ({result.elapsed_ms:.0f}ms)")
print(f" Output: {result.output!r}")
else:
print(" ✗ Self-test: FAILED")
print(f" Error: {result.error}")
if result.error:
print(f" Error: {result.error}")
if result.output:
print(f" Output: {result.output!r}")

if show_all:
print()
Expand Down
18 changes: 9 additions & 9 deletions shannot/selftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
from dataclasses import dataclass

# Minimal script that exercises basic sandbox functionality
# Uses platform.node() which verifies uname syscall virtualization
# Uses pure Python (no subprocess calls) to verify execution works
SELF_TEST_SCRIPT = """\
import platform
print('sandbox host:', platform.node())
import sys
print('sandbox ok:', sys.version_info[:2])
"""


Expand Down Expand Up @@ -67,11 +67,10 @@ def run_local_self_test() -> SelfTestResult:
elapsed_ms = (time.perf_counter() - start) * 1000

if result.returncode == 0:
# Extract output (last non-empty line from stdout)
# Extract first line of script output (skip summary messages)
stdout = result.stdout.decode().strip()
# Filter out any setup messages, get the actual script output
lines = [line for line in stdout.split("\n") if line.strip()]
output = lines[-1] if lines else ""
lines = [ln for ln in stdout.split("\n") if ln.strip() and not ln.startswith("***")]
output = lines[0] if lines else ""

return SelfTestResult(
success=True,
Expand Down Expand Up @@ -164,9 +163,10 @@ def run_remote_self_test(
elapsed_ms = (time.perf_counter() - start) * 1000

if result.returncode == 0:
# Extract first line of script output (skip summary messages)
stdout = result.stdout.decode().strip()
lines = [line for line in stdout.split("\n") if line.strip()]
output = lines[-1] if lines else ""
lines = [ln for ln in stdout.split("\n") if ln.strip() and not ln.startswith("***")]
output = lines[0] if lines else ""

return SelfTestResult(
success=True,
Expand Down
19 changes: 11 additions & 8 deletions test/test_selftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,10 @@ def test_script_content(self):
# Should parse without error
compile(SELF_TEST_SCRIPT, "<selftest>", "exec")

def test_script_uses_platform_node(self):
"""Test that the script uses platform.node()."""
assert "platform.node()" in SELF_TEST_SCRIPT
def test_script_uses_sys_version(self):
"""Test that the script uses sys.version_info (pure Python, no subprocess)."""
assert "sys.version_info" in SELF_TEST_SCRIPT
assert "sandbox ok" in SELF_TEST_SCRIPT
assert "print" in SELF_TEST_SCRIPT


Expand Down Expand Up @@ -79,7 +80,7 @@ def test_subprocess_success(self):
"""Test successful subprocess execution."""
mock_result = MagicMock()
mock_result.returncode = 0
mock_result.stdout = b"sandbox host: sandbox\n"
mock_result.stdout = b"sandbox ok: (3, 6)\n"
mock_result.stderr = b""

with (
Expand All @@ -90,7 +91,7 @@ def test_subprocess_success(self):
result = run_local_self_test()

assert result.success is True
assert result.output == "sandbox host: sandbox"
assert result.output == "sandbox ok: (3, 6)"
assert result.error is None
assert result.elapsed_ms > 0

Expand Down Expand Up @@ -130,10 +131,11 @@ def test_subprocess_timeout(self):
assert result.elapsed_ms == 30000

def test_output_parsing_multiline(self):
"""Test that we extract the last non-empty line from output."""
"""Test that we extract the first line, skipping summary messages."""
mock_result = MagicMock()
mock_result.returncode = 0
mock_result.stdout = b"Setup message\n\nsandbox host: sandbox\n"
# Script output comes first, summary message comes after
mock_result.stdout = b"sandbox ok: (3, 6)\n\n*** No commands or writes were queued. ***\n"
mock_result.stderr = b""

with (
Expand All @@ -144,4 +146,5 @@ def test_output_parsing_multiline(self):
result = run_local_self_test()

assert result.success is True
assert result.output == "sandbox host: sandbox"
# Should get the script output, not the summary message
assert result.output == "sandbox ok: (3, 6)"
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading