Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Version control, CI and editor/agent tooling directories.
.git/
.github/
.claude/

# .dockerignore patterns are anchored at the build-context root, so the
# "**/" prefix is needed to also exclude virtualenvs, tool caches and
# bytecode that live below the root (for example under app/).
**/.venv/
**/.mypy_cache/
**/.pytest_cache/
**/.ruff_cache/
**/__pycache__/
**/*.pyc

# Development-only files inside the application package.
app/tests/
app/dist/
app/check_coverage.py
app/AGENTS.md
app/CLAUDE.md

# Top-level directories that are not needed inside the image.
docs/
assets/
scripts/
deploy/
dist/

# Top-level documentation and repository metadata.
*.md
# NOTE(review): no pattern above excludes app/pyproject.toml, so this
# exception currently has no effect - confirm what it was meant to re-include.
!app/pyproject.toml
LICENSE
mkdocs.yml
action.yml
3 changes: 1 addition & 2 deletions app/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ The dispatcher in `agent/invoke.py` routes based on the agent config type (`CliA
- `AGENT_MODEL`, `AGENT_MAX_TURNS`, `AGENT_CLI_PATH` — agent configuration.
- `ALLOWED_REPOS` — comma-separated repository full names to process (e.g. `owner/repo-a,owner/repo-b`). When unset, all repos are accepted.
- `REVIEWER_TRIGGERS` — comma-separated lifecycle events that auto-trigger the reviewer (e.g. `pr_opened,pr_push`).
- `CLEANUP_INTERVAL_HOURS` — workspace cleanup frequency (default: `6`; `0` disables).
- `LOG_LEVEL` — logging verbosity (default: `INFO`).

## File tree
Expand All @@ -100,5 +99,5 @@ nominal_code/
│ ├── runner/ # JobRunner protocol (base.py), ProcessRunner, KubernetesRunner
│ └── queue/ # JobQueue protocol (base.py), AsyncioJobQueue, RedisJobQueue
├── platforms/ # Platform protocol + GitHub/GitLab implementations (subpackages)
└── workspace/ # Git workspace management and cleanup
└── workspace/ # Git workspace management
```
123 changes: 115 additions & 8 deletions app/nominal_code/agent/api/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
import asyncio
import fnmatch
import logging
import re
from pathlib import Path
from typing import Any

from nominal_code.agent.sandbox import build_sanitized_env, sanitize_output
from nominal_code.llm.messages import ToolDefinition

logger: logging.Logger = logging.getLogger(__name__)
Expand All @@ -16,6 +18,21 @@
MAX_READ_LINES: int = 2000
MAX_LINE_LENGTH: int = 2000

# Matches shell syntax that could chain or substitute commands inside an
# otherwise allow-listed Bash command: variable/command expansion (``$`` and
# backticks), pipes, ``;`` and ``&`` separators, newlines (bash treats a
# newline as a command separator exactly like ``;``), process substitution
# (``<(...)`` / ``>(...)``), and the ``eval`` / ``exec`` / ``source`` builtins.
SHELL_INJECTION_PATTERN: re.Pattern[str] = re.compile(
    r"[$`|;&\n]|[<>]\(|\b(eval|exec|source)\b",
)

# Hostnames that ``git clone`` commands are allowed to target by default.
DEFAULT_ALLOWED_CLONE_HOSTS: frozenset[str] = frozenset(
    {
        "github.com",
        "gitlab.com",
    }
)

# Matches commands that start with ``git clone`` followed by whitespace.
GIT_CLONE_PATTERN: re.Pattern[str] = re.compile(
    r"^git\s+clone\s+",
)

SUBMIT_REVIEW_TOOL_NAME: str = "submit_review"

SUBMIT_REVIEW_TOOL: ToolDefinition = {
Expand Down Expand Up @@ -250,6 +267,11 @@ async def execute_tool(
"""
Execute a tool and return the result with an error flag.

Tool output is passed through ``sanitize_output`` to redact any secret
patterns before being returned to the LLM. Subprocesses (Grep, Bash)
run with a sanitized environment that strips secrets, and ``git clone``
commands are restricted to ``DEFAULT_ALLOWED_CLONE_HOSTS``.

Args:
name (str): The tool name (Read, Glob, Grep, Bash).
tool_input (dict[str, Any]): The tool input parameters from the API response.
Expand All @@ -263,20 +285,28 @@ async def execute_tool(

try:
if name == "Read":
return _execute_read(tool_input=tool_input, cwd=cwd), False
output: str = _execute_read(tool_input=tool_input, cwd=cwd)

return sanitize_output(output), False

if name == "Glob":
return _execute_glob(tool_input=tool_input, cwd=cwd), False
output = _execute_glob(tool_input=tool_input, cwd=cwd)

return sanitize_output(output), False

if name == "Grep":
return await _execute_grep(tool_input=tool_input, cwd=cwd), False
output = await _execute_grep(tool_input=tool_input, cwd=cwd)

return sanitize_output(output), False

if name == "Bash":
return await _execute_bash(
output = await _execute_bash(
tool_input=tool_input,
cwd=cwd,
allowed_tools=allowed_tools,
), False
)

return sanitize_output(output), False

raise ToolError(f"Unknown tool '{name}'")

Expand Down Expand Up @@ -314,6 +344,60 @@ def _parse_bash_patterns(allowed_tools: list[str] | None) -> list[str]:
return patterns


def _validate_bash_command(command: str) -> None:
    """
    Refuse a bash command that contains shell-injection syntax.

    The command is scanned with ``SHELL_INJECTION_PATTERN``; a match anywhere
    in the string causes a rejection. The pattern covers metacharacters such
    as ``$``, backticks, ``|``, ``;`` and ``&`` as well as the ``eval``,
    ``exec`` and ``source`` builtins, which could otherwise be used to read
    environment variables or chain extra commands onto a command that
    matches an allowed fnmatch pattern.

    Args:
        command (str): The bash command string to validate.

    Raises:
        ToolError: If the command contains disallowed shell metacharacters.
    """

    injection_match = SHELL_INJECTION_PATTERN.search(command)

    if injection_match is None:
        return

    raise ToolError("Command contains disallowed shell metacharacters")


def _validate_clone_host(
command: str,
allowed_hosts: frozenset[str],
) -> None:
"""
Validate that a ``git clone`` command targets an allowed hostname.

Checks for ``host/`` or ``host:`` substrings in the command, which
covers HTTPS (``https://github.com/...``) and SSH
(``git@github.com:...``) URL formats. Rejects ``file://`` URLs
unconditionally.

Args:
command (str): The full ``git clone`` command string.
allowed_hosts (frozenset[str]): Set of permitted hostnames.

Raises:
ToolError: If the command contains a ``file://`` URL or does not
match any allowed hostname.
"""

if "file://" in command:
raise ToolError("file:// protocol is not allowed")

for host in allowed_hosts:
if f"{host}/" in command or f"{host}:" in command:
return

raise ToolError(
f"git clone target host is not allowed. "
f"Permitted hosts: {sorted(allowed_hosts)}",
)


def _resolve_path(file_path: str, cwd: Path) -> Path:
"""
Resolve a file path relative to the working directory.
Expand Down Expand Up @@ -432,10 +516,15 @@ def _execute_glob(tool_input: dict[str, Any], cwd: Path) -> str:
return "\n".join(matches)


async def _execute_grep(tool_input: dict[str, Any], cwd: Path) -> str:
async def _execute_grep(
tool_input: dict[str, Any],
cwd: Path,
) -> str:
"""
Search file contents using grep.

Runs with a sanitized environment that strips secrets.

Args:
tool_input (dict[str, Any]): Must contain ``pattern``, optionally
``path`` and ``include``.
Expand All @@ -448,6 +537,7 @@ async def _execute_grep(tool_input: dict[str, Any], cwd: Path) -> str:
ToolError: If grep times out, fails to start, or exits with an error.
"""

sanitized_env: dict[str, str] = build_sanitized_env()
pattern: str = tool_input["pattern"]
raw_path: str = tool_input.get("path", "")
search_path: Path = Path(raw_path) if raw_path else cwd
Expand All @@ -470,6 +560,7 @@ async def _execute_grep(tool_input: dict[str, Any], cwd: Path) -> str:
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=cwd,
env=sanitized_env,
)

stdout_bytes, stderr_bytes = await asyncio.wait_for(
Expand Down Expand Up @@ -505,6 +596,11 @@ async def _execute_bash(
"""
Execute a bash command with allowlist validation.

Commands are checked for shell metacharacters that could enable injection
attacks (``$``, backticks, pipes, etc.). For ``git clone`` commands, the
target URL hostname is validated against ``DEFAULT_ALLOWED_CLONE_HOSTS``.
Runs with a sanitized environment that strips secrets.

Args:
tool_input (dict[str, Any]): Must contain ``command``.
cwd (Path): Working directory for the command.
Expand All @@ -515,14 +611,18 @@ async def _execute_bash(
str: Command output.

Raises:
ToolError: If the command is not allowed, times out, fails to start,
or exits with a non-zero code.
ToolError: If the command is not allowed, contains shell injection,
targets a disallowed host, times out, fails to start, or exits
with a non-zero code.
"""

sanitized_env: dict[str, str] = build_sanitized_env()
command: str = tool_input["command"]
bash_patterns: list[str] = _parse_bash_patterns(allowed_tools=allowed_tools)

if bash_patterns:
_validate_bash_command(command)

allowed: bool = any(
fnmatch.fnmatch(name=command, pat=pattern) for pattern in bash_patterns
)
Expand All @@ -532,6 +632,12 @@ async def _execute_bash(
f"Command not allowed. Permitted patterns: {bash_patterns}",
)

if GIT_CLONE_PATTERN.search(command):
_validate_clone_host(
command=command,
allowed_hosts=DEFAULT_ALLOWED_CLONE_HOSTS,
)

try:
process: asyncio.subprocess.Process = await asyncio.create_subprocess_exec(
"bash",
Expand All @@ -540,6 +646,7 @@ async def _execute_bash(
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=cwd,
env=sanitized_env,
)

stdout_bytes, stderr_bytes = await asyncio.wait_for(
Expand Down
34 changes: 31 additions & 3 deletions app/nominal_code/agent/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,31 @@
".pyi": "python",
}

# XML-style tag names used to delimit sections of a prompt (see ``wrap_tag``).
# NOTE(review): the ``untrusted-*`` names presumably mark content that
# originates from the pull request (diffs, comments, requests, hunks) -
# confirm against the call sites, which are outside this view.
TAG_UNTRUSTED_DIFF: str = "untrusted-diff"
TAG_UNTRUSTED_COMMENT: str = "untrusted-comment"
TAG_UNTRUSTED_REQUEST: str = "untrusted-request"
TAG_UNTRUSTED_HUNK: str = "untrusted-hunk"
TAG_FILE_PATH: str = "file-path"
TAG_BRANCH_NAME: str = "branch-name"
# Wraps the resolved coding guidelines appended to the system prompt.
TAG_REPO_GUIDELINES: str = "repo-guidelines"


def wrap_tag(tag: str, content: str) -> str:
    """
    Wrap content in XML boundary tags for prompt injection defense.

    Any closing tag for ``tag`` that appears inside ``content`` is
    neutralised by inserting a backslash before its ``/`` so the content
    cannot terminate the boundary early. Matching is case-insensitive and
    tolerates whitespace around the tag name, so variants such as
    ``</TAG>`` or ``</tag >`` are escaped as well as the exact ``</tag>``.

    Args:
        tag (str): The XML tag name.
        content (str): The content to wrap.

    Returns:
        str: The content wrapped in opening and closing XML tags.
    """

    import re

    closing_tag: re.Pattern[str] = re.compile(
        rf"<(/\s*{re.escape(tag)}\s*>)",
        re.IGNORECASE,
    )

    # A callable replacement avoids backslash-escape processing of the
    # replacement text and preserves the matched tag's original spelling.
    safe_content: str = closing_tag.sub(
        lambda match: "<\\" + match.group(1),
        content,
    )

    return f"<{tag}>\n{safe_content}\n</{tag}>"


def resolve_guidelines(
repo_path: Path,
Expand Down Expand Up @@ -77,12 +102,15 @@ def resolve_system_prompt(

guidelines: str = resolve_guidelines(
repo_path=workspace.repo_path,
default_guidelines=config.coding_guidelines,
language_guidelines=config.language_guidelines,
default_guidelines=config.prompts.coding_guidelines,
language_guidelines=config.prompts.language_guidelines,
file_paths=file_paths,
)

return bot_system_prompt + "\n\n" + guidelines
if guidelines:
return bot_system_prompt + "\n\n" + wrap_tag(TAG_REPO_GUIDELINES, guidelines)

return bot_system_prompt


def _load_repo_guidelines(repo_path: Path) -> str:
Expand Down
Loading
Loading