Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ce13a8b
feat(rules): add static flash-rules.md and rules package
deanq Mar 17, 2026
681f617
feat(rules): add rules engine with packaging and agent file generation
deanq Mar 17, 2026
b4f6ecf
feat(rules): add flash rules CLI command
deanq Mar 17, 2026
271f931
feat(rules): integrate agent file generation into flash init
deanq Mar 17, 2026
24fd85b
feat(rules): add dynamic context renderer from manifest data
deanq Mar 17, 2026
f4d9a85
feat(rules): wire dynamic context generation into flash rules command
deanq Mar 17, 2026
7711260
feat(rules): add --no-rules flag to flash init and .gitignore entry
deanq Mar 17, 2026
5d85bee
feat(rules): regenerate dynamic context on flash run and flash build
deanq Mar 17, 2026
a4d3bd3
fix(rules): wrap dynamic context in try/except, remove --disable flag…
deanq Mar 17, 2026
5131fe7
chore: ignore entire .flash directory
deanq Mar 17, 2026
eb7cc4b
fix(rules): restore main run.py shape, use CliRunner in init tests
deanq May 25, 2026
08dd26d
feat(rules): add CLI-first directive and surface generated files in R…
deanq May 25, 2026
62718c0
feat(rules): trim flash-rules.md to AGENTS.md, CLI-first at top
deanq May 25, 2026
bc39e08
feat(rules): add minimal install_agent_files
deanq May 25, 2026
a3998ba
fix(rules): warn on broken CLAUDE.md symlink, clear error on missing …
deanq May 25, 2026
8d0b1b3
chore(rules): replace heavy engine with minimal install_agent_files
deanq May 25, 2026
0c3ec22
docs(rules): rewrite agent integration section for minimal design
deanq May 25, 2026
eec27bd
chore(rules): drop stale .flash/context.md skeleton gitignore entry
deanq May 25, 2026
e67edfc
fix(rules): use 'flash dev' not 'flash run' in AGENTS.md
deanq May 25, 2026
b457380
feat(rules): add Pattern D for pre-built container images (BYOI)
deanq May 25, 2026
f69eb4e
docs(rules): use runpod/worker-v1-vllm:v2.18.1 in Pattern D
deanq May 25, 2026
3562c1e
chore(skeleton): drop legacy .runpod/ and duplicate dist/ from .gitig…
deanq May 25, 2026
1492bf0
docs(rules): restore runpod/skills bundle pointer in README
deanq May 25, 2026
cb196b6
fix(rules): address Copilot PR review feedback (code)
deanq May 25, 2026
635f7f1
docs(rules): heading rename and tighter opt-out wording
deanq May 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ cython_debug/
.pypirc
.DS_Store

# Flash state
# Runpod Flash state
.runpod/
.flash/

# Code intelligence
Expand Down
26 changes: 24 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,35 @@ flash login

This saves your API key and allows you to use the Flash CLI and call `@Endpoint` functions.

### Coding agent integration (optional)
### Coding agent integration

`flash init` writes an `AGENTS.md` at your project root containing CLI-first rules for AI coding tools (Cursor, Codex, Aider, Amp, Jules, etc.). It also creates `CLAUDE.md` as a symlink to `AGENTS.md` so Claude Code picks up the same rules.

If `AGENTS.md` or `CLAUDE.md` already exist in your project, Flash leaves them alone — your file, your rules.

**Existing projects (already past flash init):**

```bash
# from your project root
python -c "from runpod_flash.rules import install_agent_files; from pathlib import Path; install_agent_files(Path.cwd())"
```

**Tools using other conventions:** GitHub Copilot reads `.github/copilot-instructions.md` and Cursor (legacy) reads `.cursorrules`. If you use those, symlink or copy `AGENTS.md`:

```bash
mkdir -p .github
ln -s ../AGENTS.md .github/copilot-instructions.md
ln -s AGENTS.md .cursorrules
```

**Opt out:** Delete `AGENTS.md` and the `CLAUDE.md` symlink that points to it. No `flash` subcommand other than `flash init` (or an explicit call to `install_agent_files(...)`) will re-create them.

**Claude Code skill bundle (optional):** For richer Claude Code integration beyond static rules, install the cross-tool skill bundle:

```bash
npx skills add runpod/skills
```

You can review the `SKILL.md` file in the [runpod/skills repository](https://github.com/runpod/skills/blob/main/flash/SKILL.md).
See the `SKILL.md` file in the [runpod/skills repository](https://github.com/runpod/skills/blob/main/flash/SKILL.md).

## Quickstart

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ where = ["src"]
runpod_flash = [
"cli/utils/skeleton_template/**/*",
"cli/utils/skeleton_template/**/.*",
"rules/**/*",
]

[tool.pytest.ini_options]
Expand Down
11 changes: 11 additions & 0 deletions src/runpod_flash/cli/commands/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from rich.table import Table

from ..utils.skeleton import create_project_skeleton, detect_file_conflicts
from ...rules import install_agent_files

console = Console()

Expand Down Expand Up @@ -76,6 +77,10 @@ def init_command(
)
with console.status(status_msg):
create_project_skeleton(project_dir, should_overwrite)
installed_agent_files = install_agent_files(project_dir)

agents_md_created = (project_dir / "AGENTS.md") in installed_agent_files
claude_md_created = (project_dir / "CLAUDE.md") in installed_agent_files

# Success output
if is_current_dir:
Expand All @@ -93,6 +98,12 @@ def init_command(
panel_content += " ├── pyproject.toml\n"
panel_content += " ├── .env.example\n"
panel_content += " ├── requirements.txt\n"
if agents_md_created:
panel_content += (
" ├── AGENTS.md # AI agent rules (auto-generated)\n"
)
if claude_md_created:
panel_content += " ├── CLAUDE.md # symlink → AGENTS.md\n"
panel_content += " └── README.md\n"

title = "Project Initialized" if is_current_dir else "Project Created"
Expand Down
3 changes: 1 addition & 2 deletions src/runpod_flash/cli/utils/skeleton_template/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,8 @@ wheels/

# Flash
.flash/
.runpod/
dist/

Comment thread
deanq marked this conversation as resolved.
# OS
.DS_Store
Thumbs.db

131 changes: 131 additions & 0 deletions src/runpod_flash/rules/AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Flash Rules for AI Coding Agents

## Use the Flash CLI — Do Not Call Runpod REST or GraphQL Directly

For anything Flash supports, use the `flash` CLI. **Do not** generate `curl`, `httpx`, `requests`, or `gql` calls against `api.runpod.io`, `api.runpod.ai`, or `*.runpod.net` to build, deploy, list, scale, log, or invoke endpoints. The CLI handles auth, hashing, drift detection, manifest generation, and image selection. Direct API calls bypass all of that and will silently desync from Flash state.

| Intent | Command | Do NOT |
|--------|---------|--------|
| Scaffold a project | `flash init <name>` | Hand-write `pyproject.toml` + manifest |
| Local dev server | `flash dev` | Run `uvicorn` against generated server manually |
| Package artifact | `flash build` | Tar `src/` and POST it |
| Deploy to Runpod | `flash deploy` | Call `saveEndpoint` / REST `POST /v1/endpoints` |
| Preview locally | `flash deploy --preview` | Hand-write `docker-compose.yml` |
| Tear down | `flash undeploy` | Call `deleteEndpoint` mutation |
| List apps/envs | `flash app list` / `flash env list` | Query GraphQL `myself.endpoints` |

If no Flash command exists for what the user is asking, check `flash <area> --help` first, then surface that gap and ask before reaching for raw API calls. Raw Runpod SDK use (`runpod.Endpoint(...)`) is acceptable only for invoking already-deployed endpoints from non-Flash code — never for lifecycle operations.

## Identity

Flash is a Python SDK for deploying AI workloads to Runpod GPUs. You write decorated Python functions; Flash handles infrastructure, scaling, and deployment.

## Endpoint Patterns

### Pattern A: Queue-based function endpoint

```python
from runpod_flash import Endpoint, GpuType

@Endpoint(
name="my-gpu-worker",
gpu=GpuType.NVIDIA_GEFORCE_RTX_4090,
workers=(0, 3),
dependencies=["torch"],
)
async def process(input_data: dict) -> dict:
import torch
return {"gpu": torch.cuda.get_device_name(0)}
```

### Pattern B: Load-balanced routes

```python
from runpod_flash import Endpoint

api = Endpoint(name="my-api", cpu="cpu3c-1-2", workers=(1, 3))

@api.get("/health")
async def health():
return {"status": "ok"}

@api.post("/compute")
async def compute(numbers: list[float]) -> dict:
return {"sum": sum(numbers)}
```

### Pattern C: Class-based worker (stateful)

```python
from runpod_flash import Endpoint, GpuType

@Endpoint(
name="my-model",
gpu=GpuType.NVIDIA_GEFORCE_RTX_4090,
workers=(1, 3),
dependencies=["torch", "transformers"],
)
class MyModel:
def __init__(self):
import torch
from transformers import pipeline
self.pipe = pipeline("text-generation", device="cuda")

async def generate(self, prompt: str) -> dict:
return {"text": self.pipe(prompt)[0]["generated_text"]}
```

### Pattern D: Pre-built container image (no decorated function)

For workloads that already serve HTTP — vLLM, TGI, ComfyUI, Ollama, custom images — provision the endpoint with an `image=` argument and call it as a client. No Python handler to write. Flash deploys the image and gives you HTTP + queue access to it.

```python
from runpod_flash import Endpoint, GpuGroup

vllm = Endpoint(
name="vllm",
image="runpod/worker-v1-vllm:v2.18.1",
gpu=GpuGroup.ADA_24,
workers=(0, 3),
env={"MODEL_NAME": "meta-llama/Llama-3.1-8B-Instruct"},
)

# QB-style — the Runpod vLLM worker speaks the queue protocol
result = await vllm.runsync({"input": {"prompt": "hello", "max_tokens": 64}})

# Or LB-style HTTP if you've routed through a load-balanced front
models = await vllm.get("/v1/models")
```

When to use this pattern: the upstream project already publishes a serving image and you don't need to add any Python logic on top. If you need pre/post-processing, wrap the call inside a Pattern A or B `@Endpoint` instead.

To attach to an already-deployed endpoint (no provisioning), pass `id=` instead of `image=`:

```python
ep = Endpoint(id="abc123")
result = await ep.runsync({"prompt": "hello"})
```

## Rules That Break If Violated

- `import torch` and heavy libraries INSIDE the function body, never at module level
- Declare runtime dependencies in `@Endpoint(dependencies=[...])`, not in `pyproject.toml`
- Endpoint functions can be sync (`def`) or async (`async def`). Use async when awaiting other endpoints or async I/O
- `workers=N` for fixed count, `workers=(min, max)` for auto-scaling range
- Class workers: model loading in `__init__`, request handling in instance methods
- Cross-worker calls use `await` — call `@Endpoint`-decorated functions as if local; Flash handles remote dispatch
- System-level packages (ffmpeg, libgl1) go in `system_dependencies`, not `dependencies`
- `@Endpoint` is the canonical decorator. `@remote` is the legacy alias

## Common Agent Mistakes

| Mistake | Fix |
|---------|-----|
| Writing raw FastAPI instead of `@Endpoint` | Use `@Endpoint` decorator, Flash generates FastAPI |
| `import torch` at top of file | Move inside function body |
| Adding deps to `pyproject.toml` only | Add to `@Endpoint(dependencies=[...])` |
| Forcing `async def` on all endpoints | Both sync and async are valid; use async only when awaiting |
| Creating `main.py` or `app.py` | Not needed — Flash auto-discovers decorated functions |
| Using `docker-compose` manually | Use `flash deploy --preview` for local container testing |
| Wrapping vLLM/TGI/Comfy in a custom handler for no reason | Use `Endpoint(name=..., image=...)` and call it as a client via `.runsync()`/`.run()` or HTTP methods such as `.get()`/`.post()` — Pattern D |
| Calling Runpod REST/GraphQL directly | Use `flash` CLI — see top of this file |
67 changes: 67 additions & 0 deletions src/runpod_flash/rules/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Flash agent rules — install AGENTS.md and (best-effort) CLAUDE.md symlink."""

from __future__ import annotations

import logging
import os
from importlib import resources
from pathlib import Path

logger = logging.getLogger(__name__)

__all__ = ["install_agent_files"]


def _read_packaged_agents_md() -> str:
    """Return the text of the AGENTS.md resource shipped in ``runpod_flash.rules``.

    Raises:
        FileNotFoundError: If the resource cannot be found in the package
            data. The original error is chained and the message points at an
            incomplete installation.
    """
    try:
        packaged_rules = resources.files("runpod_flash.rules").joinpath("AGENTS.md")
        return packaged_rules.read_text(encoding="utf-8")
    except FileNotFoundError as exc:
        message = (
            "AGENTS.md not found in runpod_flash.rules package data. "
            "The installed wheel may be incomplete."
        )
        raise FileNotFoundError(message) from exc


def install_agent_files(target_dir: Path) -> list[Path]:
    """Install AGENTS.md and a CLAUDE.md symlink under ``target_dir``.

    Nothing that already exists is overwritten. AGENTS.md is written from the
    packaged copy only when no file is present at that path, and CLAUDE.md is
    created (as a relative symlink to ``AGENTS.md``) only when nothing exists
    there. A dangling symlink at either path is reported with a warning and
    left untouched.

    Args:
        target_dir: Directory to install into; created (with parents) if it
            does not exist.

    Returns:
        The paths this call actually created, in creation order. Empty when
        both files were already present.

    A failure to create the symlink (e.g. Windows without developer mode) is
    logged as a warning rather than raised, so AGENTS.md is still delivered.
    """
    root = Path(target_dir)
    root.mkdir(parents=True, exist_ok=True)
    written: list[Path] = []

    agents_path = root / "AGENTS.md"
    if not agents_path.exists():
        # exists() follows symlinks, so "missing but is a symlink" means the
        # link is dangling; never write through or replace it.
        if agents_path.is_symlink():
            logger.warning(
                "AGENTS.md is a broken symlink at %s. Repair manually or remove it.",
                agents_path,
            )
        else:
            agents_path.write_text(_read_packaged_agents_md(), encoding="utf-8")
            written.append(agents_path)

    claude_path = root / "CLAUDE.md"
    if not claude_path.exists():
        if claude_path.is_symlink():
            logger.warning(
                "CLAUDE.md is a broken symlink at %s. Repair manually or remove it.",
                claude_path,
            )
        else:
            try:
                # Relative target keeps the link valid if the project moves.
                os.symlink("AGENTS.md", claude_path)
            except OSError as exc:
                logger.warning(
                    "Could not create CLAUDE.md symlink (%s). "
                    "Claude Code users can run: ln -s AGENTS.md CLAUDE.md",
                    exc,
                )
            else:
                written.append(claude_path)

    return written
24 changes: 24 additions & 0 deletions tests/unit/cli/commands/test_init.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Tests for flash init command."""

from pathlib import Path
from unittest.mock import MagicMock, Mock, patch

import pytest
Expand Down Expand Up @@ -317,3 +318,26 @@ def test_directory_created_matches_argument(
assert (tmp_path / "my_awesome_project").exists()
# Verify it's a directory
assert (tmp_path / "my_awesome_project").is_dir()


class TestInitInstallsAgentFiles:
    """Checks that ``flash init`` invokes ``install_agent_files``."""

    def test_init_calls_install_agent_files(self, tmp_path, monkeypatch):
        """Invoke ``init test_project`` with the init helpers patched out.

        Asserts the command exits cleanly and that ``install_agent_files`` is
        called exactly once with ``Path("test_project")`` as its first
        positional argument.
        """
        from typer.testing import CliRunner

        from runpod_flash.cli.main import app

        # Run with the temporary directory as the working directory.
        monkeypatch.chdir(tmp_path)

        init_module = "runpod_flash.cli.commands.init"
        install_patch = patch(f"{init_module}.install_agent_files")
        skeleton_patch = patch(f"{init_module}.create_project_skeleton")
        conflicts_patch = patch(
            f"{init_module}.detect_file_conflicts", return_value=[]
        )

        with install_patch as mock_install, skeleton_patch, conflicts_patch:
            outcome = CliRunner().invoke(app, ["init", "test_project"])

        assert outcome.exit_code == 0, outcome.output
        mock_install.assert_called_once()
        positional_args = mock_install.call_args.args
        assert positional_args[0] == Path("test_project")
Empty file added tests/unit/rules/__init__.py
Empty file.
Loading
Loading