runpod · deanq · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026 · Mar 17, 2026
diff --git a/.gitignore b/.gitignore
@@ -175,7 +175,8 @@ cython_debug/
 .pypirc
 .DS_Store
 
-# Flash state
+# Runpod Flash state
+.runpod/
 .flash/
 
 # Code intelligence

diff --git a/README.md b/README.md
@@ -39,13 +39,35 @@ flash login
 
 This saves your API key and allows you to use the Flash CLI and call `@Endpoint` functions.
 
-### Coding agent integration (optional)
+### Coding agent integration
+
+`flash init` writes an `AGENTS.md` at your project root containing CLI-first rules for AI coding tools (Cursor, Codex, Aider, Amp, Jules, etc.). It also creates `CLAUDE.md` as a symlink to `AGENTS.md` so Claude Code picks up the same rules.
+
+If `AGENTS.md` or `CLAUDE.md` already exists in your project, Flash leaves that file alone — your file, your rules.
+
+**Existing projects (already past flash init):**
+
+```bash
+# from your project root
+python -c "from runpod_flash.rules import install_agent_files; from pathlib import Path; install_agent_files(Path.cwd())"
+```
+
+**Tools using other conventions:** GitHub Copilot reads `.github/copilot-instructions.md` and Cursor (legacy) reads `.cursorrules`. If you use those, symlink or copy `AGENTS.md`:
+
+```bash
+mkdir -p .github && ln -s ../AGENTS.md .github/copilot-instructions.md
+ln -s AGENTS.md .cursorrules
+```
+
+**Opt out:** Delete `AGENTS.md` and the `CLAUDE.md` symlink that points to it. No `flash` subcommand other than `flash init` (or an explicit call to `install_agent_files(...)`) will re-create them.
+
+**Claude Code skill bundle (optional):** For richer Claude Code integration beyond static rules, install the cross-tool skill bundle:
 
 ```bash
 npx skills add runpod/skills
 ```
 
-You can review the `SKILL.md` file in the [runpod/skills repository](https://github.com/runpod/skills/blob/main/flash/SKILL.md).
+See the `SKILL.md` file in the [runpod/skills repository](https://github.com/runpod/skills/blob/main/flash/SKILL.md).
 
 ## Quickstart
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -51,6 +51,7 @@ where = ["src"]
 runpod_flash = [
     "cli/utils/skeleton_template/**/*",
     "cli/utils/skeleton_template/**/.*",
+    "rules/**/*",
 ]
 
 [tool.pytest.ini_options]

diff --git a/src/runpod_flash/cli/commands/init.py b/src/runpod_flash/cli/commands/init.py
@@ -9,6 +9,7 @@
 from rich.table import Table
 
 from ..utils.skeleton import create_project_skeleton, detect_file_conflicts
+from ...rules import install_agent_files
 
 console = Console()
 
@@ -76,6 +77,10 @@ def init_command(
     )
     with console.status(status_msg):
         create_project_skeleton(project_dir, should_overwrite)
+        installed_agent_files = install_agent_files(project_dir)
+
+    agents_md_created = (project_dir / "AGENTS.md") in installed_agent_files
+    claude_md_created = (project_dir / "CLAUDE.md") in installed_agent_files
 
     # Success output
     if is_current_dir:
@@ -93,6 +98,12 @@ def init_command(
     panel_content += "  ├── pyproject.toml\n"
     panel_content += "  ├── .env.example\n"
     panel_content += "  ├── requirements.txt\n"
+    if agents_md_created:
+        panel_content += (
+            "  ├── AGENTS.md             # AI agent rules (auto-generated)\n"
+        )
+    if claude_md_created:
+        panel_content += "  ├── CLAUDE.md             # symlink → AGENTS.md\n"
     panel_content += "  └── README.md\n"
 
     title = "Project Initialized" if is_current_dir else "Project Created"

diff --git a/src/runpod_flash/cli/utils/skeleton_template/.gitignore b/src/runpod_flash/cli/utils/skeleton_template/.gitignore
@@ -37,9 +37,8 @@ wheels/
 
 # Flash
 .flash/
-.runpod/
-dist/
 
 # OS
 .DS_Store
 Thumbs.db
+
diff --git a/src/runpod_flash/rules/AGENTS.md b/src/runpod_flash/rules/AGENTS.md
@@ -0,0 +1,131 @@
+# Flash Rules for AI Coding Agents
+
+## Use the Flash CLI — Do Not Call Runpod REST or GraphQL Directly
+
+For anything Flash supports, use the `flash` CLI. **Do not** generate `curl`, `httpx`, `requests`, or `gql` calls against `api.runpod.io`, `api.runpod.ai`, or `*.runpod.net` to build, deploy, list, scale, log, or invoke endpoints. The CLI handles auth, hashing, drift detection, manifest generation, and image selection. Direct API calls bypass all of that and will silently desync from Flash state.
+
+| Intent | Command | Do NOT |
+|--------|---------|--------|
+| Scaffold a project | `flash init <name>` | Hand-write `pyproject.toml` + manifest |
+| Local dev server | `flash dev` | Run `uvicorn` against generated server manually |
+| Package artifact | `flash build` | Tar `src/` and POST it |
+| Deploy to Runpod | `flash deploy` | Call `saveEndpoint` / REST `POST /v1/endpoints` |
+| Preview locally | `flash deploy --preview` | Hand-write `docker-compose.yml` |
+| Tear down | `flash undeploy` | Call `deleteEndpoint` mutation |
+| List apps/envs | `flash app list` / `flash env list` | Query GraphQL `myself.endpoints` |
+
+If a Flash command does not exist for what the user is asking, check `flash <area> --help` first, then surface the gap and ask before reaching for raw API calls. Raw Runpod SDK use (`runpod.Endpoint(...)`) is acceptable only for invoking already-deployed endpoints from non-Flash code — never for lifecycle operations.
+
+## Identity
+
+Flash is a Python SDK for deploying AI workloads to Runpod GPUs. You write decorated Python functions; Flash handles infrastructure, scaling, and deployment.
+
+## Endpoint Patterns
+
+### Pattern A: Queue-based function endpoint
+
+```python
+from runpod_flash import Endpoint, GpuType
+
+@Endpoint(
+    name="my-gpu-worker",
+    gpu=GpuType.NVIDIA_GEFORCE_RTX_4090,
+    workers=(0, 3),
+    dependencies=["torch"],
+)
+async def process(input_data: dict) -> dict:
+    import torch
+    return {"gpu": torch.cuda.get_device_name(0)}
+```
+
+### Pattern B: Load-balanced routes
+
+```python
+from runpod_flash import Endpoint
+
+api = Endpoint(name="my-api", cpu="cpu3c-1-2", workers=(1, 3))
+
+@api.get("/health")
+async def health():
+    return {"status": "ok"}
+
+@api.post("/compute")
+async def compute(numbers: list[float]) -> dict:
+    return {"sum": sum(numbers)}
+```
+
+### Pattern C: Class-based worker (stateful)
+
+```python
+from runpod_flash import Endpoint, GpuType
+
+@Endpoint(
+    name="my-model",
+    gpu=GpuType.NVIDIA_GEFORCE_RTX_4090,
+    workers=(1, 3),
+    dependencies=["torch", "transformers"],
+)
+class MyModel:
+    def __init__(self):
+        import torch
+        from transformers import pipeline
+        self.pipe = pipeline("text-generation", device="cuda")
+
+    async def generate(self, prompt: str) -> dict:
+        return {"text": self.pipe(prompt)[0]["generated_text"]}
+```
+
+### Pattern D: Pre-built container image (no decorated function)
+
+For workloads that already serve HTTP — vLLM, TGI, ComfyUI, Ollama, custom images — provision the endpoint with an `image=` argument and call it as a client. No Python handler to write. Flash deploys the image and gives you HTTP + queue access to it.
+
+```python
+from runpod_flash import Endpoint, GpuGroup
+
+vllm = Endpoint(
+    name="vllm",
+    image="runpod/worker-v1-vllm:v2.18.1",
+    gpu=GpuGroup.ADA_24,
+    workers=(0, 3),
+    env={"MODEL_NAME": "meta-llama/Llama-3.1-8B-Instruct"},
+)
+
+# QB-style — the Runpod vLLM worker speaks the queue protocol
+result = await vllm.runsync({"input": {"prompt": "hello", "max_tokens": 64}})
+
+# Or LB-style HTTP if you've routed through a load-balanced front
+models = await vllm.get("/v1/models")
+```
+
+When to use this pattern: the upstream project already publishes a serving image and you don't need to add any Python logic on top. If you need pre/post-processing, wrap the call inside a Pattern A or B `@Endpoint` instead.
+
+To attach to an already-deployed endpoint (no provisioning), pass `id=` instead of `image=`:
+
+```python
+ep = Endpoint(id="abc123")
+result = await ep.runsync({"prompt": "hello"})
+```
+
+## Rules That Break If Violated
+
+- `import torch` and heavy libraries INSIDE the function body, never at module level
+- Declare runtime dependencies in `@Endpoint(dependencies=[...])`, not in `pyproject.toml`
+- Endpoint functions can be sync (`def`) or async (`async def`). Use async when awaiting other endpoints or async I/O
+- `workers=N` for fixed count, `workers=(min, max)` for auto-scaling range
+- Class workers: model loading in `__init__`, request handling in instance methods
+- Cross-worker calls use `await` — call `@Endpoint`-decorated functions as if local; Flash handles remote dispatch
+- System-level packages (ffmpeg, libgl1) go in `system_dependencies`, not `dependencies`
+- `@Endpoint` is the canonical decorator. `@remote` is the legacy alias
+
+## Common Agent Mistakes
+
+| Mistake | Fix |
+|---------|-----|
+| Writing raw FastAPI instead of `@Endpoint` | Use `@Endpoint` decorator, Flash generates FastAPI |
+| `import torch` at top of file | Move inside function body |
+| Adding deps to `pyproject.toml` only | Add to `@Endpoint(dependencies=[...])` |
+| Forcing `async def` on all endpoints | Both sync and async are valid; use async only when awaiting |
+| Creating `main.py` or `app.py` | Not needed — Flash auto-discovers decorated functions |
+| Using `docker-compose` manually | Use `flash deploy --preview` for local container testing |
+| Wrapping vLLM/TGI/Comfy in a custom handler for no reason | Use `Endpoint(name=..., image=...)` and call it as a client via `.runsync()`/`.get()` — Pattern D |
+| Calling Runpod REST/GraphQL directly | Use `flash` CLI — see top of this file |
diff --git a/src/runpod_flash/rules/__init__.py b/src/runpod_flash/rules/__init__.py
@@ -0,0 +1,94 @@
+"""Flash agent rules — install AGENTS.md and (best-effort) CLAUDE.md symlink."""
+
+from __future__ import annotations
+
+import logging
+import os
+from importlib import resources
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["install_agent_files"]
+
+
+def _read_packaged_agents_md() -> str:
+    """Return the text of the AGENTS.md template shipped as package data.
+
+    Raises:
+        FileNotFoundError: If AGENTS.md is missing from the installed package
+            data; the original error is chained as the cause.
+    """
+    try:
+        return (resources.files("runpod_flash.rules") / "AGENTS.md").read_text(
+            encoding="utf-8"
+        )
+    except FileNotFoundError as exc:
+        raise FileNotFoundError(
+            "AGENTS.md not found in runpod_flash.rules package data. "
+            "The installed wheel may be incomplete."
+        ) from exc
+
+
+def _is_broken_symlink(path: Path) -> bool:
+    """Return True if path is a symlink whose target does not exist."""
+    return path.is_symlink() and not path.exists()
+
+
+def install_agent_files(target_dir: Path) -> list[Path]:
+    """Write AGENTS.md and a CLAUDE.md symlink into target_dir if absent.
+
+    Returns the list of paths actually created. Idempotent: files that
+    already exist (in any form) are left alone, and broken symlinks are
+    reported with a warning rather than replaced.
+
+    CLAUDE.md is only linked when AGENTS.md resolves to a real file, so a
+    broken AGENTS.md symlink never gains a second dangling link beside it.
+
+    Symlink failure (e.g. Windows without developer mode) is non-fatal —
+    AGENTS.md is still written and the failure is logged.
+
+    Args:
+        target_dir: Project directory; created (with parents) if missing.
+
+    Returns:
+        Paths created by this call, in creation order.
+
+    Raises:
+        FileNotFoundError: If the packaged AGENTS.md template is missing.
+    """
+    target_dir = Path(target_dir)
+    target_dir.mkdir(parents=True, exist_ok=True)
+    created: list[Path] = []
+
+    agents = target_dir / "AGENTS.md"
+    if _is_broken_symlink(agents):
+        logger.warning(
+            "AGENTS.md is a broken symlink at %s. Repair manually or remove it.",
+            agents,
+        )
+    elif not agents.exists():
+        agents.write_text(_read_packaged_agents_md(), encoding="utf-8")
+        created.append(agents)
+
+    claude = target_dir / "CLAUDE.md"
+    if _is_broken_symlink(claude):
+        logger.warning(
+            "CLAUDE.md is a broken symlink at %s. Repair manually or remove it.",
+            claude,
+        )
+    elif agents.exists() and not claude.exists():
+        # agents.exists() is False only for the broken-symlink case warned
+        # about above; linking to it would just create a dangling CLAUDE.md.
+        try:
+            # Relative target keeps the link valid if the project is moved.
+            os.symlink("AGENTS.md", claude)
+            created.append(claude)
+        except OSError as exc:
+            logger.warning(
+                "Could not create CLAUDE.md symlink (%s). "
+                "Claude Code users can run: ln -s AGENTS.md CLAUDE.md",
+                exc,
+            )
+
+    return created
diff --git a/tests/unit/cli/commands/test_init.py b/tests/unit/cli/commands/test_init.py
@@ -1,5 +1,6 @@
 """Tests for flash init command."""
 
+from pathlib import Path
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
@@ -317,3 +318,33 @@ def test_directory_created_matches_argument(
         assert (tmp_path / "my_awesome_project").exists()
         # Verify it's a directory
         assert (tmp_path / "my_awesome_project").is_dir()
+
+
+class TestInitInstallsAgentFiles:
+    """`flash init` wires agent-file installation into project creation."""
+
+    def test_init_calls_install_agent_files(self, tmp_path, monkeypatch):
+        """init invokes install_agent_files once with the project directory."""
+        from typer.testing import CliRunner
+
+        from runpod_flash.cli.main import app
+
+        # Run from the pytest tmp dir rather than the real working directory.
+        monkeypatch.chdir(tmp_path)
+
+        with (
+            patch(
+                "runpod_flash.cli.commands.init.install_agent_files",
+                # Mirror the real contract (list of created paths) so init's
+                # membership checks do not lean on MagicMock.__contains__.
+                return_value=[],
+            ) as mock_install,
+            patch("runpod_flash.cli.commands.init.create_project_skeleton"),
+            patch(
+                "runpod_flash.cli.commands.init.detect_file_conflicts", return_value=[]
+            ),
+        ):
+            result = CliRunner().invoke(app, ["init", "test_project"])
+
+        assert result.exit_code == 0, result.output
+        mock_install.assert_called_once_with(Path("test_project"))
diff --git a/tests/unit/rules/__init__.py b/tests/unit/rules/__init__.py