getsentry · dcramer · May 5, 2026 · May 5, 2026 · devin-ai-integration · May 5, 2026
diff --git a/packages/junior-evals/README.md b/packages/junior-evals/README.md
@@ -147,13 +147,15 @@ Good conversational evals should:
 - Use concrete real-world scenarios (incident updates, planning follow-ups, capability setup requests), not abstract mechanics like "posted two replies."
 - Use judge criteria written in product language, not implementation language.
 - Use rubric sections that are easy for maintainers to scan in a failure: one `contract`, a short `pass` list, and focused `allow` / `fail` lists only when needed.
+- Use fake/nonexistent external targets unless the eval explicitly opts into live provider access.
 - Cover realistic failure behavior (clear user-visible errors) without depending on internal tool wiring.
 - Keep eval output payload user-facing (assistant posts + Slack-visible metadata), excluding low-level tool-call traces.
 
 Avoid:
 
 - Criteria tied to exact internal tool call names (`bash`, etc.) when user-visible behavior is what matters.
 - User prompts that prescribe exact internal commands or tool choices just to force the desired path.
+- Prompts that can hit arbitrary external URLs or mutate real provider resources when the behavior can be tested with fake references.
 - Cases that only validate mocks or internal state transitions without conversational context.
 
 ## Minimal Case

diff --git a/packages/junior-evals/evals/github/skill-workflows.eval.ts b/packages/junior-evals/evals/github/skill-workflows.eval.ts
@@ -82,7 +82,7 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => {
       },
       events: [
         threadMessage(
-          "Warden resolved its own review thread on https://github.com/getsentry/ops/pull/20366 even though the warning still applies. The warning was about `SCM_RPC_SHARED_SECRET` not being backported to the cookiecutter template, and the PR still shows `REVIEW_REQUIRED`.",
+          "Warden resolved its own review thread on getsentry/junior-eval-ops-reference-never-exists#20366 even though the warning still applies. The warning was about `SCM_RPC_SHARED_SECRET` not being backported to the cookiecutter template, and the PR still shows `REVIEW_REQUIRED`.",
           {
             thread: reporterRequesterThread,
             author: {
@@ -93,7 +93,7 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => {
           },
         ),
         mention(
-          "Create a GitHub issue for this in getsentry/warden. Include the issue body you filed in your reply so I can verify attribution.",
+          "Create a GitHub issue for this in getsentry/junior-eval-warden-never-exists. Include the issue body you filed in your reply so I can verify attribution.",
           {
             thread: reporterRequesterThread,
             author: {
@@ -109,7 +109,7 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => {
           "GitHub issue creation from a multi-user Slack thread preserves the original reporter separately from the action requester.",
         pass: [
           "The assistant posts exactly one reply.",
-          "The reply reports a created GitHub issue in getsentry/warden with an issue URL or issue number.",
+          "The reply reports a created GitHub issue in getsentry/junior-eval-warden-never-exists with an issue URL or issue number.",
           "The reply includes the filed issue body or enough quoted issue content to verify attribution.",
           "The shown issue content attributes the report to Bojan Oro.",
           "The shown issue content ends its delegated-action footer with `Action taken on behalf of David Cramer.`",
@@ -230,6 +230,16 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => {
     channel_id: "C-default-repo-issue",
     thread_ts: "17000000.default-repo-issue",
   };
+  const targetClassificationContextThread = {
+    id: "thread-target-classification-context",
+    channel_id: "C-target-classification-context",
+    thread_ts: "17000000.target-classification-context",
+  };
+  const targetClassificationExplicitThread = {
+    id: "thread-target-classification-explicit",
+    channel_id: "C-target-classification-explicit",
+    thread_ts: "17000000.target-classification-explicit",
+  };
 
   it("when creating an issue after repo setup, use the stored repo without inventing tool failures", async ({
     run,
@@ -316,4 +326,95 @@ describeEval("GitHub Skill Workflows", slackEvals, (it) => {
       }),
     });
   });
+
+  it("when drafting a fake issue from contextual foreign reference, keep the default repo as target", async ({
+    run,
+  }) => {
+    await run({
+      overrides: {
+        enable_test_credentials: true,
+        plugin_packages: ["@sentry/junior-github"],
+        reply_timeout_ms: 75_000,
+        test_credential_token: "eval-target-classification-context-token",
+        skill_dirs: ["../junior/skills"],
+      },
+      events: [
+        threadMessage(
+          "Set the default repo to getsentry/junior-eval-bot-never-exists for this channel. Do not verify it exists.",
+          {
+            thread: targetClassificationContextThread,
+            is_mention: true,
+          },
+        ),
+        threadMessage(
+          "We need a tracking issue for the Junior bot. This example from getsentry/junior-eval-reference-never-exists#123 shows GitHub issue references can be mistaken for the target repo. Draft the issue I should approve with target repo, title, and body. Do not run GitHub commands.",
+          {
+            thread: targetClassificationContextThread,
+            is_mention: true,
+          },
+        ),
+      ],
+      criteria: rubric({
+        contract:
+          "Draft a fake issue against the default repo while keeping the fake foreign issue reference as context.",
+        pass: [
+          "The assistant posts exactly two replies in order.",
+          "The first reply confirms default repo setup for getsentry/junior-eval-bot-never-exists.",
+          "The second reply says the target repo is getsentry/junior-eval-bot-never-exists.",
+          "The second reply treats getsentry/junior-eval-reference-never-exists#123 as context or a reference.",
+          "observed_tool_invocations does not include a bash invocation with `gh issue create`, `gh issue comment`, or `gh issue view`.",
+        ],
+        fail: [
+          "Do not choose getsentry/junior-eval-reference-never-exists as the action target.",
+          "Do not run GitHub commands against either fake repo.",
+          "Do not ask the user to provide the repo again.",
+        ],
+      }),
+    });
+  });
+
+  it("when confirming a fake explicit issue reference, use that issue as target", async ({
+    run,
+  }) => {
+    await run({
+      overrides: {
+        enable_test_credentials: true,
+        plugin_packages: ["@sentry/junior-github"],
+        reply_timeout_ms: 75_000,
+        test_credential_token: "eval-target-classification-explicit-token",
+        skill_dirs: ["../junior/skills"],
+      },
+      events: [
+        threadMessage(
+          "Set the default repo to getsentry/junior-eval-bot-never-exists for this channel. Do not verify it exists.",
+          {
+            thread: targetClassificationExplicitThread,
+            is_mention: true,
+          },
+        ),
+        threadMessage(
+          "Before I approve a later comment, confirm the target issue for getsentry/junior-eval-reference-never-exists#123. Do not run GitHub commands.",
+          {
+            thread: targetClassificationExplicitThread,
+            is_mention: true,
+          },
+        ),
+      ],
+      criteria: rubric({
+        contract:
+          "Confirm the explicitly referenced issue as target even when a default repo is set.",
+        pass: [
+          "The assistant posts exactly two replies in order.",
+          "The first reply confirms default repo setup for getsentry/junior-eval-bot-never-exists.",
+          "The second reply says the action target would be getsentry/junior-eval-reference-never-exists#123 or repo getsentry/junior-eval-reference-never-exists.",
+          "observed_tool_invocations does not include a bash invocation with `gh issue create`, `gh issue comment`, or `gh issue view`.",
+        ],
+        fail: [
+          "Do not choose getsentry/junior-eval-bot-never-exists as the action target.",
+          "Do not run GitHub commands against either fake repo.",
+          "Do not ask the user to restate the repository or issue number.",
+        ],
+      }),
+    });
+  });
 });
diff --git a/packages/junior-github/skills/github-code/SKILL.md b/packages/junior-github/skills/github-code/SKILL.md
@@ -20,7 +20,8 @@ Repository checkout, source-code investigation, and pull request operations via
 ### 1. Resolve operation and target
 
 - Determine whether the task is `clone`, `source-code investigation`, a pull request inspection (`view`, `list`, `diff`, `checks`), or a pull request mutation (`create`, `update`, `close`, `merge`).
-- Resolve repository (`owner/repo`). If not explicit, query channel config with `jr-rpc config get github.repo` before running any `gh` or `git` command. If still missing, ask the user.
+- Resolve repository from the requested action: explicit target wins; otherwise use `<configuration>` `github.repo`. If absent, run standalone `jr-rpc config get github.repo`. If still missing, ask the user.
+- Preserve non-target GitHub references when they materially support the work.
 - Run `jr-rpc config get github.repo` as its own bash command. Do not combine it with `cd`, `&&`, pipes, or any `gh` or `git` command.
 - After resolving a configured repo, pass it explicitly to the next `gh` command with `--repo owner/repo`; do not rely on implicit GitHub CLI repository discovery.
 - Resolve the pull request number for operations targeting an existing PR.

diff --git a/packages/junior-github/skills/github-issues/SKILL.md b/packages/junior-github/skills/github-issues/SKILL.md
@@ -22,7 +22,8 @@ Use only for GitHub issues. For pull requests, branches, pushes, or PR auth-orde
 ### 1. Resolve operation and target
 
 - Determine whether the task is `create`, `update`, `comment`, `labels`, `state`, or read-only inspection.
-- Resolve repository (`owner/repo`). If not explicit, query channel config with `jr-rpc config get github.repo` before running any `gh` command. If still missing, ask the user.
+- Resolve repository from the requested action: explicit target wins; otherwise use `<configuration>` `github.repo`. If absent, run standalone `jr-rpc config get github.repo`. If still missing, ask the user.
+- Preserve non-target GitHub references that materially support created issue or comment bodies.
 - Run `jr-rpc config get github.repo` as its own bash command. Do not combine it with `cd`, `&&`, pipes, or any `gh` command.
 - After resolving a configured repo, pass it explicitly to the next `gh` command with `--repo owner/repo`; do not rely on implicit GitHub CLI repository discovery.
 - Resolve the issue number for non-create operations.
@@ -57,6 +58,7 @@ Follow [references/research-rules.md](references/research-rules.md) for cross-ty
 - Generalize session framing — strip channel references, slash commands, Slack thread IDs, user @mentions, and transcript fragments; replace with the underlying technical problem.
 - Compress source material. Research notes, hypotheses, or transcripts become a short summary + scoped bullets — never paste raw investigation into the body.
 - Do not add desired outcome, expected behavior, or acceptance criteria unless the thread explicitly requests them.
+- Preserve source references that materially support the issue, such as non-target GitHub references, inline in the body.
 - When the request originated from a Slack thread or any on-behalf-of context, append a final line `Action taken on behalf of <name>.` using the action requester's real name. The action requester is the current `<requester>` or the person who explicitly asked you to create/update the issue, not necessarily the original reporter.
 
 **Attribution:**

diff --git a/packages/junior/src/chat/prompt.ts b/packages/junior/src/chat/prompt.ts
@@ -346,6 +346,7 @@ const HEADER =
 const TOOL_POLICY_RULES = [
   "- Tool schemas are the source of truth for parameters; tool names are case-sensitive, so call tools exactly by their exposed names and do not invent arguments.",
   "- Use tools for actionable work and for facts that are mutable, external, repository-backed, provider-backed, or requested as verified/current. Stable general knowledge and already-provided context may be answered directly.",
+  "- Resolve provider action targets before calls: explicit target wins; ambient `<configuration>` fills omitted targets. Treat non-target links/references as context.",
   "- Verification source order: conversation/thread context; user-provided attachments, links, and reference files; local/sandbox files when present; loaded skill references; repository/provider tools; public web. Use the nearest authoritative available source before weaker sources.",
   "- For repository or implementation questions, inspect the target repository first: local checkout when present, otherwise the configured GitHub/source provider. Do not treat loaded skill files as repo source unless the user asks about the skill. Cite file paths, symbols, PRs/issues, commits, or URLs that support the answer.",
   `- Sandbox-backed file and shell tools operate in an isolated workspace rooted at ${SANDBOX_WORKSPACE_ROOT}; readFile/writeFile paths are sandbox-workspace paths, bash runs inside that workspace, and attachFile accepts absolute or workspace-relative sandbox paths.`,
@@ -531,7 +532,7 @@ function buildContextSection(params: {
   if (configLines) {
     blocks.push(
       renderTag("configuration", [
-        "Install and conversation-scoped defaults. Channel overrides take precedence; follow explicit user input when it conflicts.",
+        "Ambient provider defaults; explicit targets win.",
         ...configLines,
       ]),
     );