Generate id per task

jahooma · jahooma · commit 59665e4d00db · 2025-10-11T19:16:37.000-07:00
diff --git a/evals/git-evals2/gen-evals.ts b/evals/git-evals2/gen-evals.ts
@@ -246,6 +246,7 @@ export async function generateEvalFileV2({
           sha: commitSha,
           parentSha,
           spec,
+          id: promptResult.id,
           prompt: promptResult.prompt,
           supplementalFiles: promptResult.supplementalFiles,
           fileDiffs,
diff --git a/evals/git-evals2/migrate-evals-to-v2.ts b/evals/git-evals2/migrate-evals-to-v2.ts
@@ -104,8 +104,10 @@ async function migrateCommit(
       console.log(
         `Supplemental files: ${promptResult.supplementalFiles.length} files`,
       )
+      console.log(`Task ID: ${promptResult.id}`)
 
       return {
+        id: promptResult.id,
         sha: commitSha,
         parentSha,
         spec: oldCommit.spec,
diff --git a/evals/git-evals2/prompt-generator.ts b/evals/git-evals2/prompt-generator.ts
@@ -20,6 +20,11 @@ const promptGeneratorAgentDef: AgentDefinition = {
   outputSchema: {
     type: 'object',
     properties: {
+      id: {
+        type: 'string',
+        description:
+          'Short 2-3 word hyphenated task identifier (e.g., "fix-auth-bug", "add-user-profile", "refactor-login-flow")',
+      },
       reasoning: {
         type: 'string',
         description: 'Your thoughts about what should be in the prompt',
@@ -38,7 +43,7 @@ const promptGeneratorAgentDef: AgentDefinition = {
         description: 'Confidence score 0-1 in the quality of the prompt',
       },
     },
-    required: ['prompt', 'supplementalFiles', 'reasoning', 'confidence'],
+    required: ['id', 'prompt', 'supplementalFiles', 'reasoning', 'confidence'],
   },
   systemPrompt: `You are an expert at analyzing git commits and generating high-level user prompts.
 
@@ -54,8 +59,15 @@ ${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS}`,
   instructionsPrompt: `Your task:
 1. Analyze the git diff to understand what changed
 2. Use your tools (read_files, spawn_agents) to explore the codebase and understand context
-3. Identify supplemental files that would help a judge understand the change (exclude directly edited files)
-4. Generate a high-level user prompt that describes WHAT needs to be done (not HOW)
+3. Generate a short, descriptive task ID (2-3 hyphenated words like "fix-auth-bug" or "refactor-login-flow")
+4. Identify supplemental files that would help a judge understand the change (exclude directly edited files)
+5. Generate a high-level user prompt that describes WHAT needs to be done (not HOW)
+
+Key principles for the task ID:
+- 2-3 words maximum, hyphenated (e.g., "fix-memory-leak", "add-user-profile", "refactor-auth-flow")
+- Descriptive but concise
+- Use action verbs when appropriate (fix, add, remove, refactor, update, implement)
+- Lowercase with hyphens
 
 Key principles for the prompt:
 - Focus on the functional requirement, not implementation details
@@ -82,6 +94,7 @@ export async function generatePromptFromCommit({
   }
   agentDefinitions?: any[]
 }): Promise<{
+  id: string
   prompt: string
   supplementalFiles: string[]
   confidence: number
@@ -117,6 +130,7 @@ export async function generatePromptFromCommit({
   }
 
   return generatorResult.output.value as {
+    id: string
     prompt: string
     supplementalFiles: string[]
     reasoning: string
diff --git a/evals/git-evals2/types.ts b/evals/git-evals2/types.ts
@@ -29,6 +29,7 @@ export interface FileDiff {
 }
 
 export interface EvalCommitV2 {
+  id: string
   sha: string
   parentSha: string
   spec: string

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -29,6 +29,7 @@ export interface FileDiff {` |
| `29` | `29` | `}` |
| `30` | `30` | |
| `31` | `31` | `export interface EvalCommitV2 {` |
| | `32` | `+ id: string` |
| `32` | `33` | `sha: string` |
| `33` | `34` | `parentSha: string` |
| `34` | `35` | `spec: string` |