Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 17 additions & 1 deletion packages/core/src/loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,16 @@ function sanitizeMessage(text: string, userId: string, ownerId: string | undefin
return text;
}

/**
 * Make a userId safe to embed inside the single-line sender-identity marker
 * that is appended to the system prompt.
 *
 * The following characters are removed (not replaced):
 * - backticks and square brackets, which delimit the marker itself;
 * - every Unicode control character (general category Cc), which covers
 *   `\n`, `\r`, tab, vertical tab, form feed, NUL, DEL and the C1 range
 *   including NEL (U+0085);
 * - the Unicode line separator (U+2028) and paragraph separator (U+2029).
 *
 * Stripping only `\n` and `\r` is not enough: the other characters above can
 * also be rendered or tokenised as a line break, which would let a crafted
 * userId end the marker line and start a new line of injected text.
 *
 * @param userId - Raw sender identifier; treated as untrusted input.
 * @returns The userId with all of the characters listed above removed. The
 *   result may be an empty string if the input consisted solely of them.
 */
function sanitizeUserIdForPrompt(userId: string): string {
  // `u` flag enables \p{…} property escapes; Zl/Zp are U+2028/U+2029.
  return userId.replace(/[`\[\]\p{Cc}\p{Zl}\p{Zp}]/gu, '');
}

// ---------------------------------------------------------------------------
// Shield — in-memory pending approvals (conversational flow)
// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -873,8 +883,14 @@ export async function agentLoop(
const sanitizedMessage = sanitizeMessage(message, userId, context.ownerId);

// Build messages
// Embed sender identity directly in the system prompt so the LLM can
// correctly apply owner-vs-friend rules for this entire turn (including any
// follow-up tool-result messages). Placing it in a separate system message
// would leave subsequent tool-follow-up user messages without a sender marker
// and would shift message indices expected by tests.
const senderIdentityPrompt = `\n\n[Current message sender: userId = \`${sanitizeUserIdForPrompt(userId)}\`]`;
const messages: Message[] = [
{ role: 'system', content: systemPrompt },
{ role: 'system', content: systemPrompt + senderIdentityPrompt },
...history,
{ role: 'user', content: sanitizedMessage },
Comment thread
warengonzaga marked this conversation as resolved.
Comment thread
warengonzaga marked this conversation as resolved.
];
Comment thread
warengonzaga marked this conversation as resolved.
Expand Down
10 changes: 10 additions & 0 deletions packages/core/tests/loop.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ describe('agentLoop', () => {
expect(systemPrompt).toContain('## Plugin Setup Guidance');
expect(systemPrompt).toContain('For Discord, explain that they need to create an application');
expect(systemPrompt).toContain('do not pretend the plugin is configured');
// Sender-identity must be embedded in the system prompt so the LLM always
// knows who sent the message — verify the marker is present and the very
// next message is the user turn (no separate system message in between).
expect(systemPrompt).toContain('[Current message sender: userId = `web:test`]');
expect(firstPrompt.at(-1)?.role).toBe('user');
});

test('turns structured write tool calls into a natural final reply', async () => {
Expand Down Expand Up @@ -113,6 +118,11 @@ describe('agentLoop', () => {

expect(result).toBe('I refreshed the configuration. Please restart Tiny Claw when convenient.');
expect(prompts).toHaveLength(2);
// Sender-identity is embedded in the system prompt (prompts[0][0]) and
// immediately followed by the user message — no separate system entry.
expect(prompts[0]?.[0]?.role).toBe('system');
expect(prompts[0]?.[0]?.content).toContain('[Current message sender: userId = `web:test`]');
expect(prompts[0]?.at(-1)?.role).toBe('user');
expect(prompts[1]?.at(-2)?.role).toBe('assistant');
expect(prompts[1]?.at(-2)?.content).toContain('I used these tools and the results were:');
expect(prompts[1]?.at(-2)?.content).toContain('Restart required: refresh config');
Expand Down
Loading