Skip to content

Commit 1ebeed7

Browse files
committed
refactor(common): consolidate XML parsing (Commit 2.7)
Consolidates XML parsing utilities into a dedicated directory structure: - common/src/util/xml/index.ts: Unified re-exports - common/src/util/xml/saxy.ts: Streaming XML parser (moved) - common/src/util/xml/tag-utils.ts: closeXml, getStopSequences - common/src/util/xml/tool-call-parser.ts: parseToolCallXml Added package.json export for @codebuff/common/util/xml Updated all consumers to use new import paths Comprehensive unit tests added: - tool-call-parser.test.ts: 39 tests for XML parsing - tag-utils.test.ts: 10 tests for tag utilities - Total: 49 new tests
1 parent 3130839 commit 1ebeed7

File tree

9 files changed

+1132
-2
lines changed

9 files changed

+1132
-2
lines changed

common/package.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,18 @@
55
"private": true,
66
"type": "module",
77
"exports": {
8+
"./analytics": {
9+
"bun": "./src/analytics/index.ts",
10+
"import": "./src/analytics/index.ts",
11+
"types": "./src/analytics/index.ts",
12+
"default": "./src/analytics/index.ts"
13+
},
14+
"./util/xml": {
15+
"bun": "./src/util/xml/index.ts",
16+
"import": "./src/util/xml/index.ts",
17+
"types": "./src/util/xml/index.ts",
18+
"default": "./src/util/xml/index.ts"
19+
},
820
"./*": {
921
"bun": "./src/*.ts",
1022
"import": "./src/*.ts",

common/src/util/__tests__/saxy.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { describe, expect, it } from 'bun:test'
22

3-
import { Saxy } from '../saxy'
3+
import { Saxy } from '../xml'
44

55
describe('Saxy XML Parser', () => {
66
// Helper function to process XML and get events
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import { describe, it, expect } from 'bun:test'
2+
3+
import { closeXml, getStopSequences } from '../xml'
4+
5+
describe('closeXml', () => {
6+
it('creates closing tag for simple name', () => {
7+
expect(closeXml('div')).toBe('</div>')
8+
})
9+
10+
it('creates closing tag for tool name', () => {
11+
expect(closeXml('read_files')).toBe('</read_files>')
12+
})
13+
14+
it('creates closing tag for camelCase name', () => {
15+
expect(closeXml('readFiles')).toBe('</readFiles>')
16+
})
17+
18+
it('creates closing tag for snake_case name', () => {
19+
expect(closeXml('write_file')).toBe('</write_file>')
20+
})
21+
22+
it('creates closing tag for name with numbers', () => {
23+
expect(closeXml('param1')).toBe('</param1>')
24+
})
25+
26+
it('creates closing tag for single character name', () => {
27+
expect(closeXml('a')).toBe('</a>')
28+
})
29+
30+
it('creates closing tag for long name', () => {
31+
expect(closeXml('very_long_tool_name_with_many_parts')).toBe(
32+
'</very_long_tool_name_with_many_parts>',
33+
)
34+
})
35+
36+
it('handles empty string', () => {
37+
expect(closeXml('')).toBe('</>')
38+
})
39+
})
40+
41+
describe('getStopSequences', () => {
42+
it('returns empty array for empty input', () => {
43+
expect(getStopSequences([])).toEqual([])
44+
})
45+
46+
it('creates stop sequence for single tool name', () => {
47+
expect(getStopSequences(['read_files'])).toEqual(['</codebuff_tool_read_files>'])
48+
})
49+
50+
it('creates stop sequences for multiple tool names', () => {
51+
expect(getStopSequences(['read_files', 'write_file', 'command'])).toEqual([
52+
'</codebuff_tool_read_files>',
53+
'</codebuff_tool_write_file>',
54+
'</codebuff_tool_command>',
55+
])
56+
})
57+
58+
it('preserves order of tool names', () => {
59+
const tools = ['z_tool', 'a_tool', 'm_tool']
60+
const result = getStopSequences(tools)
61+
expect(result).toEqual([
62+
'</codebuff_tool_z_tool>',
63+
'</codebuff_tool_a_tool>',
64+
'</codebuff_tool_m_tool>',
65+
])
66+
})
67+
68+
it('handles camelCase tool names', () => {
69+
expect(getStopSequences(['readFiles', 'writeFile'])).toEqual([
70+
'</codebuff_tool_readFiles>',
71+
'</codebuff_tool_writeFile>',
72+
])
73+
})
74+
75+
it('handles tool names with numbers', () => {
76+
expect(getStopSequences(['tool1', 'tool2'])).toEqual([
77+
'</codebuff_tool_tool1>',
78+
'</codebuff_tool_tool2>',
79+
])
80+
})
81+
82+
it('handles single character tool names', () => {
83+
expect(getStopSequences(['a', 'b', 'c'])).toEqual([
84+
'</codebuff_tool_a>',
85+
'</codebuff_tool_b>',
86+
'</codebuff_tool_c>',
87+
])
88+
})
89+
90+
it('works with readonly array', () => {
91+
const tools: readonly string[] = ['read_files', 'write_file']
92+
expect(getStopSequences(tools)).toEqual([
93+
'</codebuff_tool_read_files>',
94+
'</codebuff_tool_write_file>',
95+
])
96+
})
97+
98+
it('returns new array (does not mutate input)', () => {
99+
const tools = ['read_files']
100+
const result = getStopSequences(tools)
101+
expect(result).not.toBe(tools)
102+
})
103+
})
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
import { describe, it, expect } from 'bun:test'
2+
3+
import { parseToolCallXml } from '../xml'
4+
5+
describe('parseToolCallXml', () => {
6+
describe('basic parsing', () => {
7+
it('parses simple flat XML with single tag', () => {
8+
const xml = '<name>John</name>'
9+
expect(parseToolCallXml(xml)).toEqual({ name: 'John' })
10+
})
11+
12+
it('parses multiple flat tags', () => {
13+
const xml = '<name>John</name><age>30</age><city>NYC</city>'
14+
expect(parseToolCallXml(xml)).toEqual({
15+
name: 'John',
16+
age: '30',
17+
city: 'NYC',
18+
})
19+
})
20+
21+
it('parses tags with newlines between them', () => {
22+
const xml = `<name>John</name>
23+
<age>30</age>
24+
<city>NYC</city>`
25+
expect(parseToolCallXml(xml)).toEqual({
26+
name: 'John',
27+
age: '30',
28+
city: 'NYC',
29+
})
30+
})
31+
32+
it('parses tags with extra whitespace around XML', () => {
33+
const xml = ' <name>John</name> <age>30</age> '
34+
expect(parseToolCallXml(xml)).toEqual({
35+
name: 'John',
36+
age: '30',
37+
})
38+
})
39+
})
40+
41+
describe('whitespace handling', () => {
42+
it('trims leading whitespace from values', () => {
43+
const xml = '<content> hello</content>'
44+
expect(parseToolCallXml(xml)).toEqual({ content: 'hello' })
45+
})
46+
47+
it('trims trailing whitespace from values', () => {
48+
const xml = '<content>hello </content>'
49+
expect(parseToolCallXml(xml)).toEqual({ content: 'hello' })
50+
})
51+
52+
it('trims leading and trailing whitespace from values', () => {
53+
const xml = '<content> hello </content>'
54+
expect(parseToolCallXml(xml)).toEqual({ content: 'hello' })
55+
})
56+
57+
it('preserves internal whitespace in values', () => {
58+
const xml = '<content>hello world</content>'
59+
expect(parseToolCallXml(xml)).toEqual({ content: 'hello world' })
60+
})
61+
62+
it('preserves newlines within values', () => {
63+
const xml = '<content>line1\nline2\nline3</content>'
64+
expect(parseToolCallXml(xml)).toEqual({ content: 'line1\nline2\nline3' })
65+
})
66+
67+
it('preserves tabs within values', () => {
68+
const xml = '<content>col1\tcol2\tcol3</content>'
69+
expect(parseToolCallXml(xml)).toEqual({ content: 'col1\tcol2\tcol3' })
70+
})
71+
72+
it('handles multiline values with indentation', () => {
73+
const xml = `<content>
74+
function foo() {
75+
return 42;
76+
}
77+
</content>`
78+
expect(parseToolCallXml(xml)).toEqual({
79+
content: `function foo() {
80+
return 42;
81+
}`,
82+
})
83+
})
84+
})
85+
86+
describe('empty and edge cases', () => {
87+
it('returns empty object for empty string', () => {
88+
expect(parseToolCallXml('')).toEqual({})
89+
})
90+
91+
it('returns empty object for whitespace-only string', () => {
92+
expect(parseToolCallXml(' ')).toEqual({})
93+
expect(parseToolCallXml('\n\t\r')).toEqual({})
94+
})
95+
96+
it('handles empty tag values', () => {
97+
const xml = '<content></content>'
98+
expect(parseToolCallXml(xml)).toEqual({ content: '' })
99+
})
100+
101+
it('handles self-referencing tag names', () => {
102+
const xml = '<foo>bar</foo>'
103+
expect(parseToolCallXml(xml)).toEqual({ foo: 'bar' })
104+
})
105+
106+
it('returns empty object for invalid XML without matching tags', () => {
107+
const xml = '<name>John'
108+
expect(parseToolCallXml(xml)).toEqual({})
109+
})
110+
111+
it('returns empty object for mismatched tags', () => {
112+
const xml = '<name>John</age>'
113+
expect(parseToolCallXml(xml)).toEqual({})
114+
})
115+
})
116+
117+
describe('special characters in values', () => {
118+
it('handles angle brackets in values (escaped)', () => {
119+
const xml = '<content>&lt;div&gt;</content>'
120+
// Note: parseToolCallXml does NOT decode entities
121+
expect(parseToolCallXml(xml)).toEqual({ content: '&lt;div&gt;' })
122+
})
123+
124+
it('handles ampersands in values (escaped)', () => {
125+
const xml = '<content>foo &amp; bar</content>'
126+
expect(parseToolCallXml(xml)).toEqual({ content: 'foo &amp; bar' })
127+
})
128+
129+
it('handles quotes in values', () => {
130+
const xml = '<content>He said "hello"</content>'
131+
expect(parseToolCallXml(xml)).toEqual({ content: 'He said "hello"' })
132+
})
133+
134+
it('handles single quotes in values', () => {
135+
const xml = "<content>It's fine</content>"
136+
expect(parseToolCallXml(xml)).toEqual({ content: "It's fine" })
137+
})
138+
139+
it('handles special regex characters in values', () => {
140+
const xml = '<content>test.*pattern?[a-z]+</content>'
141+
expect(parseToolCallXml(xml)).toEqual({ content: 'test.*pattern?[a-z]+' })
142+
})
143+
144+
it('handles unicode characters in values', () => {
145+
const xml = '<content>Hello 世界 🌍</content>'
146+
expect(parseToolCallXml(xml)).toEqual({ content: 'Hello 世界 🌍' })
147+
})
148+
})
149+
150+
describe('real-world tool call examples', () => {
151+
it('parses read_files tool call', () => {
152+
const xml = `<paths>["src/index.ts", "src/utils.ts"]</paths>`
153+
expect(parseToolCallXml(xml)).toEqual({
154+
paths: '["src/index.ts", "src/utils.ts"]',
155+
})
156+
})
157+
158+
it('parses write_file tool call', () => {
159+
const xml = `<path>src/hello.ts</path>
160+
<content>export function hello() {
161+
return "Hello, World!";
162+
}</content>`
163+
expect(parseToolCallXml(xml)).toEqual({
164+
path: 'src/hello.ts',
165+
content: `export function hello() {
166+
return "Hello, World!";
167+
}`,
168+
})
169+
})
170+
171+
it('parses str_replace tool call', () => {
172+
const xml = `<path>src/file.ts</path>
173+
<old>const x = 1</old>
174+
<new>const x = 2</new>`
175+
expect(parseToolCallXml(xml)).toEqual({
176+
path: 'src/file.ts',
177+
old: 'const x = 1',
178+
new: 'const x = 2',
179+
})
180+
})
181+
182+
it('parses command tool call', () => {
183+
const xml = `<command>npm test</command>
184+
<timeout_seconds>60</timeout_seconds>`
185+
expect(parseToolCallXml(xml)).toEqual({
186+
command: 'npm test',
187+
timeout_seconds: '60',
188+
})
189+
})
190+
})
191+
192+
describe('tag name handling', () => {
193+
it('handles underscore in tag names', () => {
194+
const xml = '<tool_name>read_files</tool_name>'
195+
expect(parseToolCallXml(xml)).toEqual({ tool_name: 'read_files' })
196+
})
197+
198+
it('handles numbers in tag names', () => {
199+
const xml = '<param1>value1</param1><param2>value2</param2>'
200+
expect(parseToolCallXml(xml)).toEqual({
201+
param1: 'value1',
202+
param2: 'value2',
203+
})
204+
})
205+
206+
it('handles camelCase tag names', () => {
207+
const xml = '<toolName>readFiles</toolName>'
208+
expect(parseToolCallXml(xml)).toEqual({ toolName: 'readFiles' })
209+
})
210+
})
211+
212+
describe('duplicate and nested tags', () => {
213+
it('uses last value when same tag appears multiple times', () => {
214+
const xml = '<name>John</name><name>Jane</name>'
215+
// The regex pattern uses non-greedy matching, so both are parsed
216+
// and the second overwrites the first
217+
const result = parseToolCallXml(xml)
218+
expect(result.name).toBe('Jane')
219+
})
220+
221+
it('handles content with child-like text (but not actual nested tags)', () => {
222+
// parseToolCallXml does simple flat XML parsing and doesn't handle
223+
// properly nested same-name tags - it's documented as such
224+
const xml = '<content>text with &lt;child&gt;fake&lt;/child&gt; tags</content>'
225+
expect(parseToolCallXml(xml)).toEqual({
226+
content: 'text with &lt;child&gt;fake&lt;/child&gt; tags',
227+
})
228+
})
229+
})
230+
})

common/src/util/xml/index.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
export {
2+
Saxy,
3+
parseAttrs,
4+
type TextNode,
5+
type CDATANode,
6+
type CommentNode,
7+
type ProcessingInstructionNode,
8+
type TagOpenNode,
9+
type TagCloseNode,
10+
type NextFunction,
11+
type SaxyEvents,
12+
type SaxyEventNames,
13+
type SaxyEventArgs,
14+
type TagSchema,
15+
} from './saxy'
16+
export { parseToolCallXml } from './tool-call-parser'
17+
export { closeXml, getStopSequences } from './tag-utils'

0 commit comments

Comments
 (0)