Skip to content

Commit f8fca67

Browse files
committed
feat: deduplicate multi-artifact package results
PyPI packages like numpy return one NDJSON line per artifact (sdist, wheels for each platform). This floods the agent with duplicate results for the same package.

- Add lib/artifacts.ts with deduplicateArtifacts() that groups results by (type, namespace, name, version) and selects one representative per group (source dist > universal wheel > first artifact)
- Add optional `platform` parameter to depscore tool for agents that can detect the user's OS/arch (e.g. 'darwin-arm64', 'linux-x64')
- Filter out purlError/summary NDJSON lines before processing
- Add 18 unit tests for deduplication and platform matching
- Add 2 integration tests for numpy deduplication with/without platform

Made-with: Cursor
1 parent eb50c0c commit f8fca67

4 files changed

Lines changed: 313 additions & 4 deletions

File tree

artifacts.test.ts

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
#!/usr/bin/env node
2+
import { test } from 'node:test'
3+
import assert from 'node:assert'
4+
import { deduplicateArtifacts } from './lib/artifacts.ts'
5+
import type { ArtifactData } from './lib/artifacts.ts'
6+
7+
/**
 * Build a test fixture describing pypi `numpy@1.26.0` with a full score object.
 * Any field present in `overrides` replaces (or adds to) the defaults.
 */
function makeArtifact (overrides: Partial<ArtifactData> = {}): ArtifactData {
  const defaults: ArtifactData = {
    type: 'pypi',
    name: 'numpy',
    version: '1.26.0',
    score: { overall: 0.95, supply_chain: 0.9, quality: 0.8, maintenance: 0.85, vulnerability: 1.0, license: 1.0 }
  }
  // `defaults` is a fresh object on every call, so merging into it in place is safe.
  return Object.assign(defaults, overrides)
}
16+
17+
test('deduplicateArtifacts', async (t) => {
  // One fixture per release filename; all of them share makeArtifact's default identity.
  const withReleases = (...releases: string[]): ArtifactData[] =>
    releases.map(release => makeArtifact({ release }))

  // Exactly one artifact must survive; when given, it must carry `expectedRelease`.
  const assertSingle = (result: ArtifactData[], expectedRelease?: string): void => {
    assert.strictEqual(result.length, 1)
    if (expectedRelease !== undefined) {
      assert.strictEqual(result[0]!.release, expectedRelease)
    }
  }

  await t.test('single artifact passes through unchanged', () => {
    const result = deduplicateArtifacts(withReleases('numpy-1.26.0.tar.gz'))
    assertSingle(result, 'numpy-1.26.0.tar.gz')
  })

  await t.test('multiple artifacts for same package are deduplicated to one', () => {
    const result = deduplicateArtifacts(withReleases(
      'numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl',
      'numpy-1.26.0-cp312-cp312-macosx_14_0_arm64.whl',
      'numpy-1.26.0-cp312-cp312-win_amd64.whl',
      'numpy-1.26.0.tar.gz'
    ))
    assertSingle(result)
  })

  await t.test('source dist is preferred over wheels when no platform specified', () => {
    const result = deduplicateArtifacts(withReleases(
      'numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.whl',
      'numpy-1.26.0.tar.gz',
      'numpy-1.26.0-cp312-cp312-win_amd64.whl'
    ))
    assertSingle(result, 'numpy-1.26.0.tar.gz')
  })

  await t.test('universal wheel is preferred when no sdist available', () => {
    const result = deduplicateArtifacts(withReleases(
      'requests-2.31.0-cp312-cp312-manylinux_2_17_x86_64.whl',
      'requests-2.31.0-py3-none-any.whl',
      'requests-2.31.0-cp312-cp312-win_amd64.whl'
    ))
    assertSingle(result, 'requests-2.31.0-py3-none-any.whl')
  })

  await t.test('platform hint selects darwin-arm64 wheel', () => {
    const candidates = withReleases(
      'numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.whl',
      'numpy-1.26.0-cp312-cp312-macosx_14_0_arm64.whl',
      'numpy-1.26.0.tar.gz'
    )
    assertSingle(deduplicateArtifacts(candidates, 'darwin-arm64'), 'numpy-1.26.0-cp312-cp312-macosx_14_0_arm64.whl')
  })

  await t.test('platform hint selects linux-x64 wheel', () => {
    const candidates = withReleases(
      'numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl',
      'numpy-1.26.0-cp312-cp312-macosx_14_0_arm64.whl',
      'numpy-1.26.0.tar.gz'
    )
    assertSingle(deduplicateArtifacts(candidates, 'linux-x64'), 'numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl')
  })

  await t.test('platform hint selects win32-x64 wheel', () => {
    const candidates = withReleases(
      'numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.whl',
      'numpy-1.26.0-cp312-cp312-win_amd64.whl',
      'numpy-1.26.0.tar.gz'
    )
    assertSingle(deduplicateArtifacts(candidates, 'win32-x64'), 'numpy-1.26.0-cp312-cp312-win_amd64.whl')
  })

  await t.test('platform hint with no match falls back to source dist', () => {
    const candidates = withReleases(
      'numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.whl',
      'numpy-1.26.0.tar.gz'
    )
    assertSingle(deduplicateArtifacts(candidates, 'win32-x64'), 'numpy-1.26.0.tar.gz')
  })

  await t.test('different packages are not deduplicated', () => {
    const numpy = makeArtifact({ name: 'numpy', release: 'numpy-1.26.0.tar.gz' })
    const scipy = makeArtifact({ name: 'scipy', release: 'scipy-1.11.0.tar.gz' })
    assert.strictEqual(deduplicateArtifacts([numpy, scipy]).length, 2)
  })

  await t.test('different versions of same package are not deduplicated', () => {
    const newer = makeArtifact({ version: '1.26.0', release: 'numpy-1.26.0.tar.gz' })
    const older = makeArtifact({ version: '1.25.0', release: 'numpy-1.25.0.tar.gz' })
    assert.strictEqual(deduplicateArtifacts([newer, older]).length, 2)
  })

  await t.test('artifacts without release field use first-in-group', () => {
    const first = makeArtifact({ score: { overall: 0.9 } })
    const second = makeArtifact({ score: { overall: 0.8 } })
    const result = deduplicateArtifacts([first, second])
    assert.strictEqual(result.length, 1)
    assert.deepStrictEqual(result[0]!.score, { overall: 0.9 })
  })

  await t.test('works across different ecosystems', () => {
    const result = deduplicateArtifacts([
      makeArtifact({ type: 'npm', name: 'express', version: '4.18.2' }),
      makeArtifact({ type: 'pypi', name: 'numpy', version: '1.26.0', release: 'numpy-1.26.0-cp312-manylinux_x86_64.whl' }),
      makeArtifact({ type: 'pypi', name: 'numpy', version: '1.26.0', release: 'numpy-1.26.0.tar.gz' })
    ])
    assert.strictEqual(result.length, 2)
    const ecosystems = result.map(entry => entry.type)
    assert.ok(ecosystems.includes('npm'))
    assert.ok(ecosystems.includes('pypi'))
  })

  await t.test('zip source distributions are recognized', () => {
    const result = deduplicateArtifacts(withReleases(
      'package-1.0.0-cp312-win_amd64.whl',
      'package-1.0.0.zip'
    ))
    assertSingle(result, 'package-1.0.0.zip')
  })

  await t.test('darwin-x64 platform matching', () => {
    const candidates = withReleases(
      'pkg-1.0-cp312-cp312-macosx_10_9_x86_64.whl',
      'pkg-1.0-cp312-cp312-macosx_14_0_arm64.whl',
      'pkg-1.0.tar.gz'
    )
    assertSingle(deduplicateArtifacts(candidates, 'darwin-x64'), 'pkg-1.0-cp312-cp312-macosx_10_9_x86_64.whl')
  })

  await t.test('linux-arm64 platform matching', () => {
    const candidates = withReleases(
      'pkg-1.0-cp312-cp312-manylinux_2_17_aarch64.whl',
      'pkg-1.0-cp312-cp312-manylinux_2_17_x86_64.whl',
      'pkg-1.0.tar.gz'
    )
    assertSingle(deduplicateArtifacts(candidates, 'linux-arm64'), 'pkg-1.0-cp312-cp312-manylinux_2_17_aarch64.whl')
  })

  await t.test('empty array returns empty', () => {
    assert.strictEqual(deduplicateArtifacts([]).length, 0)
  })

  await t.test('namespace is included in grouping key', () => {
    const apache = makeArtifact({ type: 'maven', namespace: 'org.apache', name: 'commons', version: '3.0' })
    const spring = makeArtifact({ type: 'maven', namespace: 'org.spring', name: 'commons', version: '3.0' })
    assert.strictEqual(deduplicateArtifacts([apache, spring]).length, 2)
  })
})

index.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js'
77
import { isInitializeRequest } from '@modelcontextprotocol/sdk/types.js'
88
import { randomUUID } from 'node:crypto'
99
import { buildPurl } from './lib/purl.ts'
10+
import { deduplicateArtifacts } from './lib/artifacts.ts'
1011
import { z } from 'zod'
1112
import pino from 'pino'
1213
import readline from 'readline'
@@ -395,12 +396,13 @@ function createConfiguredServer (): McpServer {
395396
depname: z.string().describe('The name of the dependency'),
396397
version: z.string().describe("The version of the dependency, use 'unknown' if not known").default('unknown'),
397398
})).describe('Array of packages to check'),
399+
platform: z.string().optional().describe("Optional OS-architecture hint (e.g., 'linux-x64', 'darwin-arm64', 'win32-x64'). Used to select the most relevant artifact when a package has platform-specific builds."),
398400
},
399401
annotations: {
400402
readOnlyHint: true,
401403
},
402404
},
403-
async ({ packages }, extra) => {
405+
async ({ packages, platform }, extra) => {
404406
logger.info(`Received request for ${packages.length} packages`)
405407
const accessToken = extra.authInfo?.token || SOCKET_API_KEY
406408
if (!accessToken) {
@@ -476,6 +478,7 @@ function createConfiguredServer (): McpServer {
476478
const jsonLines = responseText.split('\n')
477479
.filter(line => line.trim())
478480
.map(line => JSON.parse(line))
481+
.filter((obj: Record<string, unknown>) => !obj['_type'])
479482

480483
if (!jsonLines.length) {
481484
const errorMsg = 'No valid JSON objects found in NDJSON response'
@@ -485,11 +488,11 @@ function createConfiguredServer (): McpServer {
485488
}
486489
}
487490

488-
// Process each result
489-
for (const jsonData of jsonLines) {
491+
const deduplicated = deduplicateArtifacts(jsonLines, platform)
492+
for (const jsonData of deduplicated) {
490493
const ns = jsonData.namespace ? `${jsonData.namespace}/` : ''
491494
const purl: string = `pkg:${jsonData.type || 'unknown'}/${ns}${jsonData.name || 'unknown'}@${jsonData.version || 'unknown'}`
492-
if (jsonData.score && jsonData.score.overall !== undefined) {
495+
if (jsonData.score && jsonData.score['overall'] !== undefined) {
493496
const scoreEntries = Object.entries(jsonData.score)
494497
.filter(([key]) => key !== 'overall' && key !== 'uuid')
495498
.map(([key, value]) => {

lib/artifacts.ts

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/**
 * One parsed line of the NDJSON scoring response, as consumed by the
 * deduplication helpers in this module. Every field is optional because the
 * payload is unvalidated JSON.
 */
export interface ArtifactData {
  /** Package ecosystem, e.g. 'pypi', 'npm' or 'maven'. */
  type?: string
  /** Optional scope or group in front of the name, e.g. 'org.apache'. */
  namespace?: string
  /** Package name, e.g. 'numpy'. */
  name?: string
  /** Package version, e.g. '1.26.0'. */
  version?: string
  /** Artifact file name, e.g. 'numpy-1.26.0.tar.gz' or a wheel file name. */
  release?: string
  /** Score values keyed by category, e.g. `overall`. */
  score?: Record<string, unknown>
  /** Marker field; index.ts drops lines that have it set before deduplicating. */
  _type?: string
  /** Any other field of the response line is carried through untouched. */
  [key: string]: unknown
}
11+
12+
type PlatformPattern = RegExp

/**
 * Release-filename patterns for each supported `<os>-<arch>` platform hint.
 * A release matches a hint when any pattern in that hint's list matches.
 *
 * - macOS `universal2` wheels bundle both arm64 and x86_64 code, so they
 *   satisfy either darwin hint.
 * - The Windows patterns require a non-letter (or start of string) before
 *   `win`, so package names that merely contain "win" (e.g. `twine`, `pywin32`)
 *   are not mistaken for a Windows platform tag.
 * - 32-bit Windows wheels use the bare tag `win32`, which must be followed by
 *   `.` or the end of the name.
 */
const PLATFORM_PATTERNS: Record<string, PlatformPattern[]> = {
  'darwin-arm64': [/macosx.*arm64/i, /macosx.*universal2/i],
  'darwin-x64': [/macosx.*x86_64/i, /macosx.*universal2/i],
  // `linux` also covers the `manylinux` and `musllinux` tag families.
  'linux-x64': [/linux.*x86_64/i],
  'linux-arm64': [/linux.*(aarch64|arm64)/i],
  'win32-x64': [/(?:^|[^a-z])win(?:dows)?[-_](?:amd64|x86_64)/i],
  'win32-ia32': [/(?:^|[^a-z])win32(?=\.|$)/i],
  'win32-arm64': [/(?:^|[^a-z])win(?:dows)?[-_]arm64/i],
}
22+
23+
/**
 * Build the grouping key that identifies a package independently of which
 * artifact (sdist, wheel, ...) a response line describes.
 *
 * The four identity fields are encoded as a JSON tuple rather than joined with
 * `/` and `@`, because those characters can occur inside the fields themselves
 * and would let two different packages produce the same key. Missing and empty
 * fields are treated alike.
 *
 * @param artifact - Parsed response line.
 * @returns An opaque string, equal for two artifacts exactly when their
 *   (type, namespace, name, version) tuples are equal.
 */
function artifactGroupKey (artifact: ArtifactData): string {
  return JSON.stringify([
    artifact.type || '',
    artifact.namespace || '',
    artifact.name || '',
    artifact.version || '',
  ])
}
27+
28+
/**
 * Decide whether a release file name denotes a source distribution.
 *
 * Built distributions (`.whl`, `.egg`) are rejected first, so that a wheel
 * whose package name happens to contain "sdist" is not misclassified.
 * Otherwise a name counts as a source distribution when it ends in a source
 * archive extension (`.tar.gz`, `.tar.bz2`, `.tar.xz`, `.tgz`, `.zip`) or
 * contains the word "sdist".
 *
 * @param release - Artifact file name.
 * @returns `true` for source distributions, `false` otherwise.
 */
function isSourceDist (release: string): boolean {
  if (/\.(whl|egg)$/i.test(release)) {
    return false
  }
  return /\.(tar\.(gz|bz2|xz)|tgz|zip)$/i.test(release) || /sdist/i.test(release)
}
31+
32+
/**
 * Decide whether a release file name looks like a platform-independent wheel:
 * either it ends in `none-any.whl` (after a `-` or `_`), or it contains
 * `py3-none-any` anywhere in the name.
 *
 * @param release - Artifact file name.
 * @returns `true` when either form is present.
 */
function isUniversalWheel (release: string): boolean {
  const endsWithNoneAny = /[-_]none[-_]any\.whl$/i
  if (endsWithNoneAny.test(release)) {
    return true
  }
  const py3NoneAnyAnywhere = /py3[-_]none[-_]any/i
  return py3NoneAnyAnywhere.test(release)
}
35+
36+
/**
 * Test whether a release file name targets the given platform hint.
 *
 * The hint is trimmed and lower-cased first. If it is one of the own keys of
 * `PLATFORM_PATTERNS`, the release matches when any of that key's patterns
 * matches. Any other hint falls back to a case-insensitive substring search.
 *
 * The key lookup is restricted to own properties because the hint is supplied
 * by the caller: names such as `constructor` or `__proto__` would otherwise
 * resolve to inherited non-array values and make `.some` throw.
 *
 * @param release - Artifact file name.
 * @param platform - Platform hint such as 'linux-x64' or 'darwin-arm64'.
 * @returns `true` when the release matches the hint; a blank hint never matches.
 */
function matchesPlatform (release: string, platform: string): boolean {
  const hint = platform.trim().toLowerCase()
  if (!hint) {
    return false
  }

  const patterns = Object.prototype.hasOwnProperty.call(PLATFORM_PATTERNS, hint)
    ? PLATFORM_PATTERNS[hint]
    : undefined
  if (patterns) {
    return patterns.some(p => p.test(release))
  }
  return release.toLowerCase().includes(hint)
}
43+
44+
/**
 * Choose the representative artifact for one package group.
 *
 * A single-element group is returned as is. Otherwise the first artifact (in
 * input order) satisfying the highest-ranked rule wins:
 *   1. its release matches the platform hint (only when a hint is given),
 *   2. its release is a source distribution,
 *   3. its release is a universal wheel,
 *   4. it is simply the first artifact of the group.
 * Artifacts without a `release` can only be chosen by rule 4.
 *
 * @param artifacts - Artifacts sharing one package identity.
 * @param platform - Optional platform hint, e.g. 'linux-x64'.
 * @returns The selected artifact.
 */
function selectBestArtifact (artifacts: ArtifactData[], platform?: string): ArtifactData {
  const fallback = artifacts[0]!
  if (artifacts.length === 1) {
    return fallback
  }

  // First artifact, in input order, whose release name satisfies `accept`.
  const firstWhere = (accept: (release: string) => boolean): ArtifactData | undefined => {
    for (const candidate of artifacts) {
      if (candidate.release && accept(candidate.release)) {
        return candidate
      }
    }
    return undefined
  }

  const preferred =
    (platform ? firstWhere(release => matchesPlatform(release, platform)) : undefined) ??
    firstWhere(isSourceDist) ??
    firstWhere(isUniversalWheel)

  return preferred ?? fallback
}
62+
63+
/**
 * Collapse artifacts that describe the same package, i.e. share the same
 * (type, namespace, name, version) identity, into one entry each.
 *
 * PyPI, for example, reports one artifact per wheel and platform. For every
 * identity a single representative is kept, chosen in this order:
 * platform-matching artifact (only when `platform` is given) > source
 * distribution > universal wheel > first artifact seen.
 *
 * @param artifacts - Parsed response lines, in any order.
 * @param platform - Optional OS-architecture hint such as 'darwin-arm64'.
 * @returns One artifact per identity, ordered by each identity's first
 *   appearance in `artifacts`.
 */
export function deduplicateArtifacts (artifacts: ArtifactData[], platform?: string): ArtifactData[] {
  const byIdentity = new Map<string, ArtifactData[]>()

  for (const item of artifacts) {
    const identity = artifactGroupKey(item)
    const bucket = byIdentity.get(identity)
    if (bucket === undefined) {
      byIdentity.set(identity, [item])
    } else {
      bucket.push(item)
    }
  }

  // A Map iterates in insertion order, so the output follows first appearance.
  return Array.from(byIdentity.values(), bucket => selectBestArtifact(bucket, platform))
}

test.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,39 @@ test('Socket MCP Server', async (t) => {
100100
assert.ok(textContent.text.includes('pkg:pypi/'), 'Result should contain pypi purl format')
101101
})
102102

103+
await t.test('pypi multi-artifact package is deduplicated to one result', async () => {
104+
const packages = [
105+
{ depname: 'numpy', ecosystem: 'pypi', version: '1.26.4' }
106+
]
107+
108+
const result = await client.callTool({
109+
name: 'depscore',
110+
arguments: { packages }
111+
})
112+
113+
assert.ok(result?.content && Array.isArray(result.content) && result.content.length > 0)
114+
const textContent = result.content[0] as { type: string; text: string }
115+
const numpyLines = textContent.text.split('\n').filter(line => line.includes('pkg:pypi/numpy'))
116+
assert.strictEqual(numpyLines.length, 1, `Expected 1 deduplicated result for numpy, got ${numpyLines.length}:\n${numpyLines.join('\n')}`)
117+
})
118+
119+
await t.test('depscore accepts optional platform parameter', async () => {
120+
const packages = [
121+
{ depname: 'numpy', ecosystem: 'pypi', version: '1.26.4' }
122+
]
123+
124+
const result = await client.callTool({
125+
name: 'depscore',
126+
arguments: { packages, platform: 'darwin-arm64' }
127+
})
128+
129+
assert.ok(result?.content && Array.isArray(result.content) && result.content.length > 0)
130+
const textContent = result.content[0] as { type: string; text: string }
131+
assert.ok(textContent.text.includes('pkg:pypi/numpy'), 'Result should contain numpy')
132+
const numpyLines = textContent.text.split('\n').filter(line => line.includes('pkg:pypi/numpy'))
133+
assert.strictEqual(numpyLines.length, 1, 'Platform hint should still produce one deduplicated result')
134+
})
135+
103136
await t.test('call depscore tool with golang ecosystem', async (t) => {
104137
const golangPackages = [
105138
{ depname: 'github.com/gin-gonic/gin', ecosystem: 'golang', version: 'v1.9.0' }

0 commit comments

Comments
 (0)