Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ know what they are looking for.
| RubyGems | `rubygems` | `Gemfile.lock`, installed `*.gemspec` |
| Composer | `packagist` | `composer.lock`, `vendor/composer/installed.json` |
| MCP | `mcp` | JSON host configs: `mcp.json`, `.mcp.json`, `claude_desktop_config.json`, `mcp_config.json`, `mcp_settings.json`, `cline_mcp_settings.json`, plus `~/.gemini/settings.json` (Gemini CLI / Code Assist) and `~/.claude.json` (Claude Code user- and project-scoped `mcpServers`). Non-JSON configs (Codex `config.toml`, Continue YAML) are not parsed in v0.1. |
| Agent skills | `agent-skill` | `skills.sh` / `vercel-labs/skills` lock files: global `~/.agents/.skill-lock.json` (or `$XDG_STATE_HOME/skills/.skill-lock.json`) and project-local `skills-lock.json`. Loose `SKILL.md` directories without a lock file are not enumerated. |
| Editor extensions | `editor-extension` | VS Code, Cursor, Windsurf, VSCodium manifests |
| Browser extensions | `browser-extension` | Chromium-family (`manifest.json`) and Firefox (`extensions.json`) per profile |
| Homebrew | `homebrew` | Formula `INSTALL_RECEIPT.json` files and cask `.metadata` install markers |
Expand Down
2 changes: 1 addition & 1 deletion cmd/bumblebee/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func registerScanFlags(fs *flag.FlagSet, o *scanOpts) {
"scan profile: baseline (bounded known package/tool roots), project (configured developer/project roots), or deep (incident-response exposure scan; may include user home roots)")
fs.Var(&o.roots, "root", "directory to scan (repeatable or comma-separated; unrelated to running as root). Required for deep; optional for baseline/project.")
fs.Var(&o.excludes, "exclude", "additional directory name or suffix path to exclude (repeatable)")
fs.Var(&o.ecosystems, "ecosystem", "limit scanning to emitted ecosystem values (repeatable or comma-separated): npm,pypi,go,rubygems,packagist,mcp,editor-extension,browser-extension,homebrew")
fs.Var(&o.ecosystems, "ecosystem", "limit scanning to emitted ecosystem values (repeatable or comma-separated): "+strings.Join(model.SupportedEcosystems(), ","))
fs.Int64Var(&o.maxFileSize, "max-file-size", 5*1024*1024, "max bytes to read from any single metadata file")
fs.DurationVar(&o.maxDuration, "max-duration", 0, "max wall-clock duration for the whole scan (0 = unbounded)")
fs.IntVar(&o.concurrency, "concurrency", 4, "number of concurrent file parsers")
Expand Down
52 changes: 52 additions & 0 deletions cmd/bumblebee/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,58 @@ func TestResolveRootsBaselineSkipsAbsentClaudeCodexRoots(t *testing.T) {
}
}

// TestResolveRootsBaselineIncludesAgentSkillRoot checks that the baseline
// profile resolves both agent-skill lock directories, <HOME>/.agents and
// <XDG_STATE_HOME>/skills, when they exist on disk, and that each one is
// reported with model.RootKindAgentSkill.
func TestResolveRootsBaselineIncludesAgentSkillRoot(t *testing.T) {
	supported := runtime.GOOS == "darwin" || runtime.GOOS == "linux"
	if !supported {
		t.Skipf("profile defaults are darwin/linux specific")
	}

	// Redirect HOME and XDG_STATE_HOME to per-test temp directories.
	homeDir, stateDir := t.TempDir(), t.TempDir()
	t.Setenv("HOME", homeDir)
	t.Setenv("XDG_STATE_HOME", stateDir)

	// The two directories the baseline profile is expected to report.
	want := []string{
		filepath.Join(homeDir, ".agents"),
		filepath.Join(stateDir, "skills"),
	}
	for _, dir := range want {
		if err := os.MkdirAll(dir, 0o755); err != nil {
			t.Fatal(err)
		}
	}

	roots, _, err := resolveRoots(model.ProfileBaseline, nil, rootsOpts{})
	if err != nil {
		t.Fatalf("resolveRoots baseline: %v", err)
	}

	// Index the resolved roots by path so each expectation is one lookup.
	kindByPath := make(map[string]string, len(roots))
	for _, root := range roots {
		kindByPath[root.Path] = root.Kind
	}

	for _, dir := range want {
		switch kind, found := kindByPath[dir]; {
		case !found:
			t.Errorf("baseline missing agent-skill root %q (got %v)", dir, roots)
		case kind != model.RootKindAgentSkill:
			t.Errorf("baseline root %q kind = %q, want %q", dir, kind, model.RootKindAgentSkill)
		}
	}
}

func TestClassifyRootAgentSkill(t *testing.T) {
cases := []string{
"/Users/alice/.agents",
"/home/alice/.agents",
"/home/alice/.local/state/skills",
}
for _, p := range cases {
if got := classifyRoot(p, model.ProfileBaseline); got != model.RootKindAgentSkill {
t.Errorf("classifyRoot(%q) = %q, want %q", p, got, model.RootKindAgentSkill)
}
}
}

// TestResolveRootsBaselineIncludesClaudeJSONFileRoot verifies that the
// `~/.claude.json` config file is included as a baseline MCP root when
// present. Unlike the other MCP candidates it is a regular file, not a
Expand Down
11 changes: 11 additions & 0 deletions cmd/bumblebee/roots.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ func classifyRoot(path, profile string) string {
strings.HasSuffix(p, "/.config/Claude Code") ||
strings.HasSuffix(p, "/.continue"):
return model.RootKindMCPConfig
case strings.HasSuffix(p, "/.agents") || strings.HasSuffix(p, "/.local/state/skills"):
return model.RootKindAgentSkill
case p == "/opt/homebrew/lib" ||
p == "/usr/local/lib" ||
strings.HasSuffix(p, "/Cellar") ||
Expand Down Expand Up @@ -264,6 +266,15 @@ func baselineHomeCandidates(home string) []scanner.Root {
add(filepath.Join(home, ".continue"), model.RootKindMCPConfig)
}

// Agent-skill lock locations. ~/.agents holds the global
// `.skill-lock.json` written by the skills.sh CLI; $XDG_STATE_HOME
// overrides that to <state>/skills/.skill-lock.json when set.
// Absent locations are dropped by filterExistingRoots.
add(filepath.Join(home, ".agents"), model.RootKindAgentSkill)
if xdg := os.Getenv("XDG_STATE_HOME"); xdg != "" {
add(filepath.Join(xdg, "skills"), model.RootKindAgentSkill)
}

// Browser extension trees. We point directly at the per-profile
// Extensions/ directories so the default home-tree excludes (which
// keep us out of Chromium/Firefox app trees for privacy reasons)
Expand Down
100 changes: 95 additions & 5 deletions docs/inventory-sources.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ by recent supply-chain incidents — see the [Why these ecosystems](#why-these-e
section at the bottom for the reporting that informed it.

The `ecosystem` field on every record matches OSV ecosystem identifiers
where one exists (`npm`, `pypi`, `go`, `rubygems`, `packagist`, ...). `mcp`
and `editor-extension` are project-local values for execution surfaces that
do not map cleanly to a package registry; both are emitted without resolved
package versions.
where one exists (`npm`, `pypi`, `go`, `rubygems`, `packagist`, ...). `mcp`,
`agent-skill`, and `editor-extension` are project-local values for
execution surfaces that do not map cleanly to a package registry; all
three are emitted without resolved package versions.

## `ecosystem` vs source toolchain

Expand All @@ -29,7 +29,7 @@ Each scan profile reads from a different slice of the sources below:

| Profile | Sources walked |
|-------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `baseline` | Homebrew `Cellar` / `Caskroom` install metadata and lib prefixes; `/Library/Python`; Linux system Python (`/usr/lib/python*`, plus `/usr/local/lib`); user Python (`~/.local/lib/python*`, `~/.local/share/pipx/venvs`, `pyenv`); language version managers (`asdf`, `nvm`, `rbenv`, `rvm`); `~/.cargo`; `~/go`; editor-extension trees; MCP config locations; per-profile browser-extension trees (Chromium-family + Firefox-family, including common snap/flatpak paths). No project trees. |
| `baseline` | Homebrew `Cellar` / `Caskroom` install metadata and lib prefixes; `/Library/Python`; Linux system Python (`/usr/lib/python*`, plus `/usr/local/lib`); user Python (`~/.local/lib/python*`, `~/.local/share/pipx/venvs`, `pyenv`); language version managers (`asdf`, `nvm`, `rbenv`, `rvm`); `~/.cargo`; `~/go`; editor-extension trees; MCP config locations; agent-skill lock locations (`~/.agents`, `$XDG_STATE_HOME/skills`); per-profile browser-extension trees (Chromium-family + Firefox-family, including common snap/flatpak paths). No project trees. |
| `project` | Configured developer/project roots (`~/code`, `~/src`, `~/Developer`, `~/Projects`, `~/workspace`, and any explicit `--root`). All ecosystem parsers below apply within those trees. |
| `deep` | Operator-supplied roots, typically a bare home directory during a campaign. Same ecosystem parsers; recommended only in combination with `--exposure-catalog` to emit `record_type=finding` records. |

Expand Down Expand Up @@ -405,6 +405,92 @@ References:
- MCP introduction: <https://modelcontextprotocol.io/>
- MCP server configuration: <https://modelcontextprotocol.io/quickstart/user>

## Agent skills (skills.sh / vercel-labs/skills)

Files read (JSON only):

- `.skill-lock.json`: the global lock written by the `skills.sh` CLI.
Default location is `~/.agents/.skill-lock.json`; when
`XDG_STATE_HOME` is set, the CLI writes to
`$XDG_STATE_HOME/skills/.skill-lock.json` instead.
- `skills-lock.json`: project-local lock file written at a repo root.

Both basenames share one envelope:

```json
{
"version": <int>,
"skills": {
"<local-name>": {
"source": "<owner/repo or path>",
"sourceType": "github" | "mintlify" | "huggingface" | "local",
"ref": "<branch | tag | sha>",
"skillPath": "<subdir>"
}
}
}
```

Schema versions v1 (legacy `computedHash`) and v3 (current
`skillFolderHash`) are both accepted, and unknown top-level fields and
unknown schema versions are tolerated so a future schema bump does not
break inventory.

### Package identity

For each entry, `PackageName` is the upstream source slug
(`vercel-labs/agent-skills`, `vercel/ai`, ...). The local alias from
the lock file's map key is preserved in `server_name` so a renamed
install can still be traced back to its entry in the lock file. For
`sourceType=local`, the on-disk path in `source` is deliberately not
retained — only the local alias is recorded, so the operator's
filesystem layout does not leak through inventory; `requested_spec`
is set to `local:` in that case.

`requested_spec` is otherwise a compact install-channel descriptor
formatted as `<sourceType>:<source>[@<ref>][/<skillPath>]`, e.g.
`github:vercel/ai@main` or `github:vercel-labs/agent-skills/react`.
`version` stays empty because a `ref` may be a branch, tag, or commit
SHA and v0.1's slim schema does not distinguish them.

`source_type` is always `skill-lock` regardless of the file basename;
the basename is recoverable from `source_file`. `root_kind` is
`agent_skill_root` only when the file falls outside every configured
root — under a project tree the enclosing `project_root` wins (same
rule as MCP). `confidence` is recorded as `low` for all entries;
these are configured references, not running installs.

### Matching behavior

Example jq filters:

```
# All agent-skill records
jq 'select(.record_type == "package" and .ecosystem == "agent-skill")' inventory.ndjson

# Agent skills pulled from a specific upstream
jq 'select(.ecosystem == "agent-skill" and .normalized_name == "vercel-labs/agent-skills")
| {server_name, package_name, requested_spec, source_file}' inventory.ndjson
```

Exposure-catalog matches against agent-skill records work on name only,
since `version` is intentionally empty. A catalog entry that pins a
specific source slug (e.g. `vercel-labs/agent-skills`) with `versions`
including `""` will match every install of that upstream regardless of
local alias or ref.

Loose skill directories without a lock file — for example
`~/.claude/skills/<name>/SKILL.md` and similar — are not enumerated in
v0.1. Without a manifest anchoring the install to an upstream identity
there is no stable package_name to record. Operators who want coverage
for skills that ship outside `skills.sh` can pin them by hand-copying the
upstream slug into an exposure catalog and matching at `record_type=finding`.

References:

- skills.sh: <https://www.skills.sh>
- skills CLI source: <https://github.com/vercel-labs/skills>

## Browser extensions (Chromium-family + Firefox)

Files read:
Expand Down Expand Up @@ -575,3 +661,7 @@ strong installed-state correlation tooling today.
- Safari extensions. Safari's on-disk layout
(`~/Library/Safari/Extensions/`, `~/Library/Containers/<bundle-id>`)
is TCC-protected and is not enumerated.
- Loose agent-skill directories without a lock file (e.g.
`~/.claude/skills/<name>/SKILL.md`, Cursor rules, Continue agents).
Only `skills.sh` / `vercel-labs/skills` lock files are enumerated
under `ecosystem=agent-skill`; see the "Agent skills" section above.
Loading