Skip to content

Commit 30ead4e

Browse files
DimaBir and Dima Birenbaum authored
fix(ci): sort -V for guardian binary, semver sort for config dirs, per-tool regex fallback (#226)
Co-authored-by: Dima Birenbaum <dbirenbaum@microsoft.com>
1 parent c9bc891 commit 30ead4e

2 files changed

Lines changed: 62 additions & 188 deletions

File tree

.github/workflows/msdo-breach-monitor.lock.yml

Lines changed: 1 addition & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 61 additions & 182 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,22 @@
11
name: MSDO Toolchain Version Probe
22

3-
# Resolves the exact tool versions pinned by MSDO's .gdntool configs and writes
4-
# them to .github/toolchain-versions.json before the breach monitor runs.
3+
# Runs MSDO to install tools as a side effect, then scrapes the install
4+
# directories to record exact resolved versions into toolchain-versions.json.
5+
# The breach monitor reads this file instead of guessing "latest" from registries.
56
#
6-
# Design: uses 'guardian init' only (via existingFilename to skip full scan).
7-
# guardian init downloads Microsoft.Security.DevOps.Tools.Configuration — a tiny
8-
# NuGet package containing the .gdntool XML files that define pinned versions.
9-
# No tool binaries are downloaded or executed. Runs in ~15 seconds.
10-
#
11-
# Cache: keyed by OS + week. Cold start once per week; warm runs re-use the
12-
# cached CLI + Tools.Configuration and just call 'guardian init --force' directly.
7+
# Guardian installs all tool wrappers as NuGet packages into:
8+
# /home/runner/work/_msdo/packages/nuget/{PackageName}.{version}/
9+
# ESLint is installed via npm into:
10+
# /home/runner/work/_msdo/packages/node_modules/eslint/
11+
# Package names confirmed from run 23433052319.
1312

1413
on:
1514
schedule:
16-
- cron: '0 11 * * *' # Daily 11:00 UTC
15+
- cron: '0 4 * * 1' # Weekly Monday 04:00 UTC
1716
workflow_dispatch:
1817

1918
permissions:
2019
contents: write
21-
actions: write # needed to dispatch the breach monitor after committing versions
2220

2321
jobs:
2422
probe:
@@ -28,197 +26,89 @@ jobs:
2826
steps:
2927
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
3028

31-
- name: Compute weekly cache key
32-
id: week
33-
run: echo "key=$(date +%Y-%W)" >> "$GITHUB_OUTPUT"
34-
35-
# Cache the MSDO CLI + Tools.Configuration (~10 MB, contains .gdntool files).
36-
# Keyed by week: busts every Monday so version pins stay fresh.
37-
- name: Restore MSDO CLI cache
38-
id: cache
39-
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c6158d # v4.2.2
40-
with:
41-
path: /home/runner/work/_msdo/versions
42-
key: msdo-cli-linux-x64-${{ steps.week.outputs.key }}
43-
44-
# Cache miss path: use the MSDO action with a dummy SARIF to trigger
45-
# 'guardian init' (which downloads the CLI + Tools.Configuration) without
46-
# running any scan tools. 'guardian upload' will fail gracefully — that's fine.
47-
- name: Create dummy SARIF (skip-scan sentinel)
48-
if: steps.cache.outputs.cache-hit != 'true'
49-
run: |
50-
echo '{"version":"2.1.0","runs":[]}' > /tmp/dummy.sarif
51-
52-
- name: Install MSDO CLI via guardian init (cache miss)
53-
if: steps.cache.outputs.cache-hit != 'true'
29+
# Run MSDO — scan may find nothing (no real targets), that's fine.
30+
# Side effect: Guardian downloads all tool packages into _msdo/packages/nuget/.
31+
- name: Install MSDO tools
5432
uses: microsoft/security-devops-action@main
55-
continue-on-error: true # guardian upload will fail — that's expected
33+
continue-on-error: true
5634
with:
57-
existingFilename: /tmp/dummy.sarif
35+
tools: bandit,binskim,checkov,eslint,templateanalyzer,terrascan,trivy
5836

59-
# Cache hit path: guardian binary already exists. Re-run 'guardian init'
60-
# to refresh the workspace .gdn config pointing at the cached CLI.
61-
- name: Run guardian init (cache hit)
62-
if: steps.cache.outputs.cache-hit == 'true'
63-
run: |
64-
guardian=$(find /home/runner/work/_msdo/versions -maxdepth 4 -name 'guardian' -type f 2>/dev/null | sort -V | tail -1)
65-
if [[ -z "$guardian" ]]; then
66-
echo "::error::guardian binary not found in cache — cache may be corrupt"
67-
exit 1
68-
fi
69-
echo "Guardian binary: $guardian"
70-
"$guardian" init --force
71-
72-
# Parse pinned versions from .gdntool XML files in the Tools.Configuration package.
73-
# These files define EXACTLY which NuGet/npm package version guardian will download
74-
# for each tool — no tool binaries are needed to read them.
75-
- name: Parse tool versions from .gdntool configs
76-
id: collect
37+
- name: Collect resolved tool versions from install dirs
7738
run: |
7839
python3 - <<'PYEOF'
79-
import os, json, re, pathlib, datetime, sys
80-
import xml.etree.ElementTree as ET
81-
82-
versions_base = pathlib.Path('/home/runner/work/_msdo/versions')
83-
84-
# Tools.Configuration is installed inside the CLI package directory:
85-
# _msdo/versions/Microsoft.Security.Devops.Cli.linux-x64.{ver}/tools/Config/Tools/
86-
def cli_version(p):
87-
# Extract semver tuple from path e.g. .../Cli.linux-x64.0.215.0/tools/Config/Tools
88-
m = re.search(r'\.(\d+)\.(\d+)\.(\d+)[/\\]', str(p))
89-
return tuple(int(x) for x in m.groups()) if m else (0, 0, 0)
90-
91-
config_dirs = sorted(versions_base.glob('*/tools/Config/Tools'), key=cli_version)
92-
if not config_dirs:
93-
print('ERROR: Config/Tools not found — guardian init may not have run', file=sys.stderr)
94-
gh_out = os.environ.get('GITHUB_OUTPUT', '')
95-
if gh_out:
96-
open(gh_out, 'a').write('skip_commit=true\n')
97-
sys.exit(0)
98-
99-
config_tools = config_dirs[-1]
100-
gdntool_files = sorted(config_tools.glob('**/*.gdntool'))
101-
print(f'Found {len(gdntool_files)} .gdntool files in {config_tools}')
102-
103-
# Map Guardian NuGet package names (lowercase) → canonical tool names
40+
import os, json, re, pathlib, datetime
41+
42+
NUGET_DIR = pathlib.Path('/home/runner/work/_msdo/packages/nuget')
43+
NPM_DIR = pathlib.Path('/home/runner/work/_msdo/packages/node_modules')
44+
45+
VER_PAT = re.compile(r'^(.+?)\.(v?\d+\.\d+(?:\.\d+)*(?:[-+][0-9A-Za-z.-]+)?)$', re.IGNORECASE)
46+
47+
# Guardian NuGet wrapper package names → canonical tool names.
48+
# Confirmed from run 23433052319 (_msdo/packages/nuget/ directory listing).
10449
PKG_TO_TOOL = {
105-
# NuGet
106-
'microsoft.codeanalysis.binskim': 'binskim',
107-
'microsoft.azure.templates.analyzer': 'templateanalyzer',
108-
# pip (package name == tool name for these)
109-
'bandit': 'bandit',
110-
'checkov': 'checkov',
111-
# npm
112-
'eslint': 'eslint',
113-
# zip / GitHub releases (names TBD from first run — check raw_dirs)
114-
'trivy': 'trivy',
115-
'terrascan': 'terrascan',
50+
'microsoft.guardian.banditredist_linux_amd64': 'bandit',
51+
'microsoft.codeanalysis.binskim': 'binskim',
52+
'microsoft.guardian.checkovredist_linux_amd64': 'checkov',
53+
'azure.templates.analyzer.commandline.linux-x64': 'templateanalyzer',
54+
'microsoft.guardian.terrascanredist_linux_amd64': 'terrascan',
55+
'microsoft.guardian.trivyredist_linux_amd64': 'trivy',
11656
}
11757
118-
# Internal CLI package — skip in output
119-
CLI_PKGS = {
58+
# Internal packages — skip
59+
SKIP_PKGS = {
12060
'microsoft.security.devops.cli',
12161
'microsoft.security.devops.cli.linux-x64',
12262
'microsoft.security.devops.cli.linux-arm64',
12363
'microsoft.security.devops.cli.win-x64',
64+
'microsoft.security.devops.policy.names',
65+
'microsoft.security.devops.policy.github',
12466
}
12567
12668
tools = {}
127-
raw_gdntools = {}
128-
VER_RE = re.compile(r'\d+\.\d+(?:\.\d+)*(?:[-+][0-9A-Za-z.-]+)?')
129-
130-
for f in gdntool_files:
131-
content = f.read_text(encoding='utf-8', errors='replace')
132-
raw_gdntools[f.name] = content
133-
134-
# --- Strategy 1: standard XML attribute scan ---
135-
# Look for elements with Name/PackageName + Version attributes
136-
try:
137-
root = ET.fromstring(content)
138-
for elem in root.iter():
139-
for name_key in ('Name', 'PackageName', 'package', 'id'):
140-
pkg = (elem.get(name_key) or '').strip().lower()
141-
if not pkg:
142-
continue
143-
canonical = PKG_TO_TOOL.get(pkg)
144-
if not canonical:
145-
continue
146-
for ver_key in ('Version', 'version', 'PackageVersion'):
147-
ver = (elem.get(ver_key) or '').strip()
148-
if ver and VER_RE.match(ver):
149-
tools[canonical] = ver
150-
break
151-
except ET.ParseError:
152-
pass
153-
154-
# --- Strategy 2: child element text scan ---
155-
# <PackageName>Microsoft.Guardian.TrivyRedist_linux_amd64</PackageName>
156-
# <Version>0.69.3</Version>
157-
try:
158-
root = ET.fromstring(content)
159-
for elem in root.iter():
160-
children = {c.tag: (c.text or '').strip() for c in elem}
161-
pkg = children.get('PackageName', children.get('Name', children.get('Id', ''))).lower()
162-
ver = children.get('Version', children.get('PackageVersion', ''))
163-
if pkg and ver:
164-
canonical = PKG_TO_TOOL.get(pkg)
165-
if canonical and VER_RE.match(ver):
166-
tools[canonical] = ver
167-
except ET.ParseError:
168-
pass
169-
170-
# --- Strategy 3: regex fallback on raw XML text (per-tool) ---
171-
# Runs for each tool not yet resolved, regardless of other tools.
172-
# Handles malformed XML or unexpected schemas.
173-
for pkg_lower, canonical in PKG_TO_TOOL.items():
174-
if canonical in tools:
69+
raw_dirs = []
70+
71+
if NUGET_DIR.exists():
72+
entries = sorted(d.name for d in NUGET_DIR.iterdir() if d.is_dir())
73+
raw_dirs = entries
74+
for name in entries:
75+
m = VER_PAT.match(name)
76+
if not m:
17577
continue
176-
if pkg_lower in content.lower():
177-
m = re.search(
178-
re.escape(pkg_lower) + r'[^"\'<>]*["\'>][\s\S]{0,200}?' +
179-
r'(\d+\.\d+(?:\.\d+)*)',
180-
content.lower()
181-
)
182-
if m:
183-
tools[canonical] = m.group(1)
184-
185-
# eslint: installed via npm — version is in the npm package spec inside
186-
# the .gdntool for eslint. Try to find it from the raw XML dump.
187-
if 'eslint' not in tools:
188-
for fname, content in raw_gdntools.items():
189-
if 'eslint' not in fname.lower() and 'eslint' not in content.lower():
78+
pkg_lower = m.group(1).lower()
79+
version = m.group(2)
80+
if pkg_lower in SKIP_PKGS:
19081
continue
191-
m = re.search(r'eslint[@=](\d+\.\d+(?:\.\d+)*)', content, re.IGNORECASE)
192-
if m:
193-
tools['eslint'] = m.group(1)
194-
break
82+
canonical = PKG_TO_TOOL.get(pkg_lower)
83+
if canonical:
84+
tools[canonical] = version
19585
196-
# Dump raw .gdntool content so we can inspect the schema on first run
197-
print('\n=== RAW .gdntool FILES (schema discovery) ===')
198-
for fname, content in raw_gdntools.items():
199-
print(f'\n--- {fname} ---')
200-
print(content[:2000]) # first 2KB per file
86+
# ESLint: installed via npm, read version from package.json
87+
eslint_pkg = NPM_DIR / 'eslint' / 'package.json'
88+
if eslint_pkg.exists():
89+
tools['eslint'] = json.loads(eslint_pkg.read_text())['version']
20190
202-
print(f'\n=== RESOLVED VERSIONS ===')
203-
print(json.dumps(tools, indent=2))
91+
print('raw_dirs:', raw_dirs)
92+
print('resolved:', tools)
20493
20594
if not tools:
206-
print('\nWARNING: no versions resolved from .gdntool files — check raw output above')
207-
gh_out = os.environ.get('GITHUB_OUTPUT', '')
208-
if gh_out:
209-
open(gh_out, 'a').write('skip_commit=true\n')
210-
sys.exit(0)
95+
raise SystemExit('ERROR: no versions resolved — _msdo/packages/nuget/ empty or missing. Aborting.')
96+
97+
missing = (set(PKG_TO_TOOL.values()) | {'eslint'}) - set(tools.keys())
98+
if missing:
99+
print(f'WARNING: expected tools not found: {sorted(missing)}')
211100
212101
output = {
213102
'generated_at': datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
214103
'msdo_cli_version': os.environ.get('MSDO_INSTALLEDVERSION', 'unknown'),
215104
'tools': tools,
216-
'raw_gdntools': list(raw_gdntools.keys()),
105+
'raw_dirs': raw_dirs,
217106
}
218107
219108
out = pathlib.Path('.github/toolchain-versions.json')
220109
out.parent.mkdir(parents=True, exist_ok=True)
221110
out.write_text(json.dumps(output, indent=2) + '\n')
111+
print(json.dumps(output, indent=2))
222112
PYEOF
223113
224114
- name: Commit updated versions
@@ -232,14 +122,3 @@ jobs:
232122
git commit -m "chore(ci): update toolchain-versions.json [skip ci]"
233123
git push
234124
fi
235-
236-
# Trigger the breach monitor only after versions are committed.
237-
# This guarantees the monitor always reads fresh versions — no schedule
238-
# race condition between the two workflows.
239-
- name: Trigger breach monitor
240-
if: steps.collect.outputs.skip_commit != 'true'
241-
env:
242-
GH_TOKEN: ${{ github.token }}
243-
run: |
244-
gh workflow run msdo-breach-monitor.lock.yml --ref main
245-
echo "Breach monitor dispatched — will read freshly committed toolchain-versions.json"

0 commit comments

Comments
 (0)