11name : MSDO Toolchain Version Probe
22
3- # Resolves the exact tool versions pinned by MSDO's .gdntool configs and writes
4- # them to .github/toolchain-versions.json before the breach monitor runs.
3+ # Runs MSDO to install tools as a side effect, then scrapes the install
4+ # directories to record exact resolved versions into .github/toolchain-versions.json.
5+ # The breach monitor reads this file instead of guessing "latest" from registries.
56#
6- # Design: uses 'guardian init' only (via existingFilename to skip full scan).
7- # guardian init downloads Microsoft.Security.DevOps.Tools.Configuration — a tiny
8- # NuGet package containing the .gdntool XML files that define pinned versions.
9- # No tool binaries are downloaded or executed. Runs in ~15 seconds.
10- #
11- # Cache: keyed by OS + week. Cold start once per week; warm runs re-use the
12- # cached CLI + Tools.Configuration and just call 'guardian init --force' directly.
7+ # Guardian installs the NuGet-packaged tool wrappers into:
8+ # /home/runner/work/_msdo/packages/nuget/{PackageName}.{version}/
9+ # ESLint is installed via npm into:
10+ # /home/runner/work/_msdo/packages/node_modules/eslint/
11+ # Package names confirmed from run 23433052319.
1312
1413on :
1514 schedule :
16- - cron : ' 0 11 * * * ' # Daily 11 :00 UTC
15+ - cron : ' 0 4 * * 1 ' # Weekly Monday 04 :00 UTC
1716 workflow_dispatch :
1817
1918permissions :
2019 contents : write
21- actions : write # needed to dispatch the breach monitor after committing versions
2220
2321jobs :
2422 probe :
@@ -28,197 +26,89 @@ jobs:
2826 steps :
2927 - uses : actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
3028
31- - name : Compute weekly cache key
32- id : week
33- run : echo "key=$(date +%Y-%W)" >> "$GITHUB_OUTPUT"
34-
35- # Cache the MSDO CLI + Tools.Configuration (~10 MB, contains .gdntool files).
36- # Keyed by week: busts every Monday so version pins stay fresh.
37- - name : Restore MSDO CLI cache
38- id : cache
39- uses : actions/cache@1bd1e32a3bdc45362d1e726936510720a7c6158d # v4.2.2
40- with :
41- path : /home/runner/work/_msdo/versions
42- key : msdo-cli-linux-x64-${{ steps.week.outputs.key }}
43-
44- # Cache miss path: use the MSDO action with a dummy SARIF to trigger
45- # 'guardian init' (which downloads the CLI + Tools.Configuration) without
46- # running any scan tools. 'guardian upload' will fail gracefully — that's fine.
47- - name : Create dummy SARIF (skip-scan sentinel)
48- if : steps.cache.outputs.cache-hit != 'true'
49- run : |
50- echo '{"version":"2.1.0","runs":[]}' > /tmp/dummy.sarif
51-
52- - name : Install MSDO CLI via guardian init (cache miss)
53- if : steps.cache.outputs.cache-hit != 'true'
29+ # Run MSDO — scan may find nothing (no real targets), that's fine.
30+ # Side effect: Guardian downloads all tool packages into _msdo/packages/nuget/.
31+ - name : Install MSDO tools
5432 uses : microsoft/security-devops-action@main  # NOTE(review): floating ref, unlike the SHA-pinned checkout above; this job has contents: write — consider pinning to a full commit SHA
55- continue-on-error : true # guardian upload will fail — that's expected
33+ continue-on-error : true
5634 with :
57- existingFilename : /tmp/dummy.sarif
35+ tools : bandit,binskim,checkov,eslint,templateanalyzer,terrascan,trivy
5836
59- # Cache hit path: guardian binary already exists. Re-run 'guardian init'
60- # to refresh the workspace .gdn config pointing at the cached CLI.
61- - name : Run guardian init (cache hit)
62- if : steps.cache.outputs.cache-hit == 'true'
63- run : |
64- guardian=$(find /home/runner/work/_msdo/versions -maxdepth 4 -name 'guardian' -type f 2>/dev/null | sort -V | tail -1)
65- if [[ -z "$guardian" ]]; then
66- echo "::error::guardian binary not found in cache — cache may be corrupt"
67- exit 1
68- fi
69- echo "Guardian binary: $guardian"
70- "$guardian" init --force
71-
72- # Parse pinned versions from .gdntool XML files in the Tools.Configuration package.
73- # These files define EXACTLY which NuGet/npm package version guardian will download
74- # for each tool — no tool binaries are needed to read them.
75- - name : Parse tool versions from .gdntool configs
76- id : collect
37+ - name : Collect resolved tool versions from install dirs
7738 run : |
7839 python3 - <<'PYEOF'
79- import os, json, re, pathlib, datetime, sys
80- import xml.etree.ElementTree as ET
81-
82- versions_base = pathlib.Path('/home/runner/work/_msdo/versions')
83-
84- # Tools.Configuration is installed inside the CLI package directory:
85- # _msdo/versions/Microsoft.Security.Devops.Cli.linux-x64.{ver}/tools/Config/Tools/
86- def cli_version(p):
87- # Extract semver tuple from path e.g. .../Cli.linux-x64.0.215.0/tools/Config/Tools
88- m = re.search(r'\.(\d+)\.(\d+)\.(\d+)[/\\]', str(p))
89- return tuple(int(x) for x in m.groups()) if m else (0, 0, 0)
90-
91- config_dirs = sorted(versions_base.glob('*/tools/Config/Tools'), key=cli_version)
92- if not config_dirs:
93- print('ERROR: Config/Tools not found — guardian init may not have run', file=sys.stderr)
94- gh_out = os.environ.get('GITHUB_OUTPUT', '')
95- if gh_out:
96- open(gh_out, 'a').write('skip_commit=true\n')
97- sys.exit(0)
98-
99- config_tools = config_dirs[-1]
100- gdntool_files = sorted(config_tools.glob('**/*.gdntool'))
101- print(f'Found {len(gdntool_files)} .gdntool files in {config_tools}')
102-
103- # Map Guardian NuGet package names (lowercase) → canonical tool names
40+ import os, json, re, pathlib, datetime
41+
42+ NUGET_DIR = pathlib.Path('/home/runner/work/_msdo/packages/nuget')  # its sub-directory names are matched against VER_PAT
43+ NPM_DIR = pathlib.Path('/home/runner/work/_msdo/packages/node_modules')  # eslint/package.json is read from here
44+ # VER_PAT splits '<PackageName>.<version>': group 1 = name, group 2 = version (optional 'v', two or more numeric parts, optional -/+ suffix).
45+ VER_PAT = re.compile(r'^(.+?)\.(v?\d+\.\d+(?:\.\d+)*(?:[-+][0-9A-Za-z.-]+)?)$', re.IGNORECASE)
46+
47+ # Guardian NuGet wrapper package names → canonical tool names.
48+ # Confirmed from run 23433052319 (_msdo/packages/nuget/ directory listing).
10449 PKG_TO_TOOL = {
105- # NuGet
106- 'microsoft.codeanalysis.binskim': 'binskim',
107- 'microsoft.azure.templates.analyzer': 'templateanalyzer',
108- # pip (package name == tool name for these)
109- 'bandit': 'bandit',
110- 'checkov': 'checkov',
111- # npm
112- 'eslint': 'eslint',
113- # zip / GitHub releases (names TBD from first run — check raw_dirs)
114- 'trivy': 'trivy',
115- 'terrascan': 'terrascan',
50+ 'microsoft.guardian.banditredist_linux_amd64': 'bandit',
51+ 'microsoft.codeanalysis.binskim': 'binskim',
52+ 'microsoft.guardian.checkovredist_linux_amd64': 'checkov',
53+ 'azure.templates.analyzer.commandline.linux-x64': 'templateanalyzer',
54+ 'microsoft.guardian.terrascanredist_linux_amd64': 'terrascan',
55+ 'microsoft.guardian.trivyredist_linux_amd64': 'trivy',
11656 }
11757
118- # Internal CLI package — skip in output
119- CLI_PKGS = {
58+ # Internal packages — skip
59+ SKIP_PKGS = {
12060 'microsoft.security.devops.cli',
12161 'microsoft.security.devops.cli.linux-x64',
12262 'microsoft.security.devops.cli.linux-arm64',
12363 'microsoft.security.devops.cli.win-x64',
64+ 'microsoft.security.devops.policy.names',
65+ 'microsoft.security.devops.policy.github',
12466 }
12567
12668 tools = {}
127- raw_gdntools = {}
128- VER_RE = re.compile(r'\d+\.\d+(?:\.\d+)*(?:[-+][0-9A-Za-z.-]+)?')
129-
130- for f in gdntool_files:
131- content = f.read_text(encoding='utf-8', errors='replace')
132- raw_gdntools[f.name] = content
133-
134- # --- Strategy 1: standard XML attribute scan ---
135- # Look for elements with Name/PackageName + Version attributes
136- try:
137- root = ET.fromstring(content)
138- for elem in root.iter():
139- for name_key in ('Name', 'PackageName', 'package', 'id'):
140- pkg = (elem.get(name_key) or '').strip().lower()
141- if not pkg:
142- continue
143- canonical = PKG_TO_TOOL.get(pkg)
144- if not canonical:
145- continue
146- for ver_key in ('Version', 'version', 'PackageVersion'):
147- ver = (elem.get(ver_key) or '').strip()
148- if ver and VER_RE.match(ver):
149- tools[canonical] = ver
150- break
151- except ET.ParseError:
152- pass
153-
154- # --- Strategy 2: child element text scan ---
155- # <PackageName>Microsoft.Guardian.TrivyRedist_linux_amd64</PackageName>
156- # <Version>0.69.3</Version>
157- try:
158- root = ET.fromstring(content)
159- for elem in root.iter():
160- children = {c.tag: (c.text or '').strip() for c in elem}
161- pkg = children.get('PackageName', children.get('Name', children.get('Id', ''))).lower()
162- ver = children.get('Version', children.get('PackageVersion', ''))
163- if pkg and ver:
164- canonical = PKG_TO_TOOL.get(pkg)
165- if canonical and VER_RE.match(ver):
166- tools[canonical] = ver
167- except ET.ParseError:
168- pass
169-
170- # --- Strategy 3: regex fallback on raw XML text (per-tool) ---
171- # Runs for each tool not yet resolved, regardless of other tools.
172- # Handles malformed XML or unexpected schemas.
173- for pkg_lower, canonical in PKG_TO_TOOL.items():
174- if canonical in tools:
69+ raw_dirs = []
70+
71+ if NUGET_DIR.exists():
72+ entries = sorted(d.name for d in NUGET_DIR.iterdir() if d.is_dir())
73+ raw_dirs = entries
74+ for name in entries:
75+ m = VER_PAT.match(name)
76+ if not m:
17577 continue
176- if pkg_lower in content.lower():
177- m = re.search(
178- re.escape(pkg_lower) + r'[^"\'<>]*["\'>][\s\S]{0,200}?' +
179- r'(\d+\.\d+(?:\.\d+)*)',
180- content.lower()
181- )
182- if m:
183- tools[canonical] = m.group(1)
184-
185- # eslint: installed via npm — version is in the npm package spec inside
186- # the .gdntool for eslint. Try to find it from the raw XML dump.
187- if 'eslint' not in tools:
188- for fname, content in raw_gdntools.items():
189- if 'eslint' not in fname.lower() and 'eslint' not in content.lower():
78+ pkg_lower = m.group(1).lower()
79+ version = m.group(2)
80+ if pkg_lower in SKIP_PKGS:
19081 continue
191- m = re.search(r'eslint[@=](\d+\.\d+(?:\.\d+)*)', content, re.IGNORECASE)
192- if m:
193- tools['eslint'] = m.group(1)
194- break
82+ canonical = PKG_TO_TOOL.get(pkg_lower)
83+ if canonical:
84+ tools[canonical] = version
19585
196- # Dump raw .gdntool content so we can inspect the schema on first run
197- print('\n=== RAW .gdntool FILES (schema discovery) ===')
198- for fname, content in raw_gdntools.items():
199- print(f'\n--- {fname} ---')
200- print(content[:2000]) # first 2KB per file
86+ # ESLint: installed via npm, read version from package.json
87+ eslint_pkg = NPM_DIR / 'eslint' / 'package.json'
88+ if eslint_pkg.exists():
89+ tools['eslint'] = json.loads(eslint_pkg.read_text())['version']
20190
202- print(f'\n=== RESOLVED VERSIONS ===' )
203- print(json.dumps(tools, indent=2) )
91+ print('raw_dirs:', raw_dirs )
92+ print('resolved:', tools )
20493
20594 if not tools:
206- print('\nWARNING : no versions resolved from .gdntool files — check raw output above ')
207- gh_out = os.environ.get('GITHUB_OUTPUT', '')
208- if gh_out:
209- open(gh_out, 'a').write('skip_commit=true\n')
210- sys.exit(0 )
95+ raise SystemExit('ERROR : no versions resolved — _msdo/packages/nuget/ empty or missing. Aborting. ')
96+
97+ missing = (set(PKG_TO_TOOL.values()) | {'eslint'}) - set(tools.keys())
98+ if missing:
99+ print(f'WARNING: expected tools not found: {sorted(missing)}' )
211100
212101 output = {
213102 'generated_at': datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
214103 'msdo_cli_version': os.environ.get('MSDO_INSTALLEDVERSION', 'unknown'),
215104 'tools': tools,
216- 'raw_gdntools ': list(raw_gdntools.keys()) ,
105+ 'raw_dirs ': raw_dirs ,
217106 }
218107
219108 out = pathlib.Path('.github/toolchain-versions.json')
220109 out.parent.mkdir(parents=True, exist_ok=True)
221110 out.write_text(json.dumps(output, indent=2) + '\n')
111+ print(json.dumps(output, indent=2))
222112 PYEOF
223113
224114 - name : Commit updated versions
@@ -232,14 +122,3 @@ jobs:
232122 git commit -m "chore(ci): update toolchain-versions.json [skip ci]"
233123 git push
234124 fi
235-
236- # Trigger the breach monitor only after versions are committed.
237- # This guarantees the monitor always reads fresh versions — no schedule
238- # race condition between the two workflows.
239- - name : Trigger breach monitor
240- if : steps.collect.outputs.skip_commit != 'true'
241- env :
242- GH_TOKEN : ${{ github.token }}
243- run : |
244- gh workflow run msdo-breach-monitor.lock.yml --ref main
245- echo "Breach monitor dispatched — will read freshly committed toolchain-versions.json"
0 commit comments