Skip to content

Commit 220ff59

Browse files
ci: remove temporary npm-hang diagnostic instrumentation
The diagnostic blocks (pre-npm-ci probes, --loglevel=http on npm ci, post-failure debug-log artifact upload) were added to identify the root cause of the npm-ci silent 8-minute hang on protected runners.

That root cause is identified and fixed: the lockfile contained 415 `resolved:` URLs pointing to npm-proxy.dev.databricks.com (the internal dev-only proxy), which the protected runners cannot reach. The fix is the `Rewrite lockfile to JFrog registry` step in `.github/actions/setup-jfrog`, which rewrites both `registry.npmjs.org` and the dev-proxy URLs to JFrog before `npm ci`.

With the root cause fixed and CI now green across the full [16, 18, 20, 22, 24] matrix on both unit-test and e2e-test, the diagnostics are no longer needed. Strip them so the workflow stays focused on its actual responsibilities.

Co-authored-by: Isaac
Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
1 parent 3156569 commit 220ff59

1 file changed

Lines changed: 3 additions & 163 deletions

File tree

.github/workflows/main.yml

Lines changed: 3 additions & 163 deletions
Original file line number | Diff line number | Diff line change
@@ -34,69 +34,11 @@ jobs:
3434
${{ runner.os }}-build-${{ env.cache-name }}-
3535
${{ runner.os }}-build-
3636
${{ runner.os }}-
37-
# DIAGNOSTIC (temporary — remove once npm-hang root cause identified).
38-
# Captures effective npm config, lockfile registry distribution, and
39-
# registry reachability BEFORE npm ci runs.
40-
- name: Diag — pre-npm-ci
41-
run: |
42-
set +e
43-
echo "=== effective npm config ==="
44-
npm config list -l 2>&1 | grep -E '^(registry|fetch-|cache|loglevel|prefer-|@databricks)' || true
45-
echo "=== ~/.npmrc (auth masked) ==="
46-
sed 's/_authToken=.*/_authToken=***/' ~/.npmrc 2>/dev/null || echo "(no ~/.npmrc)"
47-
echo "=== npm cache dir ==="
48-
npm config get cache
49-
ls -la "$(npm config get cache)" 2>/dev/null | head -10 || echo "(empty)"
50-
echo "=== package-lock.json resolved URLs by registry ==="
51-
grep -oE '"resolved": "https://[^/]+' package-lock.json | sort | uniq -c
52-
echo "=== reachability probes ==="
53-
for url in https://registry.npmjs.org/ https://databricks.jfrog.io/artifactory/api/npm/db-npm/; do
54-
echo "--- $url ---"
55-
curl -sS -o /dev/null -w "HTTP=%{http_code} connect=%{time_connect}s total=%{time_total}s\n" \
56-
--max-time 10 --connect-timeout 5 \
57-
-H "Authorization: Bearer $JFROG_ACCESS_TOKEN" \
58-
"$url" || echo "FAIL (curl exit $?)"
59-
done
60-
echo "=== sample package metadata fetch (basic-ftp, new in this PR) ==="
61-
curl -sS -o /tmp/probe.json -w "basic-ftp: HTTP=%{http_code} size=%{size_download}b time=%{time_total}s\n" \
62-
--max-time 10 -H "Authorization: Bearer $JFROG_ACCESS_TOKEN" \
63-
"https://databricks.jfrog.io/artifactory/api/npm/db-npm/basic-ftp" || echo "FAIL"
64-
head -c 200 /tmp/probe.json 2>/dev/null; echo
65-
echo "=== /probe ==="
66-
true
6737
- name: Check code style
6838
run: |
69-
npm ci --loglevel=http --no-progress --foreground-scripts
39+
npm ci
7040
npm run prettier
7141
npm run lint
72-
# DIAGNOSTIC (temporary — remove once npm-hang root cause identified).
73-
# Captures npm debug log, cache state, and node_modules state AFTER
74-
# the failure so we can see exactly what npm did during the silent
75-
# 8-minute hang.
76-
- name: Diag — post-npm-ci on failure
77-
if: failure()
78-
run: |
79-
set +e
80-
DIAG=/tmp/npm-diag
81-
mkdir -p "$DIAG"
82-
cp -r ~/.npm/_logs "$DIAG/npm_logs" 2>/dev/null || echo "no _logs dir"
83-
du -sh ~/.npm/_cacache 2>/dev/null > "$DIAG/cacache_size.txt"
84-
ls -la node_modules/.bin/ 2>/dev/null > "$DIAG/node_modules_bin.txt" || echo "(no .bin)" > "$DIAG/node_modules_bin.txt"
85-
ls node_modules/ 2>/dev/null | wc -l > "$DIAG/node_modules_pkg_count.txt"
86-
ps auxf > "$DIAG/ps_snapshot.txt" 2>&1 || true
87-
dmesg 2>&1 | tail -50 > "$DIAG/dmesg_tail.txt" || true
88-
cp package-lock.json "$DIAG/package-lock.json.in-ci"
89-
echo "=== diag bundle contents ==="
90-
ls -la "$DIAG"
91-
true
92-
- name: Diag — upload bundle
93-
if: failure()
94-
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
95-
with:
96-
name: npm-diag-lint
97-
path: /tmp/npm-diag
98-
if-no-files-found: warn
99-
retention-days: 7
10042
10143
unit-test:
10244
runs-on:
@@ -129,61 +71,10 @@ jobs:
12971
${{ runner.os }}-${{ matrix.node-version }}-build-${{ env.cache-name }}-
13072
${{ runner.os }}-${{ matrix.node-version }}-build-
13173
${{ runner.os }}-${{ matrix.node-version }}-
132-
- name: Diag — pre-npm-ci
133-
run: |
134-
set +e
135-
echo "=== effective npm config ==="
136-
npm config list -l 2>&1 | grep -E '^(registry|fetch-|cache|loglevel|prefer-|@databricks)' || true
137-
echo "=== ~/.npmrc (auth masked) ==="
138-
sed 's/_authToken=.*/_authToken=***/' ~/.npmrc 2>/dev/null || echo "(no ~/.npmrc)"
139-
echo "=== npm cache dir ==="
140-
npm config get cache
141-
ls -la "$(npm config get cache)" 2>/dev/null | head -10 || echo "(empty)"
142-
echo "=== package-lock.json resolved URLs by registry ==="
143-
grep -oE '"resolved": "https://[^/]+' package-lock.json | sort | uniq -c
144-
echo "=== reachability probes ==="
145-
for url in https://registry.npmjs.org/ https://databricks.jfrog.io/artifactory/api/npm/db-npm/; do
146-
echo "--- $url ---"
147-
curl -sS -o /dev/null -w "HTTP=%{http_code} connect=%{time_connect}s total=%{time_total}s\n" \
148-
--max-time 10 --connect-timeout 5 \
149-
-H "Authorization: Bearer $JFROG_ACCESS_TOKEN" \
150-
"$url" || echo "FAIL (curl exit $?)"
151-
done
152-
echo "=== sample package metadata fetch (basic-ftp, new in this PR) ==="
153-
curl -sS -o /tmp/probe.json -w "basic-ftp: HTTP=%{http_code} size=%{size_download}b time=%{time_total}s\n" \
154-
--max-time 10 -H "Authorization: Bearer $JFROG_ACCESS_TOKEN" \
155-
"https://databricks.jfrog.io/artifactory/api/npm/db-npm/basic-ftp" || echo "FAIL"
156-
head -c 200 /tmp/probe.json 2>/dev/null; echo
157-
echo "=== /probe ==="
158-
true
15974
- name: Run unit tests
16075
run: |
161-
npm ci --loglevel=http --no-progress --foreground-scripts
76+
npm ci
16277
npm run test
163-
- name: Diag — post-npm-ci on failure
164-
if: failure()
165-
run: |
166-
set +e
167-
DIAG=/tmp/npm-diag
168-
mkdir -p "$DIAG"
169-
cp -r ~/.npm/_logs "$DIAG/npm_logs" 2>/dev/null || echo "no _logs dir"
170-
du -sh ~/.npm/_cacache 2>/dev/null > "$DIAG/cacache_size.txt"
171-
ls -la node_modules/.bin/ 2>/dev/null > "$DIAG/node_modules_bin.txt" || echo "(no .bin)" > "$DIAG/node_modules_bin.txt"
172-
ls node_modules/ 2>/dev/null | wc -l > "$DIAG/node_modules_pkg_count.txt"
173-
ps auxf > "$DIAG/ps_snapshot.txt" 2>&1 || true
174-
dmesg 2>&1 | tail -50 > "$DIAG/dmesg_tail.txt" || true
175-
cp package-lock.json "$DIAG/package-lock.json.in-ci"
176-
echo "=== diag bundle contents ==="
177-
ls -la "$DIAG"
178-
true
179-
- name: Diag — upload bundle
180-
if: failure()
181-
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
182-
with:
183-
name: npm-diag-unit-test-node${{ matrix.node-version }}
184-
path: /tmp/npm-diag
185-
if-no-files-found: warn
186-
retention-days: 7
18778
- run: tar -cvf ${{ env.NYC_REPORT_DIR }}.tar ${{ env.NYC_REPORT_DIR }}
18879
- name: Store coverage report
18980
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
@@ -236,61 +127,10 @@ jobs:
236127
${{ runner.os }}-${{ matrix.node-version }}-build-${{ env.cache-name }}-
237128
${{ runner.os }}-${{ matrix.node-version }}-build-
238129
${{ runner.os }}-${{ matrix.node-version }}-
239-
- name: Diag — pre-npm-ci
240-
run: |
241-
set +e
242-
echo "=== effective npm config ==="
243-
npm config list -l 2>&1 | grep -E '^(registry|fetch-|cache|loglevel|prefer-|@databricks)' || true
244-
echo "=== ~/.npmrc (auth masked) ==="
245-
sed 's/_authToken=.*/_authToken=***/' ~/.npmrc 2>/dev/null || echo "(no ~/.npmrc)"
246-
echo "=== npm cache dir ==="
247-
npm config get cache
248-
ls -la "$(npm config get cache)" 2>/dev/null | head -10 || echo "(empty)"
249-
echo "=== package-lock.json resolved URLs by registry ==="
250-
grep -oE '"resolved": "https://[^/]+' package-lock.json | sort | uniq -c
251-
echo "=== reachability probes ==="
252-
for url in https://registry.npmjs.org/ https://databricks.jfrog.io/artifactory/api/npm/db-npm/; do
253-
echo "--- $url ---"
254-
curl -sS -o /dev/null -w "HTTP=%{http_code} connect=%{time_connect}s total=%{time_total}s\n" \
255-
--max-time 10 --connect-timeout 5 \
256-
-H "Authorization: Bearer $JFROG_ACCESS_TOKEN" \
257-
"$url" || echo "FAIL (curl exit $?)"
258-
done
259-
echo "=== sample package metadata fetch (basic-ftp, new in this PR) ==="
260-
curl -sS -o /tmp/probe.json -w "basic-ftp: HTTP=%{http_code} size=%{size_download}b time=%{time_total}s\n" \
261-
--max-time 10 -H "Authorization: Bearer $JFROG_ACCESS_TOKEN" \
262-
"https://databricks.jfrog.io/artifactory/api/npm/db-npm/basic-ftp" || echo "FAIL"
263-
head -c 200 /tmp/probe.json 2>/dev/null; echo
264-
echo "=== /probe ==="
265-
true
266130
- name: Run e2e tests
267131
run: |
268-
npm ci --loglevel=http --no-progress --foreground-scripts
132+
npm ci
269133
NODE_OPTIONS="--max-old-space-size=4096" npm run e2e
270-
- name: Diag — post-npm-ci on failure
271-
if: failure()
272-
run: |
273-
set +e
274-
DIAG=/tmp/npm-diag
275-
mkdir -p "$DIAG"
276-
cp -r ~/.npm/_logs "$DIAG/npm_logs" 2>/dev/null || echo "no _logs dir"
277-
du -sh ~/.npm/_cacache 2>/dev/null > "$DIAG/cacache_size.txt"
278-
ls -la node_modules/.bin/ 2>/dev/null > "$DIAG/node_modules_bin.txt" || echo "(no .bin)" > "$DIAG/node_modules_bin.txt"
279-
ls node_modules/ 2>/dev/null | wc -l > "$DIAG/node_modules_pkg_count.txt"
280-
ps auxf > "$DIAG/ps_snapshot.txt" 2>&1 || true
281-
dmesg 2>&1 | tail -50 > "$DIAG/dmesg_tail.txt" || true
282-
cp package-lock.json "$DIAG/package-lock.json.in-ci"
283-
echo "=== diag bundle contents ==="
284-
ls -la "$DIAG"
285-
true
286-
- name: Diag — upload bundle
287-
if: failure()
288-
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
289-
with:
290-
name: npm-diag-e2e-test-node${{ matrix.node-version }}
291-
path: /tmp/npm-diag
292-
if-no-files-found: warn
293-
retention-days: 7
294134
- run: tar -cvf ${{ env.NYC_REPORT_DIR }}.tar ${{ env.NYC_REPORT_DIR }}
295135
- name: Store coverage report
296136
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4

0 commit comments

Comments
 (0)