# .github/workflows/mine-and-deploy.yml
# Mines GitHub user stats and publishes the resulting dashboards to
# Cloudflare.
name: Mine GitHub user stats + deploy to Cloudflare

on:
  # Daily full run at 06:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 6 * * *"
  # Manual run; when `user` is set only that login is mined.
  workflow_dispatch:
    inputs:
      user:
        description: "Single GitHub login to mine (skips the full users.txt loop)."
        required: false
  # Re-run when the generator, its template, the Cloudflare assets or this
  # workflow file change on main.
  push:
    branches: [main]
    paths:
      - "generate_stats.py"
      - "stats_template.html"
      - "cloudflare/**"
      - ".github/workflows/mine-and-deploy.yml"

# All runs share one concurrency group; a new run waits for the in-progress
# one instead of cancelling it.
concurrency:
  group: mine-and-deploy
  cancel-in-progress: false
jobs:
  # Single job: restore caches, decide which logins to mine, mine each one
  # (deploying partial results while it runs), then publish the final assets.
  mine-and-deploy:
    runs-on: ubuntu-latest
    permissions:
      contents: write # for appending new users to users.txt
    steps:
      - uses: actions/checkout@v4
        with:
          token: ${{ secrets.GH_MINING_TOKEN || github.token }}

      - uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - uses: actions/setup-node@v4
        with:
          node-version: "24"

      # Persist per-user mining caches across runs so we don't re-fetch
      # PR details, commit stats, etc. that we've already pulled before.
      # Key includes the inputs.user (or 'full' for cron runs) so single-
      # user dispatches restore that user's cache specifically.
      - name: Restore mining caches
        uses: actions/cache@v4
        with:
          path: |
            cache_*/api/
            cache_*/bare/
            cache/api/
            cache/bare/
          key: stats-cache-v1-${{ inputs.user || 'full' }}-${{ github.run_id }}
          restore-keys: |
            stats-cache-v1-${{ inputs.user || 'full' }}-
            stats-cache-v1-

      # Persist deployed user HTMLs across runs so that single-user
      # mining doesn't wipe other users from the CF bucket when wrangler
      # replaces the assets dir on deploy. Single shared key — every run
      # restores the latest snapshot of all deployed dashboards, adds /
      # refreshes its own user(s), and writes back the full set.
      #
      # Exclude committed dashboards (pirate, index, 404) from the cache
      # so old versions can't overwrite the just-checked-out repo files
      # when this step restores. We also explicitly re-apply them from
      # git after the restore (next step) — defends against the case
      # where the existing cache still contains them from a previous run.
      - name: Restore deployed dashboards
        uses: actions/cache@v4
        with:
          path: |
            cloudflare/public/*.html
            !cloudflare/public/pirate.html
            !cloudflare/public/index.html
            !cloudflare/public/404.html
          key: deployed-htmls-v1-${{ github.run_id }}
          restore-keys: |
            deployed-htmls-v1-

      - name: Re-apply committed dashboards from git
        run: |
          git checkout HEAD -- \
            cloudflare/public/pirate.html \
            cloudflare/public/index.html \
            cloudflare/public/404.html

      # Bootstrap from the live CF bucket: any user that's already
      # deployed in production but missing locally (because the previous
      # run was cancelled at the 6h cap and its post-step cache save
      # didn't run) gets downloaded here. Without this, the next run
      # would re-mine ~all users every time.
      - name: Bootstrap dashboards from deployed bucket
        run: |
          # Best-effort step: a failed download must never fail the job.
          set +e
          deployed_json=$(curl -sSL --max-time 15 \
            "https://githubusers.archivebox.io/deployed.json" 2>/dev/null)
          if [ -z "$deployed_json" ] || ! echo "$deployed_json" | python3 -c "import json,sys;json.load(sys.stdin)" >/dev/null 2>&1; then
            echo "No deployed.json available — skipping bootstrap"
            exit 0
          fi
          users=$(echo "$deployed_json" \
            | python3 -c "import json,sys; [print(u) for u in json.load(sys.stdin)]")
          n=0
          # Read one name per line (no word-splitting or globbing).
          while IFS= read -r u; do
            # Names come from a remote manifest and become part of a local
            # path and a URL; skip anything that is not a plain login token.
            [[ "$u" =~ ^[A-Za-z0-9][A-Za-z0-9_-]*$ ]] || continue
            f="cloudflare/public/${u}.html"
            # Don't clobber pirate.html (committed/authoritative).
            [ "$u" = "pirate" ] && continue
            # Only fetch dashboards that are missing or empty locally.
            if [ ! -s "$f" ]; then
              if curl -sSL --max-time 30 --fail \
                "https://githubusers.archivebox.io/${u}.html" \
                -o "$f" 2>/dev/null; then
                n=$((n + 1))
              else
                # Remove any partial download so it isn't mistaken for a
                # deployed dashboard later.
                rm -f "$f"
              fi
            fi
          done <<< "$users"
          echo "Bootstrapped $n dashboards from deployed bucket"

      - name: Install gh CLI
        run: |
          type -p gh >/dev/null || (
            sudo apt-get update -qq && sudo apt-get install -y gh
          )

      # gh authenticates via the GH_TOKEN environment variable (which we
      # already set on each step that calls it); no explicit `gh auth login`
      # step needed.

      # Writes the logins to mine to /tmp/targets.txt and sets the step
      # output `added=true` when a new login was appended to users.txt.
      - name: Determine target users
        id: targets
        working-directory: .
        env:
          INPUT_USER: ${{ inputs.user }}
        run: |
          set -e
          mkdir -p cloudflare/public
          # Build the list of users we'll mine THIS run.
          if [ -n "$INPUT_USER" ]; then
            # The login is caller-supplied and later becomes part of file
            # names (stats_<user>.html, cloudflare/public/<user>.html), so
            # refuse anything that is not a plain login token. The value is
            # deliberately not echoed back into a workflow command.
            if ! [[ "$INPUT_USER" =~ ^[A-Za-z0-9][A-Za-z0-9_-]*$ ]]; then
              echo "::error::inputs.user is not a valid GitHub login"
              exit 1
            fi
            echo "Single-user mine (forced): $INPUT_USER"
            # Persist new users into users.txt so future scheduled runs
            # include them. Whole-line, case-insensitive, fixed-string match
            # so the login is never interpreted as a regex.
            if ! grep -qixF -- "$INPUT_USER" cloudflare/users.txt; then
              echo "$INPUT_USER" >> cloudflare/users.txt
              echo "added=true" >> "$GITHUB_OUTPUT"
            fi
            echo "$INPUT_USER" > /tmp/targets.txt
          else
            # Full mine: only mine users that don't have a deployed
            # dashboard yet. Once a dashboard exists, it stays put
            # until someone clicks the manual "Refresh" button (which
            # dispatches with inputs.user set).
            echo "Full mine of users.txt (skip already-deployed)"
            : > /tmp/targets.txt
            while IFS= read -r u || [ -n "$u" ]; do
              # Strip trailing comments and all whitespace; skip blanks.
              u="${u%%#*}"
              u="${u//[[:space:]]/}"
              [ -z "$u" ] && continue
              # -s (non-empty), matching the bootstrap step: a zero-byte
              # dashboard counts as missing and is mined again.
              if [ -s "cloudflare/public/${u}.html" ]; then
                echo "  skip @$u — dashboard already deployed"
                continue
              fi
              echo "$u" >> /tmp/targets.txt
            done < cloudflare/users.txt
          fi
          # Pre-stage pirate's enhanced version
          if [ -f stats.html ]; then
            cp stats.html cloudflare/public/pirate.html
          fi
          # Stage users.txt as a public asset so the Worker's dynamic /
          # handler can read it for the queued/mining list. Doing it here
          # (before the mining loop) means every interim deploy also has
          # a fresh users.txt available to the homepage.
          cp cloudflare/users.txt cloudflare/public/users.txt
          # Generate deployed.json — a JSON array of user logins whose
          # dashboard HTML is currently in /public. Updated on every
          # deploy_now() call in the mining step so the homepage stays
          # accurate.
          ls cloudflare/public/*.html 2>/dev/null \
            | sed -E 's|cloudflare/public/||;s|\.html$||' \
            | grep -vE '^(index|404)$' \
            | python3 -c "import sys,json; print(json.dumps(sorted([l.strip() for l in sys.stdin if l.strip()])))" \
            > cloudflare/public/deployed.json
          echo "Targets:"
          cat /tmp/targets.txt

      - name: Mine each user (with live in-progress deploys)
        working-directory: .
        env:
          NO_COLOR: "1"
          GH_TOKEN: ${{ secrets.GH_MINING_TOKEN }}
          CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          # Posted by generate_stats.py to /api/progress so the Worker's
          # loading page can render real-time phase info.
          STATS_PROGRESS_TOKEN: ${{ secrets.GH_MINING_TOKEN }}
        run: |
          set -e

          # Rewrite deployed.json from the *.html files currently staged in
          # cloudflare/public (index and 404 excluded).
          regen_manifest() {
            ls cloudflare/public/*.html 2>/dev/null \
              | sed -E 's|cloudflare/public/||;s|\.html$||' \
              | grep -vE '^(index|404)$' \
              | python3 -c "import sys,json; print(json.dumps(sorted([l.strip() for l in sys.stdin if l.strip()])))" \
              > cloudflare/public/deployed.json
          }

          # Refresh the manifest and deploy; a failed deploy is reported as
          # a warning and does not abort the run.
          deploy_now() {
            regen_manifest
            (
              # pipefail is scoped to this subshell: without it the pipeline
              # status is tail's, so a failed wrangler deploy would never
              # reach the warning below.
              set -o pipefail
              cd cloudflare && npx --yes wrangler@latest deploy --minify 2>&1 |
                tail -2
            ) || echo "::warning::interim deploy failed"
          }

          watch_and_deploy() {
            # Watches stats_<user>.html every 30s while $1 (PID) is alive.
            # Copies any updated file into the deploy dir and re-deploys so
            # the live page shows partial data as mining progresses.
            local pid="$1" user="$2" src="stats_${user}.html" \
              dst="cloudflare/public/${user}.html" last_mtime=0
            while kill -0 "$pid" 2>/dev/null; do
              sleep 30
              if [ -f "$src" ]; then
                local mtime
                # GNU stat first, BSD stat as fallback, 0 if both fail.
                mtime=$(stat -c %Y "$src" 2>/dev/null \
                  || stat -f %m "$src" 2>/dev/null || echo 0)
                if [ "$mtime" -gt "$last_mtime" ]; then
                  cp "$src" "$dst"
                  echo "::group::Interim deploy of @$user (live)"
                  deploy_now
                  echo "::endgroup::"
                  last_mtime="$mtime"
                fi
              fi
            done
          }

          # Per-user wallclock cap. Some users have hundreds of repos and
          # cold-mining them takes forever; bounding to 25min/user keeps
          # the queue moving (partial data already deployed via the
          # watcher's interim deploys).
          USER_TIMEOUT=1500

          while IFS= read -r user || [ -n "$user" ]; do
            # Strip trailing comments and all whitespace; skip blanks.
            user="${user%%#*}"
            user="${user//[[:space:]]/}"
            [ -z "$user" ] && continue
            [ "$user" = "pirate" ] && continue
            echo "::group::Mining @$user"
            # Run mining in the background; watch loop deploys partials.
            timeout --kill-after=30s "$USER_TIMEOUT" python3 \
              generate_stats.py --user "$user" \
              --no-search-commits \
              --max-api-fetches 800 &
            MINE_PID=$!
            watch_and_deploy "$MINE_PID" "$user" &
            WATCH_PID=$!
            wait "$MINE_PID" \
              || echo "::warning::mining @$user exited non-zero (timeout or error)"
            # Stop the watcher and do a final deploy with the final HTML.
            kill "$WATCH_PID" 2>/dev/null || true
            wait "$WATCH_PID" 2>/dev/null || true
            if [ -f "stats_$user.html" ]; then
              cp "stats_$user.html" "cloudflare/public/$user.html"
              echo "::group::Final deploy of @$user"
              deploy_now
              echo "::endgroup::"
            fi
            echo "::endgroup::"
          done < /tmp/targets.txt

      # The login reaches the script through an environment variable rather
      # than being expanded into the script text, so a crafted input cannot
      # inject shell commands into the commit line.
      - name: Commit added users.txt entries
        if: steps.targets.outputs.added == 'true'
        working-directory: cloudflare
        env:
          INPUT_USER: ${{ inputs.user }}
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add users.txt
          # Commit only when something is actually staged.
          git diff --staged --quiet || git commit -m "Add ${INPUT_USER} to users.txt [skip ci]"
          git push || echo "::warning::push failed (no commit permission?)"

      - name: Final deploy
        working-directory: .
        env:
          CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
        run: |
          # Regenerate deployed.json one last time before the final push.
          ls cloudflare/public/*.html 2>/dev/null \
            | sed -E 's|cloudflare/public/||;s|\.html$||' \
            | grep -vE '^(index|404)$' \
            | python3 -c "import sys,json; print(json.dumps(sorted([l.strip() for l in sys.stdin if l.strip()])))" \
            > cloudflare/public/deployed.json
          cd cloudflare && npx --yes wrangler@latest deploy