Skip to content

Commit 8883090

Browse files
feat: add crowdin distribution mirror
1 parent 8fe596a commit 8883090

5 files changed

Lines changed: 414 additions & 0 deletions

File tree

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
#!/usr/bin/env node
2+
/**
3+
* Syncs Crowdin distribution files from distributions.crowdin.net to a local directory.
4+
* Designed to be run from GitHub Actions; the workflow publishes the output directory to the crowdin-dist branch, which is served via the jsDelivr CDN.
5+
*
6+
* Usage:
7+
* node sync-crowdin-distribution.js
8+
*
9+
* Environment variables:
10+
* OUTPUT_DIR - Directory to write files into (default: dist-pages/crowdin-dist)
* CROWDIN_DISTRIBUTION_IDS - Required. Comma-separated list of distribution hashes to sync.
11+
*/
12+
13+
'use strict';
14+
15+
const https = require('node:https');
16+
const fs = require('node:fs');
17+
const path = require('node:path');
18+
19+
const BASE_CDN = 'https://distributions.crowdin.net';
20+
const OUTPUT_DIR = path.resolve(process.env.OUTPUT_DIR || 'dist-pages/crowdin-dist');
21+
22+
/** Number of simultaneous downloads per batch. */
23+
const CONCURRENCY = 8;
24+
25+
/**
 * Distribution hashes to sync.
 * Read from the CROWDIN_DISTRIBUTION_IDS environment variable as a
 * comma-separated list (e.g. "hash1,hash2"). Store the value in GitHub
 * project variables under the name CROWDIN_DISTRIBUTION_IDS.
 *
 * Entries are trimmed, blank entries are dropped, and repeated hashes are
 * collapsed (first occurrence wins) so no distribution is downloaded twice.
 */
const DISTRIBUTIONS = [
  ...new Set(
    (process.env.CROWDIN_DISTRIBUTION_IDS || '')
      .split(',')
      .map((s) => s.trim())
      .filter(Boolean),
  ),
];

// Without at least one hash there is nothing to sync; fail the run loudly.
if (DISTRIBUTIONS.length === 0) {
  console.error('ERROR: CROWDIN_DISTRIBUTION_IDS environment variable is not set or empty.');
  process.exit(1);
}
40+
41+
/**
 * Fetches a URL over HTTPS, following redirects, and returns the body as a Buffer.
 *
 * Redirect targets are resolved against the URL that produced them, so
 * relative `Location` headers work, and the number of hops is bounded so a
 * redirect loop rejects instead of recursing forever.
 *
 * @param {string} url Absolute https URL to download.
 * @param {number} [maxRedirects=5] Remaining redirect hops allowed.
 * @returns {Promise<Buffer>} Resolves with the response body. Rejects on a
 *   request/response error, an HTTP status >= 400, an unparseable redirect
 *   target, or when the redirect budget is exhausted.
 */
function fetchUrl(url, maxRedirects = 5) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        const { statusCode, headers } = res;

        // Follow redirects
        if (statusCode >= 300 && statusCode < 400 && headers.location) {
          // Discard the redirect body so the socket is released.
          res.resume();

          if (maxRedirects <= 0) {
            reject(new Error(`Too many redirects for ${url}`));
            return;
          }

          let target;
          try {
            // Location may be relative; resolve it against the current URL.
            target = new URL(headers.location, url).href;
          } catch {
            reject(new Error(`Invalid redirect location "${headers.location}" for ${url}`));
            return;
          }

          fetchUrl(target, maxRedirects - 1).then(resolve, reject);
          return;
        }

        const chunks = [];
        res.on('data', (chunk) => chunks.push(chunk));
        res.on('end', () => {
          if (statusCode >= 400) {
            reject(new Error(`HTTP ${statusCode} for ${url}`));
            return;
          }
          resolve(Buffer.concat(chunks));
        });
        res.on('error', reject);
      })
      .on('error', reject);
  });
}
65+
66+
/**
 * Persists data at the given path, first ensuring the containing directory
 * (and any missing ancestors) exists.
 * @param {string} filePath Destination file path.
 * @param {Buffer|string} data Contents to write.
 */
function saveFile(filePath, data) {
  const parentDir = path.dirname(filePath);
  fs.mkdirSync(parentDir, { recursive: true });
  fs.writeFileSync(filePath, data);
}
75+
76+
/**
 * Processes an array of items in fixed-size concurrent batches.
 *
 * Each batch is started together and awaited as a whole before the next
 * batch begins, so at most `batchSize` calls to `fn` are in flight at once.
 * If any call rejects, the returned promise rejects and later batches are
 * not started.
 *
 * @template T
 * @param {T[]} items Items to process.
 * @param {number} batchSize Maximum number of items per batch; must be > 0.
 * @param {(item: T) => Promise<void>} fn Async worker, called with the item only.
 * @returns {Promise<void>}
 * @throws {RangeError} (as a rejection) when batchSize is not a positive number.
 */
async function processInBatches(items, batchSize, fn) {
  // A zero or negative step would never advance the loop, and NaN would
  // silently process nothing, so reject those values up front.
  if (!(batchSize > 0)) {
    throw new RangeError(`batchSize must be a positive number, got ${batchSize}`);
  }

  for (let start = 0; start < items.length; start += batchSize) {
    const batch = items.slice(start, start + batchSize);
    // Wrap fn so it does not receive map's extra (index, array) arguments.
    await Promise.all(batch.map((item) => fn(item)));
  }
}
88+
89+
/**
 * Downloads all distribution files for a single hash.
 *
 * Fetches manifest.json and languages.json, then every unique path listed
 * under `manifest.content`, writing each file below OUTPUT_DIR/<hash>.
 * Content files are downloaded CONCURRENCY at a time; a failure on one
 * content file is logged and counted but does not stop the others.
 *
 * Paths come from the remote manifest, so each one is checked to resolve
 * inside the distribution's own directory before anything is written.
 *
 * @param {string} hash Distribution hash.
 * @returns {Promise<boolean>} true if all files were fetched without errors.
 *   Rejects if manifest.json or languages.json cannot be fetched, or if the
 *   manifest is not a JSON object.
 */
async function syncDistribution(hash) {
  console.log(`\n=== Syncing distribution: ${hash} ===`);
  const hashDir = path.join(OUTPUT_DIR, hash);

  // manifest.json
  console.log(' Fetching manifest.json...');
  const manifestBuf = await fetchUrl(`${BASE_CDN}/${hash}/manifest.json`);
  // Parse before saving so an invalid manifest never reaches the mirror.
  const manifest = JSON.parse(manifestBuf.toString('utf8'));
  if (manifest === null || typeof manifest !== 'object') {
    throw new Error(`Invalid manifest.json for ${hash}`);
  }
  saveFile(path.join(hashDir, 'manifest.json'), manifestBuf);

  console.log(` Timestamp : ${manifest.timestamp}`);
  console.log(` Languages : ${(manifest.languages || []).length}`);

  // languages.json
  console.log(' Fetching languages.json...');
  const langsBuf = await fetchUrl(`${BASE_CDN}/${hash}/languages.json`);
  saveFile(path.join(hashDir, 'languages.json'), langsBuf);

  // content files (the Set removes paths shared between languages)
  const contentPaths = new Set(Object.values(manifest.content || {}).flat());
  const pathList = [...contentPaths];
  console.log(` Content files: ${pathList.length} (concurrency=${CONCURRENCY})`);

  let fetched = 0;
  let failed = 0;

  await processInBatches(pathList, CONCURRENCY, async (contentPath) => {
    try {
      const localPath = path.join(hashDir, contentPath);

      // Refuse manifest entries that would be written outside hashDir.
      const relative = path.relative(hashDir, localPath);
      const escapes =
        relative === '' ||
        relative === '..' ||
        relative.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relative);
      if (escapes) {
        throw new Error('path escapes the distribution directory');
      }

      const data = await fetchUrl(`${BASE_CDN}/${hash}${contentPath}`);
      saveFile(localPath, data);
      fetched++;
    } catch (err) {
      failed++;
      console.warn(` WARN: failed to fetch ${contentPath}: ${err.message}`);
    }

    // Report progress on every 50th completed file, successful or not.
    if ((fetched + failed) % 50 === 0) {
      console.log(` Progress: ${fetched + failed}/${pathList.length}`);
    }
  });

  console.log(` Result: ${fetched} fetched, ${failed} failed`);
  return failed === 0;
}
147+
148+
/**
 * Entry point: syncs every configured distribution into OUTPUT_DIR.
 *
 * Distributions are processed one after another. A failure in one is logged
 * and remembered but does not prevent the remaining ones from being synced;
 * the process exits with status 1 if any distribution had errors.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('Crowdin Distribution Sync');
  console.log(`Output dir: ${OUTPUT_DIR}`);
  console.log(`Distributions: ${DISTRIBUTIONS.length}`);

  fs.mkdirSync(OUTPUT_DIR, { recursive: true });

  let allOk = true;
  for (const hash of DISTRIBUTIONS) {
    try {
      const ok = await syncDistribution(hash);
      if (!ok) allOk = false;
    } catch (err) {
      console.error(`\nFATAL: Failed to sync ${hash}:`, err.message);
      allOk = false;
    }
  }

  if (!allOk) {
    console.error('\nSync completed with errors.');
    process.exit(1);
  }
  console.log('\nSync complete!');
}

// Handle anything main() itself throws (e.g. OUTPUT_DIR cannot be created)
// instead of leaving the promise floating.
main().catch((err) => {
  console.error('\nFATAL: Unexpected error:', err);
  process.exit(1);
});
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
---
2+
# Syncs Crowdin distribution files from distributions.crowdin.net to a
3+
# dedicated git branch (crowdin-dist) served via jsDelivr CDN.
4+
#
5+
# proxy-translator.js fetches manifest.json, languages.json and all translation
6+
# JSON files from https://distributions.crowdin.net. Those requests count
7+
# against the LizardByte Crowdin free-tier quota, so we mirror the content
8+
# here (refreshed daily) and redirect browser fetch() calls to jsDelivr via
9+
# the interceptor in src/js/crowdin.js.
10+
#
11+
# jsDelivr CDN URL pattern:
12+
# https://cdn.jsdelivr.net/gh/LizardByte/shared-web@crowdin-dist/<hash>/…
13+
#
14+
# jsDelivr guarantees Access-Control-Allow-Origin: * on all responses, which
15+
# means no CORS plugin is required in consumer pages.
16+
17+
name: Sync Crowdin Distribution
18+
permissions: {}
19+
20+
on:
21+
schedule:
22+
# Run daily at 02:00 UTC so translations are fresh at the start of each day.
23+
- cron: '0 2 * * *'
24+
workflow_dispatch: # Allow ad-hoc manual runs
25+
26+
# Only one deployment at a time; do not cancel an in-progress run.
27+
concurrency:
28+
group: crowdin-dist-sync
29+
cancel-in-progress: false
30+
31+
jobs:
32+
sync:
33+
name: Sync distributions to crowdin-dist branch
34+
runs-on: ubuntu-latest
35+
permissions:
36+
contents: write
37+
environment:
38+
name: crowdin-dist
39+
url: ${{ github.server_url }}/${{ github.repository }}/tree/crowdin-dist
40+
if: github.repository_owner == 'LizardByte' # don't run for forks
41+
42+
steps:
43+
- name: Checkout
44+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
45+
with:
46+
token: ${{ secrets.GH_BOT_TOKEN }}
47+
48+
- name: Set up Node.js
49+
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
50+
with:
51+
node-version: 'node'
52+
53+
- name: Download Crowdin distribution files
54+
env:
55+
CROWDIN_DISTRIBUTION_IDS: ${{ vars.CROWDIN_DISTRIBUTION_IDS }}
56+
OUTPUT_DIR: /tmp/crowdin-dist
57+
run: node .github/scripts/sync-crowdin-distribution.js
58+
59+
- name: Commit and push to crowdin-dist branch
  env:
    GH_BOT_NAME: ${{ vars.GH_BOT_NAME }}
    GH_BOT_EMAIL: ${{ secrets.GH_BOT_EMAIL }}
  run: |
    git config user.name "${GH_BOT_NAME}"
    git config user.email "${GH_BOT_EMAIL}"

    # Create an orphan branch so the branch contains only distribution
    # files with no history from main (keeps the branch lean).
    git checkout --orphan crowdin-dist

    # Remove every file that was inherited from the main checkout.
    git rm -rf . --quiet

    # Clean up any remaining untracked files / directories.
    git clean -fdx

    # Populate the branch with the freshly downloaded distribution files.
    cp -r /tmp/crowdin-dist/. .

    git add .

    # Only commit when there are actual changes. The orphan branch has no
    # HEAD yet, so a plain `git diff --staged` would report every file as
    # new; compare the index against the currently published branch
    # instead. If the remote branch does not exist yet the fetch fails
    # and we fall through to the first publish.
    if git fetch --depth=1 origin crowdin-dist \
      && git diff --staged --quiet FETCH_HEAD; then
      echo "No changes – distribution files are already up to date."
    else
      git commit -m "chore: sync Crowdin distributions"
      git push origin crowdin-dist --force
    fi

eslint.config.mjs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ export default [
55
pluginJs.configs.recommended,
66
{
77
ignores: [
8+
"_readthedocs/**", // generated ReadTheDocs HTML
89
"coverage/**",
910
"dist/**",
11+
"docs/**", // generated JSDoc output
1012
],
1113
},
1214
{

src/js/crowdin.js

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,51 @@
11
const loadScript = require('./load-script');
22

3+
/**
 * jsDelivr CDN URL serving Crowdin distribution files from the crowdin-dist
 * git branch. jsDelivr unconditionally sets Access-Control-Allow-Origin: *,
 * so cross-origin fetch() calls succeed without any browser plugin.
 * The branch is refreshed daily by the "Sync Crowdin Distribution" workflow.
 * Structure mirrors https://distributions.crowdin.net/<hash>/… exactly.
 * @type {string}
 */
const CROWDIN_DIST_MIRROR = 'https://cdn.jsdelivr.net/gh/LizardByte/shared-web@crowdin-dist';

/**
 * Monkey-patches globalThis.fetch to redirect Crowdin distribution requests to
 * the jsDelivr-hosted mirror (CROWDIN_DIST_MIRROR).
 *
 * Requests given as a string or as a URL object are rewritten when they
 * target https://distributions.crowdin.net; the path, query string and
 * fragment are preserved. Every other input (relative URLs, other hosts,
 * non-https schemes, Request objects) is passed to the original fetch
 * unchanged.
 *
 * Must be called BEFORE proxy-translator.js is loaded so that every fetch()
 * the script makes is already intercepted.
 *
 * Idempotent – installs the interceptor at most once per page. Does nothing
 * when the environment has no fetch().
 */
function _installCrowdinFetchInterceptor() {
  if (typeof globalThis.fetch !== 'function') return;
  if (globalThis._crowdinMirrorInstalled) return;
  globalThis._crowdinMirrorInstalled = true;

  const _origFetch = globalThis.fetch.bind(globalThis);

  globalThis.fetch = function crowdinMirrorFetch(url, options) {
    let parsed;
    if (url instanceof URL) {
      // fetch() accepts URL objects too; inspect them like strings.
      parsed = url;
    } else if (typeof url === 'string') {
      try {
        parsed = new URL(url);
      } catch {
        // Not a valid absolute URL – pass through unchanged.
      }
    }

    // Use exact hostname comparison to avoid prefix-match bypasses
    // (e.g. distributions.crowdin.net.evil.com) that would be flagged
    // by incomplete URL sanitisation checks.
    if (parsed?.protocol === 'https:' && parsed.hostname === 'distributions.crowdin.net') {
      const mirroredUrl = CROWDIN_DIST_MIRROR + parsed.pathname + parsed.search + parsed.hash;
      return _origFetch(mirroredUrl, options);
    }

    return _origFetch(url, options);
  };
}
48+
349
/**
450
* Initializes Crowdin translation widget based on project and UI platform.
551
* @param {string} project - Project name ('LizardByte' or 'LizardByte-docs').
@@ -16,6 +62,10 @@ function initCrowdIn(project = 'LizardByte', platform = null) {
1662
return;
1763
}
1864

65+
// Redirect distribution CDN requests to our jsDelivr-hosted mirror (crowdin-dist branch)
66+
// before the script is even loaded so every fetch() it makes is intercepted.
67+
_installCrowdinFetchInterceptor();
68+
1969
loadScript('https://website-translator.app.crowdin.net/assets/proxy-translator.js', function() {
2070
// Configure base settings based on project
2171
const projectSettings = {

0 commit comments

Comments
 (0)