Skip to content

Commit 585ea52

Browse files
Daniel Jones and claude committed
feat(snippets): regenerate language-table.jsx from vended data
Introduces scripts/generate_language_table.py, which reads the JSON files vended under data/v3-languages/ plus a manual data/translation-memory.json (TM is not exposed via the API) and rewrites the languageData array in snippets/language-table.jsx between new BEGIN/END GENERATED markers. The surrounding JSX (filters, sorting, table rendering) is unchanged. Also wires the generator into the hourly refresh workflow so future upstream changes flow straight to the rendered language table in the same PR. The first run picks up several genuine API changes that the manual data had drifted from (e.g. tag_handling support added to many languages, new de-DE / fr-FR generic variants, current beta status of de-CH / fr-CA). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent add5c86 commit 585ea52

4 files changed

Lines changed: 297 additions & 107 deletions

File tree

.github/workflows/refresh-v3-languages.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ concurrency:
1717

1818
env:
1919
REFRESH_BRANCH: chore/refresh-v3-languages
20-
COMMIT_PATHS: data/v3-languages
20+
COMMIT_PATHS: data/v3-languages snippets/language-table.jsx
2121

2222
jobs:
2323
refresh:
@@ -58,6 +58,9 @@ jobs:
5858
DEEPL_AUTH_KEY: ${{ secrets.DEEPL_API_KEY }}
5959
run: python3 scripts/fetch_v3_languages.py
6060

61+
- name: Regenerate language-table.jsx from vended data
62+
run: python3 scripts/generate_language_table.py
63+
6164
- name: Detect content changes
6265
id: diff
6366
run: |
@@ -101,7 +104,7 @@ jobs:
101104
--base main \
102105
--head "$REFRESH_BRANCH" \
103106
--title "chore(v3-languages): refresh vended responses" \
104-
--body "Automated refresh of \`data/v3-languages/\` from \`https://api.deepl.com/v3/languages\`, opened by the \`refresh-v3-languages\` workflow.
107+
--body "Automated refresh of \`data/v3-languages/\` from \`https://api.deepl.com/v3/languages\`, with \`snippets/language-table.jsx\` regenerated from the new responses. Opened by the \`refresh-v3-languages\` workflow.
105108
106109
Subsequent runs append new commits to this PR when the responses change again, so the diff against \`main\` represents the cumulative update. Review the latest state and merge when ready.
107110

data/translation-memory.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"_comment": "Languages that support Translation Memory in the DeepL platform. Translation Memory is not exposed via /v3/languages, so this list is maintained manually. Codes use the same casing as the generated language-table.jsx (uppercase, hyphen-separated). See docs/learning-how-tos/examples-and-guides/how-to-use-translation-memories.",
3+
"languages": [
4+
"DE",
5+
"EN",
6+
"EN-GB",
7+
"EN-US",
8+
"ES",
9+
"ES-419",
10+
"FR",
11+
"IT",
12+
"JA",
13+
"KO",
14+
"ZH",
15+
"ZH-HANS"
16+
]
17+
}

scripts/generate_language_table.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
#!/usr/bin/env python3
2+
"""Regenerate the inline language data inside snippets/language-table.jsx.
3+
4+
Reads the vended JSON under data/v3-languages/ plus the manual
5+
translation-memory list at data/translation-memory.json, then rewrites the
6+
`languageData` array between the BEGIN/END GENERATED markers in
7+
snippets/language-table.jsx. Everything else in the JSX file is left
8+
alone.
9+
10+
Source-of-truth mappings (per language code returned by /v3/languages):
11+
12+
code uppercased BCP 47 from translate_text.json (de-CH -> DE-CH)
13+
name translate_text "name"
14+
translation true if present in translate_text.json
15+
isVariant translate_text usable_as_source=false and usable_as_target=true
16+
isBeta translate_text status != "stable"
17+
glossaries translate_text features contain "glossary"
18+
tagHandling translate_text features contain "tag_handling"
19+
textImprovement language is present in write.json
20+
styleRules translate_text features contain "style_rules"
21+
translationMemory code is in data/translation-memory.json
22+
"""
23+
from __future__ import annotations
24+
25+
import argparse
26+
import json
27+
import sys
28+
from pathlib import Path
29+
30+
# Filesystem locations, all derived from this script's own path so the
# generator works regardless of the current working directory.
REPO_ROOT = Path(__file__).resolve().parent.parent
DATA_DIR = REPO_ROOT.joinpath("data", "v3-languages")
TM_FILE = REPO_ROOT.joinpath("data", "translation-memory.json")
JSX_FILE = REPO_ROOT.joinpath("snippets", "language-table.jsx")

# Sentinel comment lines that delimit the generated region in the JSX file.
BEGIN_MARKER = " // BEGIN GENERATED: languageData (do not edit; run scripts/generate_language_table.py)"
END_MARKER = " // END GENERATED"
37+
38+
39+
def load_json(path: Path) -> object:
    """Read *path* as UTF-8 text and return the decoded JSON value."""
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
42+
43+
44+
def upper_code(lang: str) -> str:
    """Return *lang* uppercased (e.g. ``de-CH`` -> ``DE-CH``)."""
    uppercased = lang.upper()
    return uppercased
46+
47+
48+
def build_rows(tm_codes: set[str]) -> list[dict]:
    """Build one table row per entry in ``translate_text.json``.

    Args:
        tm_codes: Uppercased language codes that support Translation Memory.

    Returns:
        A list of row dicts, sorted with non-variant languages first and
        variants after, each group ordered by ``code``. Every row carries the
        keys ``code``, ``name``, ``translation``, ``isVariant``,
        ``glossaries``, ``tagHandling``, ``textImprovement``,
        ``translationMemory`` and ``styleRules``; ``isBeta`` is added (as
        ``True``) only when the entry's ``status`` is not ``"stable"``.

    Raises:
        KeyError: If an entry lacks ``lang``, ``name``, ``usable_as_source``
            or ``usable_as_target``.
    """
    translate_text = load_json(DATA_DIR / "translate_text.json")
    write_langs = {entry["lang"] for entry in load_json(DATA_DIR / "write.json")}

    rows: list[dict] = []
    for entry in translate_text:
        lang = entry["lang"]
        code = upper_code(lang)
        # Membership test works whether "features" is a mapping or a list.
        features = entry.get("features", {})
        row = {
            "code": code,
            "name": entry["name"],
            "translation": True,
            # A variant can be a translation target but not a source.
            "isVariant": not entry["usable_as_source"] and entry["usable_as_target"],
            "glossaries": "glossary" in features,
            "tagHandling": "tag_handling" in features,
            # write.json is keyed by the original (non-uppercased) code.
            "textImprovement": lang in write_langs,
            "translationMemory": code in tm_codes,
            "styleRules": "style_rules" in features,
        }
        # isBeta is only emitted when set, keeping stable rows compact.
        if entry.get("status") != "stable":
            row["isBeta"] = True
        rows.append(row)

    # Existing layout grouped non-variants first, then variants. Preserve
    # a stable, readable order: base languages alphabetically, then variants
    # alphabetically. (False sorts before True, so isVariant=False comes
    # first.) A single sort suffices; an earlier sort with the inverted key
    # was fully overridden by this one and has been dropped.
    rows.sort(key=lambda r: (r["isVariant"], r["code"]))
    return rows
78+
79+
80+
def js_literal(value: object) -> str:
    """Render a Python bool or str as a JavaScript literal.

    Booleans become ``true`` / ``false``. Strings become single-quoted
    literals with backslashes, single quotes and line terminators escaped,
    so the result always stays on one line and remains a valid JS string.

    Args:
        value: ``True``, ``False`` or a ``str``.

    Returns:
        The JavaScript source text for *value*.

    Raises:
        TypeError: If *value* is neither a bool singleton nor a string.
    """
    if value is True:
        return "true"
    if value is False:
        return "false"
    if isinstance(value, str):
        escaped = (
            # Backslash must be escaped first so later escapes are not doubled.
            value.replace("\\", "\\\\")
            .replace("'", "\\'")
            # Raw line terminators are not allowed inside a quoted JS string.
            .replace("\n", "\\n")
            .replace("\r", "\\r")
            .replace("\u2028", "\\u2028")
            .replace("\u2029", "\\u2029")
        )
        return f"'{escaped}'"
    raise TypeError(f"Unsupported literal: {value!r}")
89+
90+
91+
def format_row(row: dict) -> str:
    """Render one row dict as a single-line JS object literal entry.

    Keys are emitted in a fixed order so regenerated diffs remain readable;
    keys absent from *row* (such as ``isBeta``) are skipped.
    """
    key_order = (
        "code",
        "name",
        "translation",
        "isVariant",
        "isBeta",
        "glossaries",
        "tagHandling",
        "textImprovement",
        "translationMemory",
        "styleRules",
    )
    rendered = []
    for key in key_order:
        if key not in row:
            continue
        rendered.append(f"{key}: {js_literal(row[key])}")
    body = ", ".join(rendered)
    return f" {{ {body} }},"
107+
108+
109+
def render_block(rows: list[dict]) -> str:
    """Return the full generated region, from BEGIN marker to END marker.

    The result is newline-joined and has no trailing newline.
    """
    out = [BEGIN_MARKER, " const languageData = ["]
    out.extend(format_row(entry) for entry in rows)
    out.append(" ]")
    out.append(END_MARKER)
    return "\n".join(out)
118+
119+
120+
def replace_block(jsx: str, new_block: str) -> str:
    """Return *jsx* with its languageData region replaced by *new_block*.

    If both GENERATED markers are present, everything from the first BEGIN
    marker through the first END marker after it is replaced. Otherwise the
    legacy hand-written ``const languageData = [ ... ]`` block (including its
    leading comment line) is located and replaced.

    Args:
        jsx: Current contents of the JSX file.
        new_block: Replacement text, expected to include both markers and no
            trailing newline.

    Returns:
        The updated file contents.

    Raises:
        RuntimeError: If the markers are present but END does not follow
            BEGIN, or if the legacy block (or its closing ``]``) cannot be
            found.
    """
    begin = jsx.find(BEGIN_MARKER)
    if begin != -1 and END_MARKER in jsx:
        end = jsx.find(END_MARKER, begin + len(BEGIN_MARKER))
        if end == -1:
            # An END marker exists only before BEGIN; replacing through
            # end-of-file would silently drop the rest of the JSX.
            raise RuntimeError(
                "Found the END GENERATED marker but not after the BEGIN "
                "GENERATED marker; refusing to rewrite the file."
            )
        return jsx[:begin] + new_block + jsx[end + len(END_MARKER) :]

    # First-time install: find the existing `const languageData = [ ... ]`
    # block (up to and including its closing `]`) and replace it.
    needle = " // Language data with individual feature support\n const languageData = ["
    start = jsx.find(needle)
    if start == -1:
        raise RuntimeError(
            "Could not locate languageData block to replace. Insert the BEGIN/END "
            "GENERATED markers manually before running this script."
        )
    end_anchor = "\n ]\n"
    end = jsx.find(end_anchor, start)
    if end == -1:
        raise RuntimeError("Could not find closing ']' of languageData array.")
    # The anchor swallows the newline after `]`; put it back so the line that
    # follows is not appended to the END marker comment.
    return jsx[:start] + new_block + "\n" + jsx[end + len(end_anchor) :]
140+
141+
142+
def main() -> int:
    """Regenerate (or, with ``--check``, verify) the JSX language table.

    Returns:
        0 if the file is already up to date or was rewritten, 1 if
        ``--check`` was given and the file would change, 2 if the
        translation-memory file lacks a ``languages`` key.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--check",
        action="store_true",
        help="Exit 1 if the file would change instead of writing.",
    )
    options = cli.parse_args()

    tm_payload = load_json(TM_FILE)
    has_languages = isinstance(tm_payload, dict) and "languages" in tm_payload
    if not has_languages:
        print(f"error: {TM_FILE} is missing the 'languages' key", file=sys.stderr)
        return 2
    # Normalise casing so the manual list matches the generated codes.
    tm_codes = {code.upper() for code in tm_payload["languages"]}

    rows = build_rows(tm_codes)
    generated = render_block(rows)

    existing = JSX_FILE.read_text(encoding="utf-8")
    rewritten = replace_block(existing, generated)

    if rewritten != existing:
        if options.check:
            # Check mode never touches the file; it only reports drift.
            print(f"{JSX_FILE}: out of date", file=sys.stderr)
            return 1
        JSX_FILE.write_text(rewritten, encoding="utf-8")
        print(f"{JSX_FILE}: regenerated ({len(rows)} languages)")
        return 0

    print(f"{JSX_FILE}: up to date")
    return 0
172+
173+
174+
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)