pingcap · ti-chi-bot · May 28, 2026 · May 27, 2026
diff --git a/.github/workflows/keywords.yaml b/.github/workflows/keywords.yaml
@@ -0,0 +1,139 @@
+name: Keywords
+
+# Weekly check that keywords.md on each branch in CHECK_BRANCHES matches the
+# TiDB parser on the branch of the same name. If any branch reports a
+# mismatch, a Markdown report is built and filed as an issue.
+on:
+  repository_dispatch:
+  workflow_dispatch:
+  schedule:
+    # Runs at 12:00 every Wednesday (Beijing time, UTC+8)
+    - cron: "0 4 * * 3"
+
+env:
+  # Branches to check (docs branch and TiDB parser branch share the same name).
+  # Edit this space-separated list to add or remove branches.
+  CHECK_BRANCHES: "master release-8.5"
+
+permissions:
+  contents: read
+  issues: write
+
+jobs:
+  check-keywords:
+    if: github.repository == 'pingcap/docs-cn'
+    runs-on: ubuntu-latest
+    steps:
+      # fetch-depth: 0 fetches all branches, which the per-branch
+      # `git checkout` in the check step relies on.
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+
+      - name: Install dependencies
+        run: pip install requests
+
+      # Writes /tmp/kw-results/<branch>.status (pass | skip | fail) for every
+      # branch and, on failure, /tmp/kw-results/<branch>.errors.
+      - name: Check keywords for all branches
+        id: check
+        run: |
+          mkdir -p /tmp/kw-results
+          has_failure=false
+
+          # Snapshot the checker from the ref this run started on. After
+          # `git checkout` the working tree belongs to the target branch,
+          # which may not contain scripts/check-keywords.py; running it from
+          # there would be misreported as a keyword mismatch.
+          checker=/tmp/kw-results/check-keywords.py
+          cp ./scripts/check-keywords.py "$checker"
+
+          for branch in $CHECK_BRANCHES; do
+            git checkout "$branch" --quiet
+
+            # The checker exits non-zero on a mismatch; capture its status
+            # instead of letting `bash -e` abort the step.
+            set +e
+            output=$(python "$checker" \
+              --download_from_url \
+              --parser_url "https://github.com/pingcap/tidb/raw/refs/heads/${branch}/pkg/parser/parser.y" 2>&1)
+            exit_code=$?
+            set -e
+
+            if [ $exit_code -eq 0 ]; then
+              echo "pass" > "/tmp/kw-results/${branch}.status"
+            elif echo "$output" | grep -q "Failed to download parser file"; then
+              echo "::warning::Failed to download parser.y for branch ${branch}. Skipping."
+              echo "skip" > "/tmp/kw-results/${branch}.status"
+            else
+              has_failure=true
+              echo "fail" > "/tmp/kw-results/${branch}.status"
+              # Drop the progress line. grep exits 1 when nothing is left,
+              # which must not abort the step, hence `|| true`.
+              echo "$output" | grep -v "^Fetching " > "/tmp/kw-results/${branch}.errors" || true
+            fi
+          done
+
+          echo "has_failure=$has_failure" >> "$GITHUB_OUTPUT"
+
+      # Renders keywords-report.md: a per-branch summary followed by the raw
+      # checker output for every failing branch.
+      - name: Build issue report
+        if: steps.check.outputs.has_failure == 'true'
+        run: |
+          {
+            echo "# Weekly Keywords Check Report"
+            echo
+            echo "## Summary"
+            echo
+
+            for branch in $CHECK_BRANCHES; do
+              status=$(cat "/tmp/kw-results/${branch}.status")
+              case "$status" in
+                pass) echo "- **${branch}** — Keywords check result: ✅ pass" ;;
+                skip) echo "- **${branch}** — Keywords check result: ⚠️ skipped (download failed)" ;;
+                fail) echo "- **${branch}** — Keywords check result: ❌ mismatch" ;;
+              esac
+            done
+
+            echo
+            echo "---"
+            echo
+
+            for branch in $CHECK_BRANCHES; do
+              status=$(cat "/tmp/kw-results/${branch}.status")
+              if [ "$status" = "fail" ]; then
+                error_count=$(wc -l < "/tmp/kw-results/${branch}.errors" | tr -d ' ')
+                echo "## \`${branch}\` — ${error_count} issue(s)"
+                echo
+                echo "Comparing [\`keywords.md\`]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/blob/${branch}/keywords.md)"
+                echo "against [TiDB parser (\`${branch}\`)](https://github.com/pingcap/tidb/blob/${branch}/pkg/parser/parser.y):"
+                echo
+                echo '```'
+                cat "/tmp/kw-results/${branch}.errors"
+                echo '```'
+                echo
+              fi
+            done
+
+            echo "## How to fix"
+            echo
+            echo "Update \`keywords.md\` on the affected branch to match the current TiDB parser keywords."
+            echo "See [check-keywords.py]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/blob/master/scripts/check-keywords.py) for details."
+            echo
+            echo "---"
+            echo "**Run date:** $(date -u '+%Y-%m-%d %H:%M UTC')"
+            echo "**Workflow run:** $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
+          } > keywords-report.md
+
+      - name: Create issue
+        if: steps.check.outputs.has_failure == 'true'
+        uses: peter-evans/create-issue-from-file@v6
+        with:
+          title: "Weekly keywords check: mismatches found"
+          content-filepath: keywords-report.md
diff --git a/scripts/check-keywords.py b/scripts/check-keywords.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+"""Check keywords.md against the keyword tokens declared in TiDB's parser.y.
+
+parser.y is read from --parser_file, or downloaded from --parser_url when
+--download_from_url is given; keywords.md is read from the current directory.
+One line is printed per mismatch, and the exit status is 1 if any mismatch
+was found and 0 otherwise. A missing input file or a failed download exits
+with an error message instead.
+"""
+import argparse
+import requests
+import re
+import sys
+from pathlib import Path
+
+# Sections of parser.y, each announced by a line containing
+# "The following tokens belong to <name>"; listed in the order they are tested.
+SECTIONS = ("ReservedKeyword", "UnReservedKeyword", "TiDBKeyword", "NotKeywordToken")
+# A token declaration: a tab, the token name, whitespace, the quoted keyword.
+TOKEN_RE = re.compile(r'^\t\w+\s+"(\w+)"$')
+# A keywords.md entry: "- KW", optionally labeled " (R)" or " (R-Window)".
+DOC_ENTRY_RE = re.compile(r"^- (\w+)( \((?:R|R-Window)\))?$", re.MULTILINE)
+
+aparser = argparse.ArgumentParser()
+aparser.add_argument(
+    "--parser_file", default="../tidb/pkg/parser/parser.y", help="Path to parser.y"
+)
+aparser.add_argument(
+    "--parser_url",
+    default="https://github.com/pingcap/tidb/raw/refs/heads/master/pkg/parser/parser.y",
+    help="URL to parser.y",
+)
+aparser.add_argument("--download_from_url", action="store_true")
+args = aparser.parse_args()
+
+if args.download_from_url:
+    try:
+        print(f"Fetching {args.parser_url}")
+        r = requests.get(args.parser_url, timeout=30)
+        r.raise_for_status()
+        lines = r.text.splitlines()
+    except requests.RequestException as e:
+        sys.exit(f"Failed to download parser file: {e}")
+else:
+    parser = Path(args.parser_file)
+    if not parser.exists():
+        sys.exit(f"{parser} doesn't exist")
+    lines = parser.read_text(encoding="utf-8").splitlines()
+
+kwdocs = Path("keywords.md")
+if not kwdocs.exists():
+    sys.exit(f"{kwdocs} doesn't exist")
+
+# Decode explicitly as UTF-8, like parser.y above, so the result does not
+# depend on the platform's default encoding.
+keywords = kwdocs.read_text(encoding="utf-8")
+
+# Index the documented keywords once instead of re-scanning keywords.md for
+# every token: entries labeled (R)/(R-Window) versus unlabeled entries.
+documented_reserved = set()
+documented_plain = set()
+for entry in DOC_ENTRY_RE.finditer(keywords):
+    target = documented_reserved if entry.group(2) else documented_plain
+    target.add(entry.group(1))
+
+errors = 0
+section = "Unknown"
+for line in lines:
+    # A blank line switches to NotKeywordToken; a marker line names the section.
+    if line == "":
+        section = "NotKeywordToken"
+    else:
+        for name in SECTIONS:
+            if f"The following tokens belong to {name}" in line:
+                section = name
+                break
+
+    # Only token declarations inside the three keyword sections are checked.
+    if section not in ("ReservedKeyword", "UnReservedKeyword", "TiDBKeyword"):
+        continue
+    m = TOKEN_RE.match(line)
+    if not m:
+        continue
+    kw = m.group(1)
+
+    if section == "ReservedKeyword":
+        # Reserved keywords must carry an (R) or (R-Window) label.
+        if kw in documented_reserved:
+            continue
+        if kw in documented_plain:
+            print(f"Reserved keyword not labeled as reserved: {kw}")
+        else:
+            print(f"Missing docs for reserved keyword: {kw}")
+    else:
+        # Non-reserved keywords must be listed without a label.
+        if kw in documented_plain:
+            continue
+        if kw in documented_reserved:
+            print(f"Non-reserved keyword from {section} labeled as reserved: {kw}")
+        else:
+            print(f"Missing docs for non-reserved keyword from {section}: {kw}")
+    errors += 1
+
+sys.exit(1 if errors else 0)