Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions .github/workflows/keywords.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
name: Keywords

# Triggers: external repository_dispatch events, manual runs from the
# Actions tab, and a weekly schedule.
on:
  repository_dispatch:
  workflow_dispatch:
  schedule:
    # Runs at 12:00 every Wednesday Beijing time (UTC+8), i.e. 04:00 UTC.
    - cron: "0 4 * * 3"

env:
  # Branches to check (docs branch and TiDB parser branch share the same name).
  # Edit this space-separated list to add or remove branches.
  CHECK_BRANCHES: "master release-8.5"

# Least-privilege token: read the repository contents, write issues
# (the workflow opens an issue when a mismatch is found).
permissions:
  contents: read
  issues: write

jobs:
  check-keywords:
    # Run only when the repository is pingcap/docs-cn.
    if: github.repository == 'pingcap/docs-cn'
    runs-on: ubuntu-latest
    steps:
      # fetch-depth: 0 fetches full history so that every branch listed in
      # CHECK_BRANCHES can be checked out later with `git checkout`.
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      # check-keywords.py imports `requests` to download parser.y.
      - name: Install dependencies
        run: pip install requests

      # Runs the checker once per branch and records one status file per
      # branch (pass / skip / fail) under /tmp/kw-results. Sets the step
      # output `has_failure` to "true" when at least one branch mismatches.
      - name: Check keywords for all branches
        id: check
        run: |
          mkdir -p /tmp/kw-results
          has_failure=false

          # Snapshot the checker before switching branches so that every
          # branch is validated by the same script, even when a branch does
          # not (yet) contain scripts/check-keywords.py.
          checker=/tmp/kw-check-keywords.py
          cp ./scripts/check-keywords.py "$checker"

          for branch in $CHECK_BRANCHES; do
            git checkout "$branch" --quiet

            # Capture the exit code ourselves instead of letting `-e` abort.
            set +e
            output=$(python "$checker" \
              --download_from_url \
              --parser_url "https://github.com/pingcap/tidb/raw/refs/heads/${branch}/pkg/parser/parser.y" 2>&1)
            exit_code=$?
            set -e

            if [ "$exit_code" -eq 0 ]; then
              echo "pass" > "/tmp/kw-results/${branch}.status"
            elif printf '%s\n' "$output" | grep -q "Failed to download parser file"; then
              # A download problem is not a documentation mismatch.
              echo "::warning::Failed to download parser.y for branch ${branch}. Skipping."
              echo "skip" > "/tmp/kw-results/${branch}.status"
            else
              has_failure=true
              echo "fail" > "/tmp/kw-results/${branch}.status"
              # Drop the "Fetching <url>" progress line. grep exits 1 when it
              # selects no lines, which must not abort the step under `-e`.
              printf '%s\n' "$output" | grep -v "^Fetching " > "/tmp/kw-results/${branch}.errors" || true
            fi
          done

          echo "has_failure=$has_failure" >> "$GITHUB_OUTPUT"

      # Renders keywords-report.md (Markdown) from the per-branch status and
      # error files written by the previous step.
      - name: Build issue report
        if: steps.check.outputs.has_failure == 'true'
        run: |
          {
            echo "# Weekly Keywords Check Report"
            echo
            echo "## Summary"
            echo

            # One summary bullet per branch.
            for branch in $CHECK_BRANCHES; do
              status=$(cat "/tmp/kw-results/${branch}.status")
              case "$status" in
                pass) echo "- **${branch}** — Keywords check result: ✅ pass" ;;
                skip) echo "- **${branch}** — Keywords check result: ⚠️ skipped (download failed)" ;;
                fail) echo "- **${branch}** — Keywords check result: ❌ mismatch" ;;
              esac
            done

            echo
            echo "---"
            echo

            # One detail section per failing branch.
            for branch in $CHECK_BRANCHES; do
              status=$(cat "/tmp/kw-results/${branch}.status")
              if [ "$status" = "fail" ]; then
                # Number of lines in the captured checker output.
                error_count=$(wc -l < "/tmp/kw-results/${branch}.errors" | tr -d ' ')
                echo "## \`${branch}\` — ${error_count} issue(s)"
                echo
                echo "Comparing [\`keywords.md\`]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/blob/${branch}/keywords.md)"
                echo "against [TiDB parser (\`${branch}\`)](https://github.com/pingcap/tidb/blob/${branch}/pkg/parser/parser.y):"
                echo
                echo '```'
                cat "/tmp/kw-results/${branch}.errors"
                echo '```'
                echo
              fi
            done

            echo "## How to fix"
            echo
            echo "Update \`keywords.md\` on the affected branch to match the current TiDB parser keywords."
            echo "See [check-keywords.py]($GITHUB_SERVER_URL/$GITHUB_REPOSITORY/blob/master/scripts/check-keywords.py) for details."
            echo
            echo "---"
            echo "**Run date:** $(date -u '+%Y-%m-%d %H:%M UTC')"
            echo "**Workflow run:** $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
          } > keywords-report.md

      # Opens an issue whose body is the generated report.
      - name: Create issue
        if: steps.check.outputs.has_failure == 'true'
        uses: peter-evans/create-issue-from-file@v6
        with:
          title: "Weekly keywords check: mismatches found"
          content-filepath: keywords-report.md
84 changes: 84 additions & 0 deletions scripts/check-keywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/python3
import argparse
import requests
import re
import sys
from pathlib import Path

# Matches one token declaration line in parser.y (a tab, the token name,
# whitespace, then the keyword in double quotes) and captures the keyword.
_TOKEN_RE = re.compile(r'^\t\w+\s+"(\w+)"$')

# Matches one keyword bullet in keywords.md: "- KW", "- KW (R)" or
# "- KW (R-Window)". Group 1 is the keyword, group 2 the reserved label
# (None for an unlabeled bullet).
_DOC_ENTRY_RE = re.compile(r"^- (\w+)(?: \((R|R-Window)\))?$", re.MULTILINE)

# Section names announced by "The following tokens belong to <name>" comments
# in parser.y, in the order they are tested (first match wins).
_SECTIONS = ("ReservedKeyword", "UnReservedKeyword", "TiDBKeyword", "NotKeywordToken")


def _parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when ``argv`` is None)."""
    aparser = argparse.ArgumentParser()
    aparser.add_argument(
        "--parser_file", default="../tidb/pkg/parser/parser.y", help="Path to parser.y"
    )
    aparser.add_argument(
        "--parser_url",
        default="https://github.com/pingcap/tidb/raw/refs/heads/master/pkg/parser/parser.y",
        help="URL to parser.y",
    )
    aparser.add_argument("--download_from_url", action="store_true")
    return aparser.parse_args(argv)


def _load_parser_lines(args):
    """Return the lines of parser.y, downloaded or read from disk.

    Exits the process with an error message when the download fails or the
    local file does not exist.
    """
    if args.download_from_url:
        try:
            print(f"Fetching {args.parser_url}")
            r = requests.get(args.parser_url, timeout=30)
            r.raise_for_status()
            return r.text.splitlines()
        except requests.RequestException as e:
            # The CI workflow greps for this exact message to tell a download
            # problem apart from a keyword mismatch.
            sys.exit(f"Failed to download parser file: {e}")

    parser_path = Path(args.parser_file)
    if not parser_path.exists():
        sys.exit(f"{parser_path} doesn't exist")
    return parser_path.read_text(encoding="utf-8").splitlines()


def check_keywords(lines, keywords):
    """Compare parser.y keyword tokens against the keywords.md bullet list.

    Args:
        lines: parser.y content as a list of lines without line endings.
        keywords: full text of keywords.md.

    Returns:
        A list of human-readable problem messages, in parser.y order.
        The list is empty when the documentation matches the parser.
    """
    # Index the documentation once instead of re-scanning it per keyword.
    reserved_docs = set()
    plain_docs = set()
    for entry in _DOC_ENTRY_RE.finditer(keywords):
        target = reserved_docs if entry.group(2) else plain_docs
        target.add(entry.group(1))

    problems = []
    section = "Unknown"
    for line in lines:
        if line == "":
            # A blank line ends the current keyword section.
            section = "NotKeywordToken"
        else:
            for name in _SECTIONS:
                if f"The following tokens belong to {name}" in line:
                    section = name
                    break

        if section not in ("ReservedKeyword", "UnReservedKeyword", "TiDBKeyword"):
            continue
        token = _TOKEN_RE.match(line)
        if not token:
            continue
        kw = token.group(1)

        if section == "ReservedKeyword":
            if kw in reserved_docs:
                continue
            if kw in plain_docs:
                problems.append(f"Reserved keyword not labeled as reserved: {kw}")
            else:
                problems.append(f"Missing docs for reserved keyword: {kw}")
        else:
            if kw in plain_docs:
                continue
            if kw in reserved_docs:
                problems.append(
                    f"Non-reserved keyword from {section} labeled as reserved: {kw}"
                )
            else:
                problems.append(
                    f"Missing docs for non-reserved keyword from {section}: {kw}"
                )
    return problems


def main(argv=None):
    """Run the check; exit with status 1 when any problem is found, else 0."""
    args = _parse_args(argv)
    lines = _load_parser_lines(args)

    kwdocs = Path("keywords.md")
    if not kwdocs.exists():
        sys.exit(f"{kwdocs} doesn't exist")
    # Read as UTF-8 explicitly: the default would be the locale's encoding.
    keywords = kwdocs.read_text(encoding="utf-8")

    problems = check_keywords(lines, keywords)
    for problem in problems:
        print(problem)
    sys.exit(1 if problems else 0)


if __name__ == "__main__":
    main()
Loading