executablebooks · chrisjsewell · May 6, 2026 · May 6, 2026 · May 6, 2026
diff --git a/docs/index.md b/docs/index.md
@@ -43,6 +43,18 @@ html_string = md.render("some *Markdown*")
 .. autofunction:: mdit_py_plugins.front_matter.front_matter_plugin
 ```
 
+## GFM (GitHub Flavored Markdown)
+
+```{eval-rst}
+.. autofunction:: mdit_py_plugins.gfm.gfm_plugin
+```
+
+## GFM Autolinks
+
+```{eval-rst}
+.. autofunction:: mdit_py_plugins.gfm_autolink.gfm_autolink_plugin
+```
+
 ## Footnotes
 
 ```{eval-rst}

diff --git a/mdit_py_plugins/gfm/__init__.py b/mdit_py_plugins/gfm/__init__.py
@@ -0,0 +1,102 @@
+"""Composite GFM (GitHub Flavored Markdown) plugin.
+
+Enables a set of plugins that together approximate GitHub's Markdown rendering:
+
+- Tables (built-in)
+- Strikethrough with single and double tildes (built-in)
+- Autolinks (gfm_autolink plugin)
+- Task lists (built-in, markdown-it-py >= 4.1.0)
+- Alerts (built-in, markdown-it-py >= 4.1.0)
+- Footnotes (``[^label]`` references and definitions)
+
+Optional extras:
+
+- Dollar math (``$...$`` / ``$$...$$``)
+- Front matter (YAML)
+
+.. note::
+   Tag filtering (disallowed raw HTML tags) is not yet implemented.
+
+.. seealso::
+   - `GitHub Flavored Markdown Spec <https://github.github.com/gfm/>`__
+   - `GitHub basic formatting syntax
+     <https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax>`__
+
+.. versionadded:: 0.5.0
+
+Requires markdown-it-py >= 4.1.0.
+"""
+
+from __future__ import annotations
+
+from functools import lru_cache
+
+from markdown_it import MarkdownIt
+from markdown_it import __version__ as _mdit_version
+
+from mdit_py_plugins.dollarmath import dollarmath_plugin
+from mdit_py_plugins.footnote import footnote_plugin
+from mdit_py_plugins.front_matter import front_matter_plugin
+from mdit_py_plugins.gfm_autolink import gfm_autolink_plugin
+
+__all__ = ("gfm_plugin",)
+
+# Oldest markdown-it-py release accepted by ``gfm_plugin``.
+_MIN_VERSION = (4, 1, 0)
+
+
+@lru_cache(maxsize=8)
+def _parse_version(v: str) -> tuple[int, ...]:
+    """Parse a version string like '4.1.0' into a ``(major, minor, patch)`` tuple.
+
+    Only the leading ASCII digits of each of the first three dot-separated
+    components are used, so pre-release / local suffixes (``'4.1.0rc1'``,
+    ``'4.2.0.dev1'``) do not raise ``ValueError``.  Components that are
+    missing or have no leading digits count as ``0``; the result therefore
+    always has exactly three items and ``'4.1'`` compares equal to
+    ``'4.1.0'`` instead of sorting before it.
+
+    :param v: The version string to parse.
+    :returns: A 3-tuple of ints suitable for comparison with ``_MIN_VERSION``.
+    """
+    parts: list[int] = []
+    for component in v.split(".")[:3]:
+        # Length of the leading run of digits == what lstrip() removed.
+        n_digits = len(component) - len(component.lstrip("0123456789"))
+        parts.append(int(component[:n_digits]) if n_digits else 0)
+    # Pad short versions: tuple comparison treats a shorter prefix as smaller.
+    parts.extend([0] * (3 - len(parts)))
+    return tuple(parts)
+
+
+def gfm_plugin(
+    md: MarkdownIt,
+    *,
+    dollarmath: bool = False,
+    front_matter: bool = False,
+    tasklists_editable: bool = False,
+) -> None:
+    """Configure *md* for GFM-like rendering.
+
+    The parser's existing configuration is kept; the GFM rules, options and
+    plugins are switched on in addition to it.
+
+    :param md: The parser instance to configure (modified in place).
+    :param dollarmath: Enable dollar-delimited math (``$...$``, ``$$...$$``).
+    :param front_matter: Enable YAML front matter (``---``).
+    :param tasklists_editable: If True, rendered task list checkboxes are not
+        disabled (i.e. they are interactive).
+    :raises RuntimeError: If the installed markdown-it-py is older than
+        ``_MIN_VERSION``.
+    """
+    if _parse_version(_mdit_version) < _MIN_VERSION:
+        required = ".".join(str(x) for x in _MIN_VERSION)
+        raise RuntimeError(
+            f"gfm_plugin requires markdown-it-py >= {required} "
+            f"(installed: {_mdit_version})"
+        )
+
+    # Rules that ship with markdown-it-py itself.
+    for rule_name in ("table", "strikethrough"):
+        md.enable(rule_name)
+
+    # Parser options introduced in markdown-it-py 4.1.0.
+    gfm_options = {
+        "tasklists": True,
+        "tasklists_editable": tasklists_editable,
+        "alerts": True,
+        "strikethrough_single_tilde": True,
+    }
+    for option_name, option_value in gfm_options.items():
+        md.options[option_name] = option_value
+
+    # Always-on plugins: bare-URL autolinks, then footnotes.
+    # Inline footnotes (``^[...]``) are not part of GFM, hence ``inline=False``.
+    md.use(gfm_autolink_plugin)
+    md.use(footnote_plugin, inline=False)
+
+    # Opt-in: dollar math (inline ``$...$`` and block ``$$...$$``).
+    if dollarmath:
+        md.use(dollarmath_plugin, allow_blank_lines=False)
+
+    # TODO: Tag filter — replace leading `<` with `&lt;` for disallowed raw
+    # HTML tags: <title>, <textarea>, <style>, <xmp>, <iframe>, <noembed>,
+    # <noframes>, <script>, <plaintext>.
+    # See https://github.github.com/gfm/#disallowed-raw-html-extension-
+
+    # Opt-in: YAML front matter.
+    if front_matter:
+        md.use(front_matter_plugin)
diff --git a/mdit_py_plugins/gfm_autolink/__init__.py b/mdit_py_plugins/gfm_autolink/__init__.py
@@ -0,0 +1,15 @@
+"""GFM autolink extension plugin for markdown-it-py.
+
+Implements the `GFM autolink extension
+<https://github.github.com/gfm/#autolinks-extension->`_,
+which recognises bare URLs (``http://``, ``https://``, ``www.``),
+protocol links (``mailto:``, ``xmpp:``),
+and bare email addresses without requiring angle brackets.
+
+Ported from the Rust crate
+`markdown_it_autolink <https://github.com/markdown-it-rust/markdown-it-plugins.rs>`_.
+"""
+
+from .index import gfm_autolink_plugin
+
+__all__ = ("gfm_autolink_plugin",)
diff --git a/mdit_py_plugins/gfm_autolink/_match.py b/mdit_py_plugins/gfm_autolink/_match.py
@@ -0,0 +1,250 @@
+"""URL / email matching helpers for the GFM autolink extension.
+
+Ported from the Rust ``gfm_autolinks`` crate.
+"""
+
+from __future__ import annotations
+
+import unicodedata
+
+# ---------------------------------------------------------------------------
+# Character classification helpers
+# ---------------------------------------------------------------------------
+
+# Whitespace plus the delimiter characters ``*``, ``_``, ``~`` and ``(``.
+_VALID_PREV_CHARS = frozenset((" ", "\t", "\r", "\n", "*", "_", "~", "("))
+
+
+def check_prev(ch: str) -> bool:
+    """Tell whether an autolink may start right after the character *ch*.
+
+    :param ch: The character immediately preceding the candidate link.
+    :returns: ``True`` when *ch* is one of ``_VALID_PREV_CHARS``.
+    """
+    may_precede_link = ch in _VALID_PREV_CHARS
+    return may_precede_link
+
+
+def _is_valid_hostchar(ch: str) -> bool:
+    """Tell whether the single character *ch* may appear inside a domain label.
+
+    A character qualifies when it is neither whitespace (``str.isspace``) nor
+    in one of the Unicode punctuation categories (``Pc``, ``Pd``, ``Pe``,
+    ``Pf``, ``Pi``, ``Po``, ``Ps`` — i.e. any category starting with ``P``).
+    """
+    # The whitespace test runs first and short-circuits the category lookup.
+    return not (ch.isspace() or unicodedata.category(ch).startswith("P"))
+
+
+# A URL stops at the first of these characters; trailing-delimiter trimming
+# (``_autolink_delim``) is applied afterwards.
+_SPACE_CHARS = frozenset((" ", "\t", "\r", "\n", "\x00", "\x0b", "\x0c"))
+
+
+def _isspace(ch: str) -> bool:
+    """Tell whether *ch* is one of the URL-terminating characters in ``_SPACE_CHARS``."""
+    terminates_url = ch in _SPACE_CHARS
+    return terminates_url
+
+
+# Punctuation that is never kept as the last character of an autolink.
+_LINK_END_ASSORTMENT = frozenset("?!.,:*_~'\"[]")
+
+
+def _autolink_delim(data: str, link_end: int) -> int:
+    """Trim trailing punctuation from a URL according to GFM rules.
+
+    :param data: The text being scanned; the candidate link is
+        ``data[:link_end]``.
+    :param link_end: Exclusive end index of the candidate link.
+    :returns: The new (possibly smaller) exclusive end index.
+
+    The rules, applied repeatedly from the right-hand end:
+
+    - the link is first cut at the first ``<``;
+    - any character in ``_LINK_END_ASSORTMENT`` is dropped;
+    - a trailing ``;`` that terminates something shaped like an entity
+      reference (``&`` followed by one or more ASCII letters) removes the
+      whole reference, otherwise only the ``;`` is dropped;
+    - a trailing ``)`` is dropped only while the link contains more ``)``
+      than ``(``.
+    """
+    # Truncate at the first '<' (bounded search, no slice copy).
+    first_lt = data.find("<", 0, link_end)
+    if first_lt != -1:
+        link_end = first_lt
+
+    while link_end > 0:
+        cclose = data[link_end - 1]
+
+        if cclose in _LINK_END_ASSORTMENT:
+            link_end -= 1
+        elif cclose == ";":
+            # Walk left over the would-be entity name.  Only ASCII letters
+            # count: ``str.isalpha`` alone also accepts non-ASCII letters,
+            # which would make e.g. ``&é;`` look like an entity reference.
+            new_end = link_end - 2
+            while (
+                new_end > 0
+                and data[new_end].isascii()
+                and data[new_end].isalpha()
+            ):
+                new_end -= 1
+            if new_end < link_end - 2 and data[new_end] == "&":
+                # "&name;" — strip the whole reference.
+                link_end = new_end
+            else:
+                link_end -= 1
+        elif cclose == ")":
+            # Keep the parenthesis when it is balanced within the link.
+            opening = data.count("(", 0, link_end)
+            closing = data.count(")", 0, link_end)
+            if closing <= opening:
+                break
+            link_end -= 1
+        else:
+            break
+
+    return link_end
+
+
+# ---------------------------------------------------------------------------
+# Domain validation
+# ---------------------------------------------------------------------------
+
+
+def _check_domain(data: str, allow_short: bool) -> int | None:
+    """Check that *data* starts with an acceptable domain.
+
+    The scan walks over dots, underscores, hyphens and valid host characters
+    and stops at the first character that is none of those.
+
+    :param data: Text beginning at the first character of the domain.
+    :param allow_short: If true, a domain without any dot is accepted.
+    :returns: Number of characters that belong to the domain, or ``None``
+        when the domain is rejected (empty input, a required dot is missing,
+        or an underscore occurs in one of the last two segments).
+
+    NOTE(review): the "more than 10 dots" exemption for underscores is only
+    applied when the domain runs to the end of *data*, not when the scan is
+    stopped by an invalid character — confirm against upstream whether this
+    asymmetry is intended.
+    """
+    if not data:
+        return None
+
+    dots = 0
+    # Underscore counts for the previous and the current segment.
+    prev_seg_uscores = 0
+    cur_seg_uscores = 0
+
+    for idx, ch in enumerate(data):
+        if ch == ".":
+            # Segment boundary: shift the counters along.
+            prev_seg_uscores, cur_seg_uscores = cur_seg_uscores, 0
+            dots += 1
+        elif ch == "_":
+            cur_seg_uscores += 1
+        elif ch != "-" and not _is_valid_hostchar(ch):
+            # First character that cannot be part of a domain: decide here.
+            if prev_seg_uscores or cur_seg_uscores:
+                return None
+            if not allow_short and dots == 0:
+                return None
+            return idx
+
+    # The whole of *data* is domain-like.
+    if (prev_seg_uscores or cur_seg_uscores) and dots <= 10:
+        return None
+    return len(data) if (allow_short or dots > 0) else None
+
+
+# ---------------------------------------------------------------------------
+# www matching
+# ---------------------------------------------------------------------------
+
+_EMAIL_OK = frozenset(".+-_")
+
+
+def match_www(text: str) -> tuple[str, int] | None:
+    """Recognise a scheme-less ``www.`` link at the very start of *text*.
+
+    The text after ``www.`` must pass ``_check_domain`` with
+    ``allow_short=False``.  The link then extends up to the next
+    ``_isspace`` character and its tail is trimmed by ``_autolink_delim``.
+
+    :returns: ``(url, char_count)`` where *url* is the matched text prefixed
+        with ``http://`` and *char_count* is the length of the matched text
+        (without that prefix), or ``None`` if there is no match.
+    """
+    prefix = "www."
+    if not text.startswith(prefix):
+        return None
+
+    domain_len = _check_domain(text[len(prefix) :], False)
+    if domain_len is None:
+        return None
+
+    # ``domain_len`` is relative to the end of the prefix; make it an index
+    # into *text* and run on to the next space-like character.
+    end = len(prefix) + domain_len
+    limit = len(text)
+    while end < limit and not _isspace(text[end]):
+        end += 1
+
+    link_text = text[: _autolink_delim(text, end)]
+    return "http://" + link_text, len(link_text)
+
+
+# ---------------------------------------------------------------------------
+# http(s):// matching
+# ---------------------------------------------------------------------------
+
+
+def match_http(text: str) -> tuple[str, int] | None:
+    """Recognise an ``http://`` or ``https://`` link at the very start of *text*.
+
+    The text after the scheme must pass ``_check_domain`` with
+    ``allow_short=True`` (a dot-less host is accepted).  The link then
+    extends up to the next ``_isspace`` character and its tail is trimmed by
+    ``_autolink_delim``.
+
+    :returns: ``(url, char_count)`` — the matched text and its length — or
+        ``None`` if there is no match.
+    """
+    for scheme in ("http://", "https://"):
+        if text.startswith(scheme):
+            host_start = len(scheme)
+            break
+    else:
+        return None
+
+    domain_len = _check_domain(text[host_start:], True)
+    if domain_len is None:
+        return None
+
+    # Convert to an index into *text*, then run on to the next space-like char.
+    end = host_start + domain_len
+    limit = len(text)
+    while end < limit and not _isspace(text[end]):
+        end += 1
+
+    link_text = text[: _autolink_delim(text, end)]
+    return link_text, len(link_text)
+
+
+# ---------------------------------------------------------------------------
+# Email matching
+# ---------------------------------------------------------------------------
+
+
+def match_email(text: str) -> tuple[str, int] | None:
+    """Match an email address at the start of *text*.
+
+    A leading ``mailto:`` or ``xmpp:`` is recognised and skipped before the
+    address itself is scanned; without either, the address is treated as
+    bare.  The actual matching is delegated to ``match_any_email``.
+
+    :returns: Whatever ``match_any_email`` returns: ``(url, char_count)`` or
+        ``None``.
+    """
+    for scheme in ("mailto", "xmpp"):
+        prefix = scheme + ":"
+        if text.startswith(prefix):
+            # Start the local-part scan right after "scheme:".
+            return match_any_email(text, len(prefix), scheme)
+
+    return match_any_email(text, 0, None)
+
+
+def match_any_email(
+    text: str, pos: int, protocol: str | None
+) -> tuple[str, int] | None:
+    """Match an email address in *text*, scanning the local part from *pos*.
+
+    :param text: The text to match against, starting at the candidate link
+        (including any ``mailto:`` / ``xmpp:`` prefix).
+    :param pos: Index in *text* at which the local part begins.
+    :param protocol: ``"mailto"``, ``"xmpp"``, or ``None`` (bare address).
+    :returns: ``(url, char_count)`` or ``None``.  For a bare address the url
+        is the matched text prefixed with ``mailto:``; otherwise it is the
+        matched text itself.  *char_count* is the end index of the match in
+        *text*.
+    """
+    size = len(text)
+    is_xmpp = protocol == "xmpp"
+
+    # --- local part: ASCII alphanumerics and ``_EMAIL_OK`` up to the '@' ---
+    local_start = pos
+    while pos < size and text[pos] != "@":
+        ch = text[pos]
+        if not (ch.isascii() and (ch.isalnum() or ch in _EMAIL_OK)):
+            return None
+        pos += 1
+
+    if pos == local_start:
+        # Nothing before the '@'.
+        return None
+
+    # --- domain part: everything after the '@' ---
+    end = pos + 1
+    dots = 0
+    slash_seen = False
+
+    while end < size:
+        ch = text[end]
+        if ch.isascii() and ch.isalnum():
+            pass
+        elif ch == "@":
+            # A further '@' is only tolerated for xmpp links.
+            if not is_xmpp:
+                return None
+        elif (
+            ch == "."
+            and end + 1 < size
+            and text[end + 1].isascii()
+            and text[end + 1].isalnum()
+        ):
+            # A dot only counts when an alphanumeric follows it.
+            dots += 1
+        elif ch == "/" and is_xmpp and not slash_seen:
+            # xmpp links may carry a single '/'.
+            slash_seen = True
+        elif ch not in ("-", "_"):
+            break
+        end += 1
+
+    # Needs at least one dot in the domain (this also rejects input that
+    # never contained an '@').
+    if end < 2 or dots == 0:
+        return None
+
+    # The match must end in an ASCII letter or a dot.
+    tail = text[end - 1]
+    if tail != "." and not (tail.isascii() and tail.isalpha()):
+        return None
+
+    matched = text[:end]
+    if protocol is None:
+        return "mailto:" + matched, end
+    return matched, end