Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
87 changes: 87 additions & 0 deletions apps/pages/templatetags/page_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""Custom template tags and filters for the pages app."""

import re

from django import template
from django.utils.safestring import mark_safe
from django.utils.text import slugify

register = template.Library()

# Match h1–h4 elements; capture tag name, existing attributes, and inner HTML.
# The attribute group must be empty or start with whitespace, so look-alike
# tags such as ``<h2x>`` are not mistaken for headings.
# DOTALL lets the inner content span multiple lines.
_HEADING_RE = re.compile(
    r"<(h[1-4])((?:\s[^>]*)?)>(.*?)</\1>", re.IGNORECASE | re.DOTALL
)

# Extract the value of an existing id attribute, e.g. id="my-section".
# The look-behind rejects attribute names that merely *end* in "id"
# (``data-id``, ``aria-id``); a bare ``\b`` would match after the hyphen.
_EXISTING_ID_RE = re.compile(r'(?<![\w-])id\s*=\s*["\'](.*?)["\']', re.IGNORECASE)


# Detects an already-injected pilcrow link: an ``<a>`` tag whose class
# attribute contains ``headerlink``.  Matching the tag (rather than the bare
# word) keeps headings whose visible text mentions "headerlink" eligible.
_HEADERLINK_RE = re.compile(
    r'<a\b[^>]*\bclass\s*=\s*["\'][^"\']*\bheaderlink\b', re.IGNORECASE
)


def _headerlink(anchor_id):
    """Return the pilcrow ``<a>`` markup that links to ``#anchor_id``."""
    return (
        f'<a class="headerlink" href="#{anchor_id}" '
        f'title="Link to this section" aria-label="Link to this section">\u00b6</a>'
    )


@register.filter(is_safe=True)
def add_heading_anchors(html):
    """Add self-link anchors to h1-h4 headings.

    Given the rendered HTML of a CMS page, this filter finds every ``<h1>``,
    ``<h2>``, ``<h3>``, and ``<h4>`` element and appends a pilcrow anchor
    inside it so visitors can copy a direct link to any section.

    Two cases are handled:

    * **Heading already has an ``id``**: the existing id is reused as the
      anchor target and a pilcrow link is appended.  The heading is otherwise
      left intact (apart from the tag name being lower-cased).
    * **Heading has no ``id``**: a URL-safe id is derived from the heading's
      plain text via :func:`django.utils.text.slugify`.  The first use of a
      slug keeps the bare slug; later ones get ``-2``, ``-3``, ... and any
      candidate that is already taken -- by an earlier generated id or by an
      ``id`` attribute found anywhere in the input -- is skipped, so the
      generated ids never duplicate one another or an existing id.

    Headings whose text produces an empty slug *and* that carry no existing id
    are left completely untouched.  The filter is idempotent: headings that
    already contain a ``headerlink`` anchor element are skipped.

    .. note::
       The result is always passed through ``mark_safe``, whether or not the
       input was already marked safe.  Only apply this filter to HTML that is
       trusted or has been sanitised upstream.

    Usage in a template::

        {% load page_tags %}
        {{ page.content|add_heading_anchors }}
    """
    html = str(html)

    # Reserve every id already present in the document up front so that a
    # generated slug can never duplicate an existing one.
    used_ids: set[str] = set(_EXISTING_ID_RE.findall(html))
    # Highest numeric suffix tried so far for each base slug.
    last_suffix: dict[str, int] = {}

    def _unique_id(base_slug: str) -> str:
        """Return ``base_slug`` or the first free ``base_slug-N`` (N >= 2)."""
        count = last_suffix.get(base_slug, 1)
        candidate = base_slug
        while candidate in used_ids:
            count += 1
            candidate = f"{base_slug}-{count}"
        last_suffix[base_slug] = count
        used_ids.add(candidate)
        return candidate

    def _replace(match: re.Match) -> str:
        tag = match.group(1).lower()
        attrs = match.group(2)
        inner = match.group(3)

        # Idempotency: skip headings that already have a pilcrow link.
        if _HEADERLINK_RE.search(inner):
            return match.group(0)

        # If the heading already carries an id, reuse it for the pilcrow link
        # rather than skipping the heading or adding a second id.
        existing = _EXISTING_ID_RE.search(attrs)
        if existing:
            return f"<{tag}{attrs}>{inner} {_headerlink(existing.group(1))}</{tag}>"

        # Derive a slug from the plain text (strip any nested HTML tags).
        plain_text = re.sub(r"<[^>]+>", "", inner).strip()
        base_slug = slugify(plain_text)
        if not base_slug:
            # Nothing usable to build an id from; leave the heading alone.
            return match.group(0)

        anchor_id = _unique_id(base_slug)
        return f'<{tag} id="{anchor_id}"{attrs}>{inner} {_headerlink(anchor_id)}</{tag}>'

    return mark_safe(_HEADING_RE.sub(_replace, html))
97 changes: 97 additions & 0 deletions apps/pages/tests/test_templatetags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Tests for apps/pages/templatetags/page_tags.py."""

from django.test import SimpleTestCase

from apps.pages.templatetags.page_tags import add_heading_anchors


class AddHeadingAnchorsFilterTests(SimpleTestCase):
    """Tests for the ``add_heading_anchors`` template filter."""

    def test_h2_gets_id_and_anchor_link(self):
        """An h2 heading receives an id attribute and a pilcrow anchor link."""
        html = "<h2>2023</h2>"
        result = add_heading_anchors(html)
        self.assertIn('id="2023"', result)
        self.assertIn('href="#2023"', result)
        self.assertIn("¶", result)

    def test_h1_h3_h4_also_processed(self):
        """h1, h3, and h4 headings are also processed."""
        for tag in ("h1", "h3", "h4"):
            # subTest reports which tag failed instead of stopping at the first.
            with self.subTest(tag=tag):
                html = f"<{tag}>Section Title</{tag}>"
                result = add_heading_anchors(html)
                self.assertIn('id="section-title"', result)
                self.assertIn('href="#section-title"', result)

    def test_h5_is_not_changed(self):
        """h5 headings are left untouched."""
        html = "<h5>Title</h5>"
        result = add_heading_anchors(html)
        self.assertNotIn("id=", result)
        self.assertNotIn("href=", result)

    def test_duplicate_headings_get_unique_ids(self):
        """Duplicate heading text produces unique, numbered ids."""
        html = "<h2>Board Resolution</h2><h2>Board Resolution</h2>"
        result = add_heading_anchors(html)
        self.assertIn('id="board-resolution"', result)
        self.assertIn('id="board-resolution-2"', result)

    def test_third_duplicate_heading_gets_suffix_3(self):
        """A third heading with the same text is numbered ``-3``."""
        html = "<h2>Minutes</h2><h3>Minutes</h3><h4>Minutes</h4>"
        result = str(add_heading_anchors(html))
        self.assertIn('id="minutes"', result)
        self.assertIn('id="minutes-2"', result)
        self.assertIn('id="minutes-3"', result)

    def test_heading_with_existing_id_gets_pilcrow_link(self):
        """A heading with an existing id (e.g. from RST/docutils) gets a pilcrow
        link using that id, without the id being changed or duplicated."""
        html = '<h2 id="custom-id">My Section</h2>'
        result = str(add_heading_anchors(html))
        # Original id is preserved and not duplicated.
        self.assertIn('id="custom-id"', result)
        self.assertEqual(result.count('id="'), 1)
        # Pilcrow link is injected using the existing id.
        self.assertIn('href="#custom-id"', result)
        self.assertIn("headerlink", result)

    def test_rst_generated_headings_get_pilcrow_links(self):
        """RST/docutils headings that already carry ids get pilcrow links added."""
        html = (
            '<h2 id="board-resolutions">Board Resolutions</h2>'
            '<h3 id="resolution-1-budget">Resolution 1: Budget</h3>'
        )
        result = str(add_heading_anchors(html))
        self.assertIn('href="#board-resolutions"', result)
        self.assertIn('href="#resolution-1-budget"', result)
        self.assertEqual(result.count("headerlink"), 2)

    def test_existing_attributes_are_preserved(self):
        """Attributes already on the heading survive alongside the new id."""
        html = '<h2 class="fancy">Title</h2>'
        result = str(add_heading_anchors(html))
        self.assertIn('class="fancy"', result)
        self.assertIn('id="title"', result)
        self.assertIn('href="#title"', result)

    def test_filter_is_idempotent(self):
        """Running the filter twice does not add duplicate pilcrow links."""
        html = "<h2>Section</h2>"
        once = str(add_heading_anchors(html))
        twice = str(add_heading_anchors(once))
        self.assertEqual(once, twice)

    def test_filter_is_idempotent_with_existing_id(self):
        """Idempotency also holds for headings that arrived with an id."""
        html = '<h2 id="custom-id">My Section</h2>'
        once = str(add_heading_anchors(html))
        twice = str(add_heading_anchors(once))
        self.assertEqual(once, twice)
        self.assertEqual(twice.count("headerlink"), 1)

    def test_heading_with_nested_html_tags(self):
        """Plain text is extracted from headings that contain nested tags."""
        html = "<h2><em>Nested</em> Heading</h2>"
        result = str(add_heading_anchors(html))
        self.assertIn('id="nested-heading"', result)
        # The nested markup itself must be kept in the output.
        self.assertIn("<em>Nested</em> Heading", result)

    def test_non_heading_html_is_unchanged(self):
        """Non-heading elements are passed through unmodified."""
        html = "<p>Some paragraph</p><ul><li>Item</li></ul>"
        result = add_heading_anchors(html)
        self.assertEqual(str(result), html)

    def test_empty_string_returns_empty_string(self):
        """Passing an empty string returns an empty string."""
        self.assertEqual(str(add_heading_anchors("")), "")

    def test_heading_with_empty_text_is_unchanged(self):
        """A heading whose text slugifies to an empty string is left alone."""
        html = "<h2> </h2>"
        result = add_heading_anchors(html)
        self.assertNotIn("id=", result)
        self.assertEqual(str(result), html)

    def test_anchor_link_is_inside_heading(self):
        """The pilcrow anchor link appears inside the heading element."""
        html = "<h2>Resolutions 2022</h2>"
        result = str(add_heading_anchors(html))
        self.assertIn("¶</a></h2>", result)
4 changes: 3 additions & 1 deletion templates/psf/default.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
{% extends "base.html" %}
{% load boxes %}
{% load banners %}
{% load page_tags %}

{# TODO: Try to deduplicate this and templates/pages/default.html. #}
{% block page_title %}{{ page.title }} | Python Software Foundation{% endblock %}
Expand Down Expand Up @@ -52,7 +53,7 @@
<h1 class="page-title">{{ page.title }}</h1>
</header>

{{ page.content }}
{{ page.content|add_heading_anchors }}

</article>
{% endblock content %}
Expand All @@ -71,3 +72,4 @@ <h1 class="page-title">{{ page.title }}</h1>
</aside>

{% endblock left_sidebar %}