Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Mathlib.lean
Original file line number Diff line number Diff line change
Expand Up @@ -7124,6 +7124,7 @@ public import Mathlib.Tactic.Contrapose
public import Mathlib.Tactic.Conv
public import Mathlib.Tactic.Convert
public import Mathlib.Tactic.Core
public import Mathlib.Tactic.CrossRef.Fetch
public import Mathlib.Tactic.CrossRefAttribute
public import Mathlib.Tactic.DSimpPercent
public import Mathlib.Tactic.DeclarationNames
Expand Down Expand Up @@ -7378,6 +7379,7 @@ public import Mathlib.Tactic.Widget.Calc
public import Mathlib.Tactic.Widget.CommDiag
public import Mathlib.Tactic.Widget.CongrM
public import Mathlib.Tactic.Widget.Conv
public import Mathlib.Tactic.Widget.CrossRefHover
public import Mathlib.Tactic.Widget.GCongr
public import Mathlib.Tactic.Widget.InteractiveUnfold
public import Mathlib.Tactic.Widget.LibraryRewrite
Expand Down
2 changes: 2 additions & 0 deletions Mathlib/Tactic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ public import Mathlib.Tactic.Contrapose
public import Mathlib.Tactic.Conv
public import Mathlib.Tactic.Convert
public import Mathlib.Tactic.Core
public import Mathlib.Tactic.CrossRef.Fetch
public import Mathlib.Tactic.CrossRefAttribute
public import Mathlib.Tactic.DSimpPercent
public import Mathlib.Tactic.DeclarationNames
Expand Down Expand Up @@ -328,6 +329,7 @@ public import Mathlib.Tactic.Widget.Calc
public import Mathlib.Tactic.Widget.CommDiag
public import Mathlib.Tactic.Widget.CongrM
public import Mathlib.Tactic.Widget.Conv
public import Mathlib.Tactic.Widget.CrossRefHover
public import Mathlib.Tactic.Widget.GCongr
public import Mathlib.Tactic.Widget.InteractiveUnfold
public import Mathlib.Tactic.Widget.LibraryRewrite
Expand Down
261 changes: 261 additions & 0 deletions Mathlib/Tactic/CrossRef/Fetch.lean
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
/-
Copyright (c) 2026 Lean FRO, LLC. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Kim Morrison
-/
module

public meta import Lean.Data.Json
public import Mathlib.Init

/-!
# Fetching cross-reference snippets

The Mathlib counterpart of `scripts/crossref-snippet.lean`: given a database
(`stacks`, `kerodon`, or `wikidata`) and a tag, fetch a one-line
`(title, description)` snippet from the upstream site.

This module is used by `Mathlib/Tactic/Widget/CrossRefHover.lean` so that the
LSP widget on a cross-reference attribute can call back into the server to
resolve a snippet on demand. The script in PR 1 of this stack reimplements
roughly the same logic; we keep that script self-contained (no Mathlib import,
runs via `lake env lean --run`) and this module isolated from `IO.Process`
dependency-wise, so the two evolve in parallel.

The `Database` enum and its `URL` / `label` projections also live here; they
are re-exported through `Mathlib.Tactic.CrossRefAttribute` for the attribute
infrastructure.
-/

namespace Mathlib.CrossRef

/-- The supported external databases.

When adding a new case, also update each of these:
- `databaseURL`, `databaseLabel`, `Database.name`, `Database.ofName?`,
the per-database branch in `fetchSnippet`, and `gerbyBase?` below;
- the parser, `syntax (name := ...)`, `registerBuiltinAttribute`, and
`#X_tags` trace command in `Mathlib/Tactic/CrossRefAttribute.lean`;
- the parallel `Database` enum and per-database fetch implementation in
`scripts/crossref-snippet.lean`;
- the `databases` list in `scripts/extract-crossref-tags.lean`;
- the `pretty` dict in `mathlib-ci`'s
`scripts/crossref_review/crossref-pr-comment.py`. -/
public inductive Database where
| kerodon
| stacks
| wikidata
deriving BEq, Hashable, Repr

/-- The base URL for an external database's tag pages. Always ends with `/`. -/
public def databaseURL : Database → String
| .kerodon => "https://kerodon.net/tag/"
| .stacks => "https://stacks.math.columbia.edu/tag/"
| .wikidata => "https://www.wikidata.org/wiki/"

/-- The display label used in docstring links and trace output. -/
public def databaseLabel : Database → String
| .kerodon => "Kerodon Tag"
| .stacks => "Stacks Tag"
| .wikidata => "Wikidata"

/-- A short machine-readable tag (`"kerodon"`, `"stacks"`, `"wikidata"`)
that's stable to round-trip through JSON. Used by the widget. -/
public def Database.name : Database → String
| .kerodon => "kerodon"
| .stacks => "stacks"
| .wikidata => "wikidata"

/-- Parse the short name back into a `Database`. -/
public def Database.ofName? : String → Option Database
| "kerodon" => some .kerodon
| "stacks" => some .stacks
| "wikidata" => some .wikidata
| _ => none

/-- `Database.name` and `Database.ofName?` roundtrip. This catches at compile time
any drift between the two when a new case is added to `Database`. -/
public theorem Database.ofName?_name (d : Database) : Database.ofName? d.name = some d := by
cases d <;> rfl

/-- A successfully fetched snippet from an upstream database. -/
public structure Snippet where
/-- The display title (e.g. `"Lemma 14.32.3"`). May be empty. -/
title : String
/-- The natural-language description. May be empty. -/
description : String
deriving Repr

/-- A problem encountered while fetching a snippet. -/
public inductive SnippetError where
/-- A transient problem (network, parse, …). -/
| network (reason : String)
deriving Repr

/-- The outcome of trying to fetch a snippet.

* `.ok (some s)` — upstream returned a snippet (either field of `s` may be empty).
* `.ok none` — the tag was authoritatively missing upstream.
* `.error e` — a transient problem (network, parse, …). -/
public abbrev SnippetOutcome := Except SnippetError (Option Snippet)

/-- The `User-Agent` curl sends. Wikidata's API will throttle anonymous clients
without one, so we identify ourselves. -/
private def userAgent : String :=
"mathlib-crossref-widget/1 (https://github.com/leanprover-community/mathlib4)"

/-- Make a GET request and return `(http-status, body)`. Status is `0` if curl
itself failed. We append the HTTP status to the body via `-w '\n%{http_code}'`
and peel it off the end — that avoids juggling a temp file. -/
private def fetchUrl (url : String) : IO (Nat × String) := do
let output ← IO.Process.output {
cmd := "curl"
args := #["-sSL", "--max-time", "10", "-A", userAgent,
"-w", "\n%{http_code}", url]
}
if output.exitCode != 0 then return (0, "")
Comment on lines +110 to +116
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use Std.Http here instead? Or do the query javacsript-side?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Std.Http is not yet production ready.

Why would querying javascript-side be better than curl?

let s := output.stdout.toRawSubstring
let status := (s.takeRightWhile Char.isDigit).toNat?.getD 0
let body := ((s.dropRightWhile Char.isDigit).dropRightWhile (· == '\n')).toString
return (status, body)

/-- Replace runs of whitespace by a single space and strip leading/trailing ws. -/
private def flattenWhitespace (s : String) : String :=
let go : Char → (String × Bool) → (String × Bool) := fun c (acc, prevSpace) =>
let isWs := c == ' ' || c == '\t' || c == '\n' || c == '\r'
if isWs then
if prevSpace || acc.isEmpty then (acc, true)
else (acc.push ' ', true)
else
(acc.push c, false)
let (out, _) := s.toList.foldl (fun st c => go c st) ("", false)
out.trimAscii.toString

/-- Best-effort HTML→text. We treat `<x…>` as a tag only when `x` is a letter,
`/`, or `!`, so a literal `<` inside LaTeX (`0 < 1`) is preserved. -/
private def stripHtml (html : String) : String :=
let chars := html.toList
let rec go : List Char → Bool → String → String
| [], _, acc => acc
| '<' :: rest, false, acc =>
match rest with
| c :: _ =>
if c.isAlpha || c == '/' || c == '!' then go rest true acc
else go rest false (acc.push '<')
| [] => acc.push '<'
| '>' :: rest, true, acc => go rest false acc
| _ :: rest, true, acc => go rest true acc
| c :: rest, false, acc => go rest false (acc.push c)
let raw := go chars false ""
let decoded := raw
|>.replace "&nbsp;" " "
|>.replace "&amp;" "&"
|>.replace "&lt;" "<"
|>.replace "&gt;" ">"
|>.replace "&quot;" "\""
|>.replace "&#39;" "'"
flattenWhitespace decoded
Comment on lines +134 to +157
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we doing this when the infoview can display HTML?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying this out now.


open Lean in
/-- Walk a path of object keys in a `Json` value, returning the leaf as a
string if every step succeeds and the leaf is a string. -/
private def jsonStrPath? (j : Json) (path : List String) : Option String :=
let rec go (cur : Json) : List String → Option String
| [] => cur.getStr?.toOption
| k :: rest =>
match cur.getObjVal? k with
| .ok next => go next rest
| .error _ => none
go j path

/-! ### Wikidata -/

open Lean in
private def fetchWikidata (qid : String) : IO SnippetOutcome := do
let url := s!"https://www.wikidata.org/w/api.php?action=wbgetentities&ids={qid}\
&languages=en&props=labels%7Cdescriptions&format=json"
let (status, body) ← fetchUrl url
if status != 200 then return .error (.network s!"wikidata HTTP {status}")
match Json.parse body with
| .error e => return .error (.network s!"wikidata json: {e}")
| .ok json =>
match json.getObjVal? "error" with
| .ok err =>
let code := jsonStrPath? err ["code"] |>.getD ""
let info := jsonStrPath? err ["info"] |>.getD code
if code == "no-such-entity" then return .ok none
else return .error (.network s!"wikidata {code}: {info}")
| _ =>
match json.getObjVal? "entities" with
| .error _ => return .error (.network "wikidata: no `entities` field")
| .ok entities =>
match entities.getObjVal? qid with
| .error _ => return .ok none
| .ok ent =>
match ent.getObjVal? "missing" with
| .ok _ => return .ok none
| _ =>
let label := jsonStrPath? ent ["labels", "en", "value"] |>.getD ""
let desc := jsonStrPath? ent ["descriptions", "en", "value"] |>.getD ""
return .ok (some { title := flattenWhitespace label
description := flattenWhitespace desc })

/-! ### Stacks / Kerodon (Gerby) -/

/-- The base URL for a Gerby-style database, or `none` for Wikidata. -/
private def gerbyBase? : Database → Option String
| .stacks => some "https://stacks.math.columbia.edu"
| .kerodon => some "https://kerodon.net"
| .wikidata => none

/-- Return the substring of `s` after the first occurrence of `needle`,
or `none` if `needle` is absent. -/
private def afterFirst? (s needle : String) : Option String :=
let parts := s.splitOn needle
match parts with
| _ :: rest@(_ :: _) => some (needle.intercalate rest)
| _ => none

/-- Take everything up to (but not including) the first occurrence of `c`. -/
private def takeUntilChar (s : String) (c : Char) : String :=
String.ofList (s.toList.takeWhile (· != c))

/-- Pull the environment type (`Lemma`, `Proposition`, …) and reference number
from the `/content/statement` HTML. Both Stacks and Kerodon wrap each tag in
`<article class="env-{TYPE}" id="{TAG}">` and lead with
`<a ...>Lemma <span data-tag="...">14.32.3</span>.</a>`. -/
private def parseGerbyTitle (html : String) : String :=
let envType :=
match afterFirst? html "class=\"env-" with
| none => ""
| some rest => takeUntilChar rest '"'
let reference :=
match afterFirst? html "data-tag=\"" with
| none => ""
| some afterAttr =>
match afterFirst? afterAttr ">" with
| none => ""
| some inside => takeUntilChar inside '<'
let cap := envType.capitalize
flattenWhitespace (if reference.isEmpty then cap else s!"{cap} {reference}")

private def fetchGerby (db : Database) (tag : String) : IO SnippetOutcome := do
let some base := gerbyBase? db | return .error (.network s!"{db.name}: no Gerby base")
let url := s!"{base}/data/tag/{tag}/content/statement"
let (status, body) ← fetchUrl url
if status != 200 then return .error (.network s!"{db.name} HTTP {status}")
-- Gerby returns HTTP 200 even for missing tags; detect via body text.
if body.trimAscii.toString == "This tag does not exist." then return .ok none
let title := parseGerbyTitle body
let snippet := stripHtml body
if title.isEmpty && snippet.isEmpty then
return .error (.network s!"{db.name}: could not parse statement")
return .ok (some { title, description := snippet })

/-- Fetch one snippet from the appropriate upstream database. -/
public def fetchSnippet (db : Database) (tag : String) : IO SnippetOutcome :=
match db with
| .wikidata => fetchWikidata tag
| _ => fetchGerby db tag

end Mathlib.CrossRef
55 changes: 30 additions & 25 deletions Mathlib/Tactic/CrossRefAttribute.lean
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ module

public meta import Lean.Elab.Command
public import Mathlib.Init
public meta import Mathlib.Tactic.CrossRef.Fetch
public meta import Mathlib.Tactic.Widget.CrossRefHover

/-!
# Cross-reference attributes
Expand All @@ -18,39 +20,26 @@ to entries in external mathematical databases:
* `@[kerodon TAG]` — [Kerodon](https://kerodon.net/tag/)
* `@[wikidata QID]` — [Wikidata](https://www.wikidata.org)

Each attribute records the cross-reference in an environment extension and appends
a link to the declaration's docstring.
Each attribute records the cross-reference in an environment extension, appends
a link to the declaration's docstring, and attaches the `CrossRefHoverPanel`
widget to the attribute syntax so the info view shows the upstream label /
description when the cursor sits on the tag.

The shared infrastructure (`Database`, `Tag`, `tagExt`, `addCrossRefDoc`,
The `Database` enum and the `databaseURL` / `databaseLabel` projections live in
`Mathlib.Tactic.CrossRef.Fetch`; the widget itself in
`Mathlib.Tactic.Widget.CrossRefHover`.

The remaining shared infrastructure (`Tag`, `tagExt`, `addCrossRefDoc`,
`traceCrossRefs`) is database-agnostic; per-database code defines a parser, the
attribute syntax, and the trace command.
-/

public meta section

open Lean Elab
open Lean Elab Server

namespace Mathlib.CrossRef

/-- The supported external databases -/
inductive Database where
| kerodon
| stacks
| wikidata
deriving BEq, Hashable

/-- The base URL for an external database's tag pages. Always ends with `/`. -/
def databaseURL : Database → String
| .kerodon => "https://kerodon.net/tag/"
| .stacks => "https://stacks.math.columbia.edu/tag/"
| .wikidata => "https://www.wikidata.org/wiki/"

/-- The display label used in docstring links and trace output. -/
def databaseLabel : Database → String
| .kerodon => "Kerodon Tag"
| .stacks => "Stacks Tag"
| .wikidata => "Wikidata"

/-- A cross-reference from a Mathlib declaration to an entry in an external database. -/
structure Tag where
/-- The name of the declaration carrying the cross-reference. -/
Expand Down Expand Up @@ -86,6 +75,16 @@ def addCrossRefDoc (db : Database) (decl : Name) (idStr comment : String) : Core
addDocStringCore decl <| "\n\n".intercalate ([oldDoc, link].filter (· != ""))
addTagEntry decl db idStr comment

/-- Attach the `CrossRefHoverPanel` info-view widget to the attribute syntax
`attrStx`. This is what makes the info view show the upstream snippet when the
cursor lands on a cross-reference tag. -/
def attachCrossRefWidget (db : Database) (tag comment : String) (attrStx : Syntax) :
CoreM Unit :=
Widget.savePanelWidgetInfo
CrossRefHoverPanel.javascriptHash
(rpcEncode { database := db.name, tag := tag, comment := comment : CrossRefHoverProps })
attrStx

open Parser

/-! ### Stacks (and Kerodon) parser -/
Expand Down Expand Up @@ -228,7 +227,10 @@ initialize Lean.registerBuiltinAttribute {
| `(attr| stacks $tag $[$comment]?) => return (Database.stacks, tag, comment)
| `(attr| kerodon $tag $[$comment]?) => return (Database.kerodon, tag, comment)
| _ => throwUnsupportedSyntax
addCrossRefDoc db decl (← tag.getStacksTag) ((comment.map (·.getString)).getD "")
let tagStr ← tag.getStacksTag
let commentStr := (comment.map (·.getString)).getD ""
addCrossRefDoc db decl tagStr commentStr
attachCrossRefWidget db tagStr commentStr stx
-- docstrings are immutable once an asynchronous elaboration task has been started
applicationTime := .beforeElaboration
}
Expand All @@ -250,7 +252,10 @@ initialize Lean.registerBuiltinAttribute {
let (id, comment) := ← match stx with
| `(attr| wikidata $id $[$comment]?) => return (id, comment)
| _ => throwUnsupportedSyntax
addCrossRefDoc .wikidata decl (← id.getWikidataId) ((comment.map (·.getString)).getD "")
let tagStr ← id.getWikidataId
let commentStr := (comment.map (·.getString)).getD ""
addCrossRefDoc .wikidata decl tagStr commentStr
attachCrossRefWidget .wikidata tagStr commentStr stx
-- docstrings are immutable once an asynchronous elaboration task has been started
applicationTime := .beforeElaboration
}
Expand Down
Loading
Loading