Skip to content

Commit 99c9cb0

Browse files
committed
fix: tighten parse-time validation and document matching limits
Four small corrections: Varname grammar: the RFC grammar requires dots only between varchar groups, so {foo..bar} and {foo.} are now rejected. Previously the regex allowed any dot placement after the first char. Adjacent explodes: previously only same-operator adjacent explodes ({/a*}{/b*}) were rejected. Different operators ({/a*}{.b*}) are equally ambiguous because the first operator's character class typically includes the second's separator, so the first explode greedily consumes both. All adjacent explodes are now rejected; a literal or non-explode variable between them still disambiguates. Documented the inherent ambiguity of multi-var reserved expressions ({+x,y} with commas in values) and the intentional tradeoff that {+var} match stops at ? and # so {+path}{?q} can separate correctly.
1 parent 93e742b commit 99c9cb0

File tree

2 files changed

+70
-23
lines changed

2 files changed

+70
-23
lines changed

src/mcp/shared/uri_template.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,26 @@
88
Supports Levels 1-3 fully, plus Level 4 explode modifier for path-like
99
operators (``{/var*}``, ``{.var*}``, ``{;var*}``). The Level 4 prefix
1010
modifier (``{var:N}``) and query-explode (``{?var*}``) are not supported.
11+
12+
Known matching limitations
13+
--------------------------
14+
15+
Matching is not specified by RFC 6570. A few templates can expand to
16+
URIs that ``match()`` cannot unambiguously reverse:
17+
18+
* Multi-variable reserved expressions like ``{+x,y}`` use a comma as
19+
separator but also permit commas *inside* values (commas are in the
20+
reserved set). ``match("a,b,c")`` cannot know which comma is the
21+
separator. The matcher takes the last comma as the split point; if
22+
your values contain commas, prefer separate expressions (``{+x}/{+y}``)
23+
or a different operator.
24+
25+
* Reserved expansion ``{+var}`` leaves ``?`` and ``#`` unencoded, but
26+
the match pattern stops at those characters so that templates like
27+
``{+path}{?q}`` can correctly separate path from query. A value
28+
containing a literal ``?`` or ``#`` expands fine but will not
29+
round-trip through ``match()``. Use simple ``{var}`` (which encodes
30+
them) if round-trip matters for such values.
1131
"""
1232

1333
from __future__ import annotations
@@ -25,8 +45,9 @@
2545
_OPERATORS: frozenset[str] = frozenset({"+", "#", ".", "/", ";", "?", "&"})
2646

2747
# RFC 6570 §2.3: varname = varchar *(["."] varchar), varchar = ALPHA / DIGIT / "_"
48+
# Dots appear only between varchar groups — not consecutive, not trailing.
2849
# (Percent-encoded varchars are technically allowed but unseen in practice.)
29-
_VARNAME_RE = re.compile(r"^[A-Za-z0-9_][A-Za-z0-9_.]*$")
50+
_VARNAME_RE = re.compile(r"^[A-Za-z0-9_]+(?:\.[A-Za-z0-9_]+)*$")
3051

3152
DEFAULT_MAX_TEMPLATE_LENGTH = 1_000_000
3253
DEFAULT_MAX_EXPRESSIONS = 10_000
@@ -717,33 +738,35 @@ def _check_duplicate_variables(template: str, variables: list[Variable]) -> None
717738

718739

719740
def _check_adjacent_explodes(template: str, parts: list[_Part]) -> None:
720-
"""Reject templates with adjacent same-operator explode variables.
741+
"""Reject templates with adjacent explode variables.
721742
722743
Patterns like ``{/a*}{/b*}`` are ambiguous for matching: given
723-
``/x/y/z``, the split between ``a`` and ``b`` is undetermined. We
724-
reject these at parse time rather than picking an arbitrary
725-
resolution. A literal between them (``{/a*}/x{/b*}``) or a different
726-
operator (``{/a*}{.b*}``) disambiguates.
744+
``/x/y/z``, the split between ``a`` and ``b`` is undetermined.
745+
Different operators (``{/a*}{.b*}``) do not help in general because
746+
the first operator's character class often includes the second's
747+
separator, so the first explode greedily consumes both. We reject
748+
all adjacent explodes at parse time rather than picking an arbitrary
749+
resolution. A literal between them (``{/a*}/x{/b*}``) still
750+
disambiguates.
727751
728752
Raises:
729-
InvalidUriTemplate: If two explode variables with the same
730-
operator appear with no literal or non-explode variable
731-
between them.
753+
InvalidUriTemplate: If two explode variables appear with no
754+
literal or non-explode variable between them.
732755
"""
733-
prev_explode_op: Operator | None = None
756+
prev_explode = False
734757
for part in parts:
735758
if isinstance(part, str):
736759
# Literal text breaks any adjacency.
737-
prev_explode_op = None
760+
prev_explode = False
738761
continue
739762
for var in part.variables:
740763
if var.explode:
741-
if prev_explode_op == var.operator:
764+
if prev_explode:
742765
raise InvalidUriTemplate(
743-
f"Adjacent explode expressions with operator {var.operator!r} are ambiguous and not supported",
766+
"Adjacent explode expressions are ambiguous for matching and not supported",
744767
template=template,
745768
)
746-
prev_explode_op = var.operator
769+
prev_explode = True
747770
else:
748771
# A non-explode variable also breaks adjacency.
749-
prev_explode_op = None
772+
prev_explode = False

tests/shared/test_uri_template.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -112,12 +112,28 @@ def test_parse_rejects_operator_without_variable():
112112
UriTemplate.parse("{+}")
113113

114114

115-
@pytest.mark.parametrize("name", ["-bad", "bad-name", "bad name", "bad/name"])
115+
@pytest.mark.parametrize(
116+
"name",
117+
[
118+
"-bad",
119+
"bad-name",
120+
"bad name",
121+
"bad/name",
122+
# RFC §2.3: dots only between varchars, not consecutive or trailing
123+
"foo..bar",
124+
"foo.",
125+
],
126+
)
116127
def test_parse_rejects_invalid_varname(name: str):
117128
with pytest.raises(InvalidUriTemplate, match="Invalid variable name"):
118129
UriTemplate.parse(f"{{{name}}}")
119130

120131

132+
def test_parse_accepts_dotted_varname():
133+
t = UriTemplate.parse("{a.b.c}")
134+
assert t.variable_names == ("a.b.c",)
135+
136+
121137
def test_parse_rejects_empty_spec_in_list():
122138
with pytest.raises(InvalidUriTemplate, match="Invalid variable name"):
123139
UriTemplate.parse("{a,,b}")
@@ -134,9 +150,17 @@ def test_parse_rejects_unsupported_explode(template: str):
134150
UriTemplate.parse(template)
135151

136152

137-
def test_parse_rejects_adjacent_explodes_same_operator():
153+
@pytest.mark.parametrize(
154+
"template",
155+
[
156+
"{/a*}{/b*}", # same operator
157+
"{/a*}{.b*}", # different operators: / char class includes ., still ambiguous
158+
"{.a*}{;b*}",
159+
],
160+
)
161+
def test_parse_rejects_adjacent_explodes(template: str):
138162
with pytest.raises(InvalidUriTemplate, match="Adjacent explode"):
139-
UriTemplate.parse("{/a*}{/b*}")
163+
UriTemplate.parse(template)
140164

141165

142166
@pytest.mark.parametrize(
@@ -201,16 +225,16 @@ def test_parse_stray_close_brace_between_expressions():
201225
assert tmpl.variable_names == ("a", "b")
202226

203227

204-
def test_parse_allows_adjacent_explodes_different_operator():
205-
tmpl = UriTemplate.parse("{/a*}{.b*}")
206-
assert len(tmpl.variables) == 2
207-
208-
209228
def test_parse_allows_explode_separated_by_literal():
210229
tmpl = UriTemplate.parse("{/a*}/x{/b*}")
211230
assert len(tmpl.variables) == 2
212231

213232

233+
def test_parse_allows_explode_separated_by_non_explode_var():
234+
tmpl = UriTemplate.parse("{/a*}{b}{.c*}")
235+
assert len(tmpl.variables) == 3
236+
237+
214238
def test_parse_rejects_oversized_template():
215239
with pytest.raises(InvalidUriTemplate, match="maximum length"):
216240
UriTemplate.parse("x" * 101, max_length=100)

0 commit comments

Comments
 (0)