Skip to content

Commit 80c7934

Browse files
committed
fix: preserve pct-triplets in reserved expansion; allow empty match captures
Two RFC-conformance fixes.

1. Reserved expansion ({+var}, {#var}) now passes existing %XX pct-triplets through unchanged, per RFC 6570 section 3.2.3, while still encoding a bare % (one not followed by two hex digits) as %25. Previously quote() double-encoded path%2Fto into path%252Fto. Simple expansion is unchanged: it still encodes % unconditionally.

2. The match patterns for the simple, reserved, fragment, label, and path-segment operators now use the * quantifier instead of +, so defined-but-empty values round-trip. The RFC says a defined-but-empty variable still emits the operator prefix: {#section} with section='' expands to '#', but the previous .+ pattern could not match the empty capture after it. All eight operators now consistently accept empty values.

The quantifier change affects how adjacent unrestricted variables are resolved: {a}{b} matching 'xy' now gives {a: 'xy', b: ''} (greedy, first variable wins) instead of the previous {a: 'x', b: 'y'} (an artifact of + forcing backtracking). Adjacent variables without a separating literal are inherently ambiguous either way; a literal between them ({a}-{b}) still disambiguates.
1 parent 99c9cb0 commit 80c7934

File tree

2 files changed

+52
-14
lines changed

2 files changed

+52
-14
lines changed

src/mcp/shared/uri_template.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,11 @@ class _OperatorSpec:
8686
# the characters that can appear in an expanded value for that operator,
8787
# stopping at the next structural delimiter.
8888
_MATCH_PATTERN: dict[Operator, str] = {
89-
"": r"[^/?#&,]+", # simple: everything structural is pct-encoded
90-
"+": r"[^?#]+", # reserved: / allowed, stop at query/fragment
91-
"#": r".+", # fragment: tail of URI
92-
".": r"[^./?#]+", # label: stop at next .
93-
"/": r"[^/?#]+", # path segment: stop at next /
89+
"": r"[^/?#&,]*", # simple: everything structural is pct-encoded
90+
"+": r"[^?#]*", # reserved: / allowed, stop at query/fragment
91+
"#": r".*", # fragment: tail of URI
92+
".": r"[^./?#]*", # label: stop at next .
93+
"/": r"[^/?#]*", # path segment: stop at next /
9494
";": r"[^;/?#]*", # path-param value (may be empty: ;name)
9595
"?": r"[^&#]*", # query value (may be empty: ?name=)
9696
"&": r"[^&#]*", # query-cont value
@@ -140,15 +140,32 @@ def _is_str_sequence(value: object) -> bool:
140140
return all(isinstance(item, str) for item in seq)
141141

142142

143+
_PCT_TRIPLET_RE = re.compile(r"%[0-9A-Fa-f]{2}")
144+
145+
143146
def _encode(value: str, *, allow_reserved: bool) -> str:
144147
"""Percent-encode a value per RFC 6570 §3.2.1.
145148
146149
Simple expansion encodes everything except unreserved characters.
147-
Reserved expansion ({+var}, {#var}) additionally keeps RFC 3986
148-
reserved characters intact.
150+
Reserved expansion (``{+var}``, ``{#var}``) additionally keeps
151+
RFC 3986 reserved characters intact and passes through existing
152+
``%XX`` pct-triplets unchanged (RFC 6570 §3.2.3). A bare ``%`` not
153+
followed by two hex digits is still encoded to ``%25``.
149154
"""
150-
safe = _RESERVED if allow_reserved else ""
151-
return quote(value, safe=safe)
155+
if not allow_reserved:
156+
return quote(value, safe="")
157+
158+
# Reserved expansion: walk the string, pass through triplets as-is,
159+
# quote the gaps between them. A bare % with no triplet lands in a
160+
# gap and gets encoded normally.
161+
out: list[str] = []
162+
last = 0
163+
for m in _PCT_TRIPLET_RE.finditer(value):
164+
out.append(quote(value[last : m.start()], safe=_RESERVED))
165+
out.append(m.group())
166+
last = m.end()
167+
out.append(quote(value[last:], safe=_RESERVED))
168+
return "".join(out)
152169

153170

154171
def _expand_expression(expr: _Expression, variables: Mapping[str, str | Sequence[str]]) -> str:

tests/shared/test_uri_template.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,16 @@ def test_frozen():
277277
# Level 2: reserved expansion keeps / ? # etc.
278278
("{+var}", {"var": "a/b/c"}, "a/b/c"),
279279
("{+var}", {"var": "a?b#c"}, "a?b#c"),
280+
# RFC §3.2.3: reserved expansion passes through existing
281+
# pct-triplets unchanged; bare % is still encoded.
282+
("{+var}", {"var": "path%2Fto"}, "path%2Fto"),
283+
("{+var}", {"var": "50%"}, "50%25"),
284+
("{+var}", {"var": "50%2"}, "50%252"),
285+
("{+var}", {"var": "a%2Fb%20c"}, "a%2Fb%20c"),
286+
("{#var}", {"var": "a%2Fb"}, "#a%2Fb"),
287+
# Simple expansion still encodes % unconditionally (triplet
288+
# preservation is reserved-only).
289+
("{var}", {"var": "path%2Fto"}, "path%252Fto"),
280290
("file://docs/{+path}", {"path": "src/main.py"}, "file://docs/src/main.py"),
281291
# Level 2: fragment
282292
("{#var}", {"var": "section"}, "#section"),
@@ -422,12 +432,17 @@ def test_match_no_match(template: str, uri: str):
422432
def test_match_adjacent_vars_with_prefix_names():
423433
# Two adjacent simple vars where one name is a prefix of the other.
424434
# We use positional capture groups, so names only affect the result
425-
# dict keys, not the regex. Standard greedy matching: the first var
426-
# takes as much as it can while still letting the second satisfy +.
435+
# dict keys, not the regex. Adjacent unrestricted vars are inherently
436+
# ambiguous; greedy * resolution means the first takes everything.
427437
t = UriTemplate.parse("{var}{vara}")
428-
assert t.match("ab") == {"var": "a", "vara": "b"}
429-
assert t.match("abc") == {"var": "ab", "vara": "c"}
430-
assert t.match("abcd") == {"var": "abc", "vara": "d"}
438+
assert t.match("ab") == {"var": "ab", "vara": ""}
439+
assert t.match("abcd") == {"var": "abcd", "vara": ""}
440+
441+
442+
def test_match_adjacent_vars_disambiguated_by_literal():
443+
# A literal between vars resolves the ambiguity.
444+
t = UriTemplate.parse("{a}-{b}")
445+
assert t.match("foo-bar") == {"a": "foo", "b": "bar"}
431446

432447

433448
def test_match_decodes_percent_encoding():
@@ -515,6 +530,12 @@ def test_match_explode_encoded_separator_in_segment():
515530
("{var}", {"var": "hello world"}),
516531
("item{;id}", {"id": "42"}),
517532
("item{;id}", {"id": ""}),
533+
# Defined-but-empty values still emit the operator prefix; match
534+
# must accept the empty capture after it.
535+
("page{#section}", {"section": ""}),
536+
("file{.ext}", {"ext": ""}),
537+
("api{/v}", {"v": ""}),
538+
("x{name}y", {"name": ""}),
518539
("item{;keys*}", {"keys": ["a", "b", "c"]}),
519540
("item{;keys*}", {"keys": ["a", "", "b"]}),
520541
# Partial query expansion round-trips: expand omits undefined

0 commit comments

Comments
 (0)