Cache compiled path parser

p1c2u · p1c2u · commit 3fd09ebd3b8e · 2025-12-16T10:30:36.000Z
diff --git a/Makefile b/Makefile
@@ -38,3 +38,6 @@ docs-cleanup:
 	@rm -rf docs_build
 
 cleanup: dist-cleanup test-cleanup
+
+# Run the path-lookup benchmark script with fixed parameters and hash
+# randomization disabled (PYTHONHASHSEED=0). The script prints its JSON
+# report and also writes it to bench-paths.json.
+bench-paths:
+	@PYTHONHASHSEED=0 python tests/benchmarks/bench_paths.py --paths 500 --templates-ratio 0.7 --lookups 2000 --output bench-paths.json
diff --git a/openapi_core/templating/paths/iterators.py b/openapi_core/templating/paths/iterators.py
@@ -1,3 +1,4 @@
+from functools import lru_cache
 from typing import Iterator
 from typing import List
 from typing import Optional
@@ -12,9 +13,8 @@
 from openapi_core.templating.paths.datatypes import PathOperation
 from openapi_core.templating.paths.datatypes import PathOperationServer
 from openapi_core.templating.paths.exceptions import PathsNotFound
+from openapi_core.templating.paths.parsers import PathParser
 from openapi_core.templating.paths.util import template_path_len
-from openapi_core.templating.util import parse
-from openapi_core.templating.util import search
 
 
 class SimplePathsIterator:
@@ -52,14 +52,19 @@ def __call__(
                 yield Path(path, path_result)
             # template path
             else:
-                result = search(path_pattern, name)
+                path_parser = self._get_path_parser(path_pattern)
+                result = path_parser.search(name)
                 if result:
                     path_result = TemplateResult(path_pattern, result.named)
                     template_paths.append(Path(path, path_result))
 
         # Fewer variables -> more concrete path
         yield from sorted(template_paths, key=template_path_len)
 
+    @staticmethod
+    @lru_cache(maxsize=4096)
+    def _get_path_parser(path_pattern: str) -> PathParser:
+        """Return a cached ``PathParser`` for ``path_pattern``.
+
+        The parser is built with ``post_expression="$"``, i.e. ``"$"`` is
+        appended to the expression generated from the pattern.
+
+        This is a static method on purpose: ``lru_cache`` on an instance
+        method makes ``self`` part of the cache key, which keeps every
+        iterator instance alive for the lifetime of the cache and prevents
+        instances from sharing parsers for the same pattern. It can still be
+        called as ``self._get_path_parser(path_pattern)``.
+        """
+        return PathParser(path_pattern, post_expression="$")
+
 
 class SimpleOperationsIterator:
     def __call__(
@@ -156,7 +161,8 @@ def __call__(
                     )
                 # template path
                 else:
-                    result = parse(server["url"], server_url_pattern)
+                    server_parser = self._get_server_parser(server["url"])
+                    result = server_parser.parse(server_url_pattern)
                     if result:
                         server_result = TemplateResult(
                             server["url"], result.named
@@ -171,7 +177,7 @@ def __call__(
                     # servers should'n end with tailing slash
                     # but let's search for this too
                     server_url_pattern += "/"
-                    result = parse(server["url"], server_url_pattern)
+                    result = server_parser.parse(server_url_pattern)
                     if result:
                         server_result = TemplateResult(
                             server["url"], result.named
@@ -183,3 +189,7 @@ def __call__(
                             path_result,
                             server_result,
                         )
+
+    @staticmethod
+    @lru_cache(maxsize=1024)
+    def _get_server_parser(server_url: str) -> PathParser:
+        """Return a cached ``PathParser`` for the ``server_url`` template.
+
+        The parser is built with ``pre_expression="^"``, i.e. ``"^"`` is
+        prepended to the expression generated from the template.
+
+        This is a static method on purpose: ``lru_cache`` on an instance
+        method makes ``self`` part of the cache key, which keeps every
+        iterator instance alive for the lifetime of the cache and prevents
+        instances from sharing parsers for the same server URL. It can still
+        be called as ``self._get_server_parser(server_url)``.
+        """
+        return PathParser(server_url, pre_expression="^")
diff --git a/openapi_core/templating/paths/parsers.py b/openapi_core/templating/paths/parsers.py
@@ -0,0 +1,27 @@
+from typing import Any
+
+from parse import Parser
+
+
+class PathParameter:
+    """Converter object used for path template fields.
+
+    ``PathParser`` registers an instance of this class in its ``extra_types``
+    mapping under ``name`` and rewrites every template field to refer to it.
+    """
+
+    # Key under which the converter is registered in ``extra_types``.
+    name = "PathParameter"
+    # Regex fragment: any run (possibly empty) of characters other than "/".
+    # NOTE(review): presumably read by the ``parse`` library as the match
+    # pattern for this custom type -- confirm against the parse docs.
+    pattern = r"[^\/]*"
+
+    def __call__(self, text: str) -> str:
+        """Return the matched text unchanged (no conversion is applied)."""
+        return text
+
+
+class PathParser(Parser):  # type: ignore
+    """``parse.Parser`` variant in which every field is a ``PathParameter``.
+
+    Optional ``pre_expression`` / ``post_expression`` strings are concatenated
+    around the expression generated by the base class (callers in this change
+    pass ``"^"`` and ``"$"``).
+    """
+
+    parse_path_parameter = PathParameter()
+
+    def __init__(
+        self,
+        pattern: str,
+        pre_expression: str = "",
+        post_expression: str = "",
+    ) -> None:
+        """Build the parser for ``pattern`` and wrap its expression."""
+        converter = self.parse_path_parameter
+        # Register the converter under its own name so rewritten fields
+        # (see ``_handle_field``) resolve to it.
+        super().__init__(pattern, {converter.name: converter})
+        self._expression = "".join(
+            (pre_expression, self._expression, post_expression)
+        )
+
+    def _handle_field(self, field: str) -> Any:
+        """Rewrite ``field`` as a ``PathParameter`` field and delegate."""
+        # Drop the first and last character of the field (the surrounding
+        # braces) before re-wrapping it with the custom type suffix.
+        inner = field[1:-1]
+        return super()._handle_field("{%s:PathParameter}" % inner)
diff --git a/openapi_core/templating/util.py b/openapi_core/templating/util.py
diff --git a/tests/benchmarks/bench_paths.py b/tests/benchmarks/bench_paths.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+import argparse
+import gc
+import json
+import random
+import statistics
+import time
+from dataclasses import dataclass
+from typing import Any, Dict, List
+
+from jsonschema_path import SchemaPath
+
+from openapi_core.templating.paths.finders import APICallPathFinder
+
+
+@dataclass(frozen=True)
+class Result:
+    """Benchmark parameters together with the measured pass durations."""
+
+    paths: int
+    templates_ratio: float
+    lookups: int
+    repeats: int
+    warmup: int
+    seconds: List[float]
+
+    def as_dict(self) -> Dict[str, Any]:
+        """Return the parameters plus summary statistics as a plain dict.
+
+        ``stdev_s`` is the population standard deviation
+        (``statistics.pstdev``); ``ops_per_sec_median`` is ``lookups``
+        divided by the median pass duration.
+        """
+        timings = self.seconds
+        median_s = statistics.median(timings)
+
+        summary: Dict[str, Any] = {
+            "paths": self.paths,
+            "templates_ratio": self.templates_ratio,
+            "lookups": self.lookups,
+            "repeats": self.repeats,
+            "warmup": self.warmup,
+            "seconds": timings,
+        }
+        summary["median_s"] = median_s
+        summary["mean_s"] = statistics.mean(timings)
+        summary["stdev_s"] = statistics.pstdev(timings)
+        summary["ops_per_sec_median"] = self.lookups / median_s
+        return summary
+
+
+def build_spec(paths: int, templates_ratio: float) -> SchemaPath:
+    """Build a minimal OpenAPI 3.0.0 spec with ``paths`` path entries.
+
+    ``int(paths * templates_ratio)`` of the entries are templated paths with
+    two variables; the rest are exact paths. Exact paths are inserted first.
+    Every path carries the same tiny ``get`` operation so that the benchmark
+    measures path finding rather than schema complexity.
+    """
+    template_count = int(paths * templates_ratio)
+    exact_count = paths - template_count
+
+    def operation() -> Dict[str, Any]:
+        # A fresh dict per path, so entries never share nested objects.
+        return {"get": {"responses": {"200": {"description": "ok"}}}}
+
+    # Exact paths (fast case)
+    paths_obj: Dict[str, Any] = {
+        f"/resource/{i}/sub": operation() for i in range(exact_count)
+    }
+    # Template paths (slow case)
+    paths_obj.update(
+        (f"/resource/{i}" + "/{item_id}/sub/{sub_id}", operation())
+        for i in range(template_count)
+    )
+
+    return SchemaPath.from_dict(
+        {
+            "openapi": "3.0.0",
+            "info": {"title": "bench", "version": "0"},
+            "servers": [{"url": "http://example.com"}],
+            "paths": paths_obj,
+        }
+    )
+
+
+def build_urls(paths: int, templates_ratio: float, lookups: int, seed: int) -> List[str]:
+    """Generate ``lookups`` request URLs reproducibly from ``seed``.
+
+    Each URL targets either a templated path or an exact path. When both
+    kinds exist, a templated path is chosen with probability
+    ``template_count / paths``, i.e. proportionally to its share of the spec.
+    """
+    rng = random.Random(seed)
+    template_count = int(paths * templates_ratio)
+    exact_count = paths - template_count
+
+    def wants_template() -> bool:
+        # The random draw happens only when both populations are possible,
+        # which keeps the generator's call sequence stable.
+        if template_count <= 0:
+            return False
+        if exact_count == 0:
+            return True
+        return rng.random() < (template_count / paths)
+
+    urls: List[str] = []
+    for _ in range(lookups):
+        if wants_template():
+            i = rng.randrange(template_count)  # matches template bucket
+            item_id = rng.randrange(1_000_000)
+            sub_id = rng.randrange(1_000_000)
+            url = f"http://example.com/resource/{i}/{item_id}/sub/{sub_id}"
+        else:
+            i = rng.randrange(exact_count) if exact_count > 0 else 0
+            url = f"http://example.com/resource/{i}/sub"
+        urls.append(url)
+    return urls
+
+
+def run_once(finder: APICallPathFinder, urls: List[str]) -> float:
+    """Look up every URL once with ``get`` and return the elapsed seconds."""
+    started = time.perf_counter()
+    for url in urls:
+        finder.find("get", url)
+    finished = time.perf_counter()
+    return finished - started
+
+
+def main() -> None:
+    """Parse CLI options, run the benchmark and report the timings as JSON.
+
+    The report is always printed to stdout; when ``--output`` is given it is
+    also written to that file. With ``--no-gc`` the garbage collector is
+    disabled around the warmup and timed passes.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--paths", type=int, default=2000)
+    ap.add_argument("--templates-ratio", type=float, default=0.6)
+    ap.add_argument("--lookups", type=int, default=100_000)
+    ap.add_argument("--repeats", type=int, default=7)
+    ap.add_argument("--warmup", type=int, default=2)
+    ap.add_argument("--seed", type=int, default=1)
+    ap.add_argument("--output", type=str, default="")
+    ap.add_argument("--no-gc", action="store_true")
+    args = ap.parse_args()
+
+    # The summary statistics are undefined for an empty sample, so fail fast
+    # with a usage error instead of crashing after the whole run.
+    if args.repeats < 1:
+        ap.error("--repeats must be at least 1")
+
+    spec = build_spec(args.paths, args.templates_ratio)
+    finder = APICallPathFinder(spec)
+
+    urls = build_urls(args.paths, args.templates_ratio, args.lookups, args.seed)
+
+    if args.no_gc:
+        gc.disable()
+    try:
+        # Warmup (JIT-less, but warms caches, alloc patterns, etc.)
+        for _ in range(args.warmup):
+            run_once(finder, urls)
+
+        seconds: List[float] = [
+            run_once(finder, urls) for _ in range(args.repeats)
+        ]
+    finally:
+        # Re-enable the collector even if a pass raised.
+        if args.no_gc:
+            gc.enable()
+
+    result = Result(
+        paths=args.paths,
+        templates_ratio=args.templates_ratio,
+        lookups=args.lookups,
+        repeats=args.repeats,
+        warmup=args.warmup,
+        seconds=seconds,
+    )
+
+    payload = result.as_dict()
+    print(json.dumps(payload, indent=2, sort_keys=True))
+
+    if args.output:
+        with open(args.output, "w", encoding="utf-8") as f:
+            json.dump(payload, f, indent=2, sort_keys=True)
+
+# Run the benchmark only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/tests/unit/templating/test_paths_parsers.py b/tests/unit/templating/test_paths_parsers.py
@@ -1,24 +1,26 @@
 import pytest
 
-from openapi_core.templating.util import search
+from openapi_core.templating.paths.parsers import PathParser
 
 
 class TestSearch:
     def test_endswith(self):
         path_pattern = "/{test}/test"
+        parser = PathParser(path_pattern, post_expression="$")
         full_url_pattern = "/test1/test/test2/test"
 
-        result = search(path_pattern, full_url_pattern)
+        result = parser.search(full_url_pattern)
 
         assert result.named == {
             "test": "test2",
         }
 
     def test_exact(self):
         path_pattern = "/{test}/test"
+        parser = PathParser(path_pattern, post_expression="$")
         full_url_pattern = "/test/test"
 
-        result = search(path_pattern, full_url_pattern)
+        result = parser.search(full_url_pattern)
 
         assert result.named == {
             "test": "test",
@@ -33,9 +35,10 @@ def test_exact(self):
         ],
     )
     def test_chars_valid(self, path_pattern, expected):
+        parser = PathParser(path_pattern, post_expression="$")
         full_url_pattern = "/test/test"
 
-        result = search(path_pattern, full_url_pattern)
+        result = parser.search(full_url_pattern)
 
         assert result.named == expected
 
@@ -53,8 +56,9 @@ def test_chars_valid(self, path_pattern, expected):
         ],
     )
     def test_special_chars_valid(self, path_pattern, expected):
+        parser = PathParser(path_pattern, post_expression="$")
         full_url_pattern = "/test/test"
 
-        result = search(path_pattern, full_url_pattern)
+        result = parser.search(full_url_pattern)
 
         assert result.named == expected