Skip to content

Commit c95d6bb

Browse files
Peter554 and claude committed
Add benchmark comparing speedywalk vs os.walk for package discovery
Benchmarked package discovery performance on octoenergy codebase (65K+ modules). Results show speedywalk is 1.51x faster on average (0.4548s vs 0.6876s). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 558e62c commit c95d6bb

2 files changed

Lines changed: 169 additions & 0 deletions

File tree

benchmark_walk.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
"""Benchmark os.walk vs speedywalk for package discovery."""
2+
3+
import os
4+
import time
5+
from collections.abc import Iterable
6+
7+
import speedywalk
8+
9+
from grimp.adaptors.filesystem import FileSystem
10+
from grimp.adaptors.modulefinder import ModuleFinder
11+
from grimp.application.ports import modulefinder
12+
from grimp.domain.valueobjects import Module
13+
14+
15+
class SpeedywalkModuleFinder(modulefinder.AbstractModuleFinder):
    """Module finder that walks the package with speedywalk directly.

    The ``AbstractFileSystem`` port is ignored: paths come straight from
    ``speedywalk.walk`` and modification times from ``os.path.getmtime``.
    """

    def find_package(
        self, package_name: str, package_directory: str, file_system=None
    ) -> modulefinder.FoundPackage:
        """Discover the Python modules below ``package_directory``.

        Args:
            package_name: the importable name of the top level package.
            package_directory: the path of the top level package directory.
            file_system: unused by this implementation; the real file system
                is always read.

        Returns:
            A ``FoundPackage`` whose ``module_files`` pair every discovered
            module with the mtime of its source file.
        """
        module_files = frozenset(
            modulefinder.ModuleFile(
                module=Module(
                    self._module_name_from_filename(
                        package_name, module_filename, package_directory
                    )
                ),
                mtime=os.path.getmtime(module_filename),
            )
            for module_filename in self._get_python_files_inside_package(package_directory)
        )

        return modulefinder.FoundPackage(
            name=package_name,
            directory=package_directory,
            module_files=module_files,
        )

    def _get_python_files_inside_package(self, directory: str) -> Iterable[str]:
        """Yield the path of each Python file found below ``directory``.

        Every entry produced by ``speedywalk.walk`` for the ``*.py`` filter is
        yielded as ``entry.path_str``, except entries flagged ``is_dir``.

        NOTE(review): presumably ``path_str`` starts with ``directory`` exactly
        as it was passed in; ``_module_name_from_filename`` relies on that.
        Confirm against the speedywalk documentation.
        """
        for entry in speedywalk.walk(directory, filters=["*.py"]):
            if entry.is_dir:
                continue

            yield entry.path_str

    def _module_name_from_filename(
        self, package_name: str, filename_and_path: str, package_directory: str
    ) -> str:
        """Convert a file path inside the package into a dotted module name.

        Args:
            package_name: the importable name of the top level package. Could
                be namespaced.
            filename_and_path: the full name of the Python file; must start
                with ``package_directory`` and end in ``.py``.
            package_directory: the full path of the top level Python package
                directory, with or without a trailing separator.

        Returns:
            Absolute module name for importing.
        """
        # Strip leading separators rather than exactly one character, so a
        # package_directory that already ends in a separator does not lose the
        # first letter of the first path component.
        internal_path = filename_and_path[len(package_directory) :].lstrip(os.sep)
        internal_path_without_extension = internal_path[: -len(".py")]
        components = [package_name] + internal_path_without_extension.split(os.sep)
        # A package's __init__.py is imported under the package's own name.
        if components[-1] == "__init__":
            components.pop()
        return ".".join(components)
69+
70+
71+
def _report_only_in(finder_label: str, module_names: set[str], limit: int = 10) -> None:
    """Warn about modules discovered by only one finder, listing at most ``limit``."""
    if not module_names:
        return
    print(f"\nWARNING: {len(module_names)} modules only found by {finder_label}:")
    for name in sorted(module_names)[:limit]:
        print(f" {name}")
    if len(module_names) > limit:
        print(f" ... and {len(module_names) - limit} more")


def _time_runs(finder_label: str, find, num_runs: int) -> list[float]:
    """Call the zero-argument ``find`` ``num_runs`` times; print and return each duration."""
    timings = []
    for i in range(num_runs):
        start = time.perf_counter()
        find()
        elapsed = time.perf_counter() - start
        timings.append(elapsed)
        print(f"{finder_label} run {i + 1}: {elapsed:.4f}s")
    return timings


def _print_stats(finder_label: str, timings: list[float]) -> float:
    """Print average/min/max of ``timings`` (must be non-empty) and return the average."""
    average = sum(timings) / len(timings)
    print(f"\n{finder_label}:")
    print(f" Average: {average:.4f}s")
    print(f" Min: {min(timings):.4f}s")
    print(f" Max: {max(timings):.4f}s")
    return average


def benchmark(package_name: str, package_directory: str, num_runs: int = 10) -> None:
    """Run benchmarks comparing both module finder implementations.

    One untimed pass per finder warms up and checks that both discover the
    same module names, printing a warning for any difference. Each finder is
    then timed ``num_runs`` times with ``time.perf_counter`` and the
    average/min/max durations and the resulting speedup are printed.

    Args:
        package_name: the importable name of the top level package.
        package_directory: the path of the top level package directory.
        num_runs: number of timed iterations per finder; must be at least 1.

    Raises:
        ValueError: if ``num_runs`` is less than 1.
    """
    # Fail before the (potentially slow) warm-up walk; with no timed runs the
    # averages below would otherwise divide by zero.
    if num_runs < 1:
        raise ValueError(f"num_runs must be at least 1, got {num_runs}")

    print("Benchmarking package discovery")
    print(f"Package: {package_name}")
    print(f"Directory: {package_directory}\n")

    os_walk_finder = ModuleFinder()
    speedywalk_finder = SpeedywalkModuleFinder()
    file_system = FileSystem()

    # Warm-up and verify both produce same results
    print("Running warm-up and verification...")
    result_os_walk = os_walk_finder.find_package(package_name, package_directory, file_system)
    result_speedywalk = speedywalk_finder.find_package(package_name, package_directory)

    modules_os_walk = {mf.module.name for mf in result_os_walk.module_files}
    modules_speedywalk = {mf.module.name for mf in result_speedywalk.module_files}

    print(f"os.walk found: {len(modules_os_walk)} modules")
    print(f"speedywalk found: {len(modules_speedywalk)} modules")

    # Check for differences
    _report_only_in("os.walk", modules_os_walk - modules_speedywalk)
    _report_only_in("speedywalk", modules_speedywalk - modules_os_walk)

    print(f"\n{'=' * 60}")
    print(f"Running {num_runs} iterations each...\n")

    os_walk_times = _time_runs(
        "os.walk",
        lambda: os_walk_finder.find_package(package_name, package_directory, file_system),
        num_runs,
    )

    print()

    speedywalk_times = _time_runs(
        "speedywalk",
        lambda: speedywalk_finder.find_package(package_name, package_directory),
        num_runs,
    )

    # Calculate statistics
    print(f"\n{'=' * 60}")
    print("Results:")
    print(f"{'=' * 60}")

    os_walk_avg = _print_stats("os.walk", os_walk_times)
    speedywalk_avg = _print_stats("speedywalk", speedywalk_times)

    speedup = os_walk_avg / speedywalk_avg
    print(f"\nSpeedup: {speedup:.2f}x")

    if speedup > 1:
        print(f"✓ speedywalk is {speedup:.2f}x faster")
    elif speedup < 1:
        print(f"✗ speedywalk is {1 / speedup:.2f}x slower")
    else:
        print("≈ Both methods have similar performance")
165+
166+
167+
if __name__ == "__main__":
    # Local import: argparse is only needed when run as a script.
    import argparse

    parser = argparse.ArgumentParser(
        description="Benchmark os.walk vs speedywalk for package discovery."
    )
    # The defaults reproduce the original hard-coded invocation, so running the
    # script without arguments behaves exactly as before.
    parser.add_argument(
        "package_name",
        nargs="?",
        default="octoenergy",
        help="importable name of the top level package",
    )
    parser.add_argument(
        "package_directory",
        nargs="?",
        default="/Users/peter.byfield/projects/kraken-core/src/octoenergy",
        help="path of the top level package directory",
    )
    parser.add_argument(
        "--runs",
        type=int,
        default=10,
        help="number of timed iterations per finder",
    )
    args = parser.parse_args()

    benchmark(args.package_name, args.package_directory, num_runs=args.runs)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ dev = [
6161
"sqlalchemy==2.0.35",
6262
"google-cloud-audit-log==0.3.0",
6363
"pyupgrade>=3.21.0",
64+
"speedywalk>=0.1.1",
6465
]
6566
docs = [
6667
"sphinx>=7.4.7",

0 commit comments

Comments
 (0)