@@ -306,7 +306,16 @@ status: ## Show library status
306306BENCH_SAMPLES := tests/samples
307307BENCH_OUTPUT := examples/output
308308BENCH_LIMIT := 20
309- BENCH_FORMATS := yaml toon logicml json
309+ BENCH_FORMATS := yaml toon logicml json markdown csv gherkin
310+
311+ # Set BENCH_USE_LLM=1 to run benchmarks with a configured LLM provider
312+ # (e.g. OpenRouter) instead of offline template mode; any other value keeps --no-llm.
313+ BENCH_USE_LLM ?= 0
314+ ifeq ($(BENCH_USE_LLM ) ,1)
315+ BENCH_NO_LLM_FLAG :=
316+ else
317+ BENCH_NO_LLM_FLAG := --no-llm
318+ endif
310319
311320benchmark : benchmark-format benchmark-function benchmark-token benchmark-project benchmark-toon benchmark-compare # # Run all benchmarks (no LLM unless BENCH_USE_LLM=1)
312321 @echo " "
@@ -324,19 +333,19 @@ benchmark-format: ## Benchmark format reproduction (yaml/toon/logicml/json)
324333 @echo " # Auto-generated by make benchmark" > $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
325334 @echo " set -euo pipefail" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
326335 @echo " " >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
327- @printf ' %s\n' " $( PYTHON) examples/15_unified_benchmark.py --no-llm --type format --folder $( BENCH_SAMPLES) / --formats $( BENCH_FORMATS) --limit $( BENCH_LIMIT) --verbose --output $( BENCH_OUTPUT) /benchmark_format.json" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
336+ @printf ' %s\n' " $( PYTHON) examples/15_unified_benchmark.py $( BENCH_NO_LLM_FLAG ) --type format --folder $( BENCH_SAMPLES) / --formats $( BENCH_FORMATS) --limit $( BENCH_LIMIT) --verbose --output $( BENCH_OUTPUT) /benchmark_format.json" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
328337 $(PYTHON ) examples/15_unified_benchmark.py \
329- --no-llm --type format \
338+ $( BENCH_NO_LLM_FLAG ) --type format \
330339 --folder $(BENCH_SAMPLES ) / \
331340 --formats $(BENCH_FORMATS ) \
332341 --limit $(BENCH_LIMIT ) --verbose \
333342 --output $(BENCH_OUTPUT ) /benchmark_format.json
334343
335344benchmark-function : # # Benchmark function-level reproduction
336345 @echo " $( BLUE) ━━━ Function Benchmark ━━━$( NC) "
337- @printf ' %s\n' " $( PYTHON) examples/15_unified_benchmark.py --no-llm --type function --file $( BENCH_SAMPLES) /sample_functions.py --limit 10 --verbose --output $( BENCH_OUTPUT) /benchmark_function.json" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
346+ @printf ' %s\n' " $( PYTHON) examples/15_unified_benchmark.py $( BENCH_NO_LLM_FLAG ) --type function --file $( BENCH_SAMPLES) /sample_functions.py --limit 10 --verbose --output $( BENCH_OUTPUT) /benchmark_function.json" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
338347 $(PYTHON ) examples/15_unified_benchmark.py \
339- --no-llm --type function \
348+ $( BENCH_NO_LLM_FLAG ) --type function \
340349 --file $(BENCH_SAMPLES ) /sample_functions.py \
341350 --limit 10 --verbose \
342351 --output $(BENCH_OUTPUT ) /benchmark_function.json
@@ -347,19 +356,19 @@ benchmark-function: ## Benchmark function-level reproduction
347356
348357benchmark-token : # # Benchmark token efficiency across formats
349358 @echo " $( BLUE) ━━━ Token Efficiency Benchmark ━━━$( NC) "
350- @printf ' %s\n' " $( PYTHON) examples/11_token_benchmark.py --no-llm --folder $( BENCH_SAMPLES) / --formats $( BENCH_FORMATS) --limit $( BENCH_LIMIT) --verbose --output $( BENCH_OUTPUT) /benchmark_token.json" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
359+ @printf ' %s\n' " $( PYTHON) examples/11_token_benchmark.py $( BENCH_NO_LLM_FLAG ) --folder $( BENCH_SAMPLES) / --formats $( BENCH_FORMATS) --limit $( BENCH_LIMIT) --verbose --output $( BENCH_OUTPUT) /benchmark_token.json" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
351360 $(PYTHON ) examples/11_token_benchmark.py \
352- --no-llm \
361+ $( BENCH_NO_LLM_FLAG ) \
353362 --folder $(BENCH_SAMPLES ) / \
354363 --formats $(BENCH_FORMATS ) \
355364 --limit $(BENCH_LIMIT ) --verbose \
356365 --output $(BENCH_OUTPUT ) /benchmark_token.json
357366
358367benchmark-project : # # Benchmark project-level reproduction
359368 @echo " $( BLUE) ━━━ Project Benchmark ━━━$( NC) "
360- @printf ' %s\n' " $( PYTHON) examples/15_unified_benchmark.py --no-llm --type project --folder $( BENCH_SAMPLES) / --formats $( BENCH_FORMATS) --limit $( BENCH_LIMIT) --verbose --output $( BENCH_OUTPUT) /benchmark_project.json" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
369+ @printf ' %s\n' " $( PYTHON) examples/15_unified_benchmark.py $( BENCH_NO_LLM_FLAG ) --type project --folder $( BENCH_SAMPLES) / --formats $( BENCH_FORMATS) --limit $( BENCH_LIMIT) --verbose --output $( BENCH_OUTPUT) /benchmark_project.json" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
361370 $(PYTHON ) examples/15_unified_benchmark.py \
362- --no-llm --type project \
371+ $( BENCH_NO_LLM_FLAG ) --type project \
363372 --folder $(BENCH_SAMPLES ) / \
364373 --formats $(BENCH_FORMATS ) \
365374 --limit $(BENCH_LIMIT ) --verbose \
@@ -368,14 +377,15 @@ benchmark-project: ## Benchmark project-level reproduction
368377benchmark-toon : # # Generate TOON + function-logic for self-analysis
369378 @echo " $( BLUE) ━━━ TOON Self-Analysis ━━━$( NC) "
370379 @mkdir -p $(BENCH_OUTPUT )
380+ @rm -f $(BENCH_OUTPUT ) /function.toon $(BENCH_OUTPUT ) /function-schema.json 2> /dev/null || true
371381 @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f toon --compact --name project -o ./" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
372382 $(PYTHON ) -m code2logic ./ -f toon --compact --name project -o ./
373- @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f toon --compact --no-repeat-module --function-logic function.toon --with-schema --name project -o ./" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
374- $(PYTHON ) -m code2logic ./ -f toon --compact --no-repeat-module --function-logic function.toon --with-schema --name project -o ./
383+ @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f toon --compact --no-repeat-module --function-logic --with-schema --name project -o ./" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
384+ $(PYTHON ) -m code2logic ./ -f toon --compact --no-repeat-module --function-logic --with-schema --name project -o ./
375385 @cp -f project.toon $(BENCH_OUTPUT ) /project.toon 2> /dev/null || true
376386 @cp -f project.toon-schema.json $(BENCH_OUTPUT ) /project.toon-schema.json 2> /dev/null || true
377- @cp -f function. toon $(BENCH_OUTPUT ) /function .toon 2> /dev/null || true
378- @cp -f function -schema.json $(BENCH_OUTPUT ) /function -schema.json 2> /dev/null || true
387+ @cp -f project.functions. toon $(BENCH_OUTPUT ) /project.functions .toon 2> /dev/null || true
388+ @cp -f project.functions -schema.json $(BENCH_OUTPUT ) /project.functions -schema.json 2> /dev/null || true
379389 @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f yaml --compact --name project -o $( BENCH_OUTPUT) /" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
380390 $(PYTHON ) -m code2logic ./ -f yaml --compact --name project -o $(BENCH_OUTPUT ) /
381391 @printf ' %s\n' " $( PYTHON) -m code2logic ./ -f json --name project -o $( BENCH_OUTPUT) /" >> $(BENCH_OUTPUT ) /BENCHMARK_COMMANDS.sh
@@ -390,7 +400,7 @@ benchmark-toon: ## Generate TOON + function-logic for self-analysis
390400 @echo " $( BLUE) Format size comparison (self-analysis):$( NC) "
391401 @printf " %-25s %10s %10s\n" " Format" " Size" " ~Tokens"
392402 @printf " %-25s %10s %10s\n" " -------------------------" " ----------" " ----------"
393- @for f in $(BENCH_OUTPUT ) /project.toon $(BENCH_OUTPUT ) /function .toon $(BENCH_OUTPUT ) /project.yaml $(BENCH_OUTPUT ) /project.json $(BENCH_OUTPUT ) /project.md $(BENCH_OUTPUT ) /project.txt $(BENCH_OUTPUT ) /project.csv; do \
403+ @for f in $(BENCH_OUTPUT ) /project.toon $(BENCH_OUTPUT ) /project.functions .toon $(BENCH_OUTPUT ) /project.yaml $(BENCH_OUTPUT ) /project.json $(BENCH_OUTPUT ) /project.md $(BENCH_OUTPUT ) /project.txt $(BENCH_OUTPUT ) /project.csv; do \
394404 if [ -f " $$ f" ]; then \
395405 sz=$$(wc -c < "$$f" ) ; \
396406 tok=$$((sz / 4 ) ); \
@@ -399,7 +409,7 @@ benchmark-toon: ## Generate TOON + function-logic for self-analysis
399409 done
400410 @echo " "
401411 @echo " $( GREEN) TOON files:$( NC) "
402- @ls -lh $(BENCH_OUTPUT ) /project.toon $(BENCH_OUTPUT ) /function. toon $(BENCH_OUTPUT ) /project.toon-schema.json $(BENCH_OUTPUT ) /function -schema.json 2> /dev/null
412+ @ls -lh $(BENCH_OUTPUT ) /project.toon $(BENCH_OUTPUT ) /project.functions. toon $(BENCH_OUTPUT ) /project.toon-schema.json $(BENCH_OUTPUT ) /project.functions -schema.json 2> /dev/null
403413
404414benchmark-compare : # # Show summary comparison of all benchmark results
405415 @echo " "
0 commit comments