mlcli-tool/benchmark_mlcli.py at master · santhoshkumar0918/mlcli-tool · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import time
import os
import psutil
import json
import pickle
import numpy as np
from pathlib import Path
from mlcli.core.suggestion_model.model import MLSuggestionEngine

def _load_benchmark_inputs(data_profile_path, eval_report_path):
    """Return the ``(data_profile, eval_report)`` pair used for the benchmark.

    The two JSON reports are read from disk only when *both* files exist.
    If either one is missing, a fixed pair of mock dictionaries is returned
    instead so the benchmark can still run for demonstration purposes.
    """
    if not (data_profile_path.exists() and eval_report_path.exists()):
        # Mocking data profile if file doesn't exist for benchmark demonstration
        data_profile = {
            "n_samples": 1000,
            "n_features": 10,
            "missing_pct_max": 0.05,
            "imbalance_ratio": 1.1,
        }
        eval_report = {
            "accuracy": 0.85,
            "f1_score": 0.84,
            "precision_recall_gap": 0.01,
        }
        return data_profile, eval_report

    # Decode as UTF-8 explicitly instead of relying on the platform default
    # encoding, which is not UTF-8 everywhere.
    with open(data_profile_path, "r", encoding="utf-8") as f:
        data_profile = json.load(f)
    with open(eval_report_path, "r", encoding="utf-8") as f:
        eval_report = json.load(f)
    return data_profile, eval_report


def _measure_latencies_ms(engine, data_profile, eval_report, n_runs):
    """Call ``engine.get_suggestions`` ``n_runs`` times; return per-call times in ms."""
    latencies = []
    for _ in range(n_runs):
        start_time = time.perf_counter()
        engine.get_suggestions(data_profile, eval_report)
        end_time = time.perf_counter()
        latencies.append((end_time - start_time) * 1000)  # seconds -> ms
    return latencies


def benchmark_framework(n_runs: int = 100) -> None:
    """Benchmark the suggestion engine and write a summary table to disk.

    Steps performed:

    1. Load the data profile and evaluation report from
       ``my_tabular_project/reports/`` (or fall back to built-in mock values
       when either file is missing).
    2. Time ``n_runs`` calls to ``MLSuggestionEngine.get_suggestions`` and
       compute the mean and (population) standard deviation in milliseconds.
    3. Unpickle the stored suggestion model and read its estimator count and
       total tree node count.
    4. Read the current process RSS via ``psutil``.
    5. Print the results as a table and save them as JSON to
       ``docs/research_plots/ieee_benchmark_results.json``.

    All paths are relative, so they resolve against the current working
    directory.

    Args:
        n_runs: Number of timed ``get_suggestions`` calls. Defaults to 100,
            the value that was previously hard-coded.

    Raises:
        ValueError: If ``n_runs`` is less than 1 (mean/std of zero samples
            would be ``nan``).
        FileNotFoundError: If the pickled suggestion model does not exist.
    """
    if n_runs < 1:
        raise ValueError(f"n_runs must be >= 1, got {n_runs!r}")

    print("🔬 Starting IEEE-Compliant Framework Benchmarking...")

    # 1. Load Data
    data_profile_path = Path("my_tabular_project/reports/data_profile.json")
    eval_report_path = Path("my_tabular_project/reports/evaluation_report.json")
    data_profile, eval_report = _load_benchmark_inputs(
        data_profile_path, eval_report_path
    )

    # 2. Measure Inference Latency (Suggest Command)
    engine = MLSuggestionEngine()
    latencies = _measure_latencies_ms(engine, data_profile, eval_report, n_runs)

    avg_latency = np.mean(latencies)
    std_latency = np.std(latencies)  # population std-dev (numpy default ddof=0)

    # 3. Model Parameters (Meta-Model)
    model_path = Path("mlcli/core/suggestion_model/data/suggestion_model.pkl")
    # SECURITY NOTE: pickle.load can execute arbitrary code while
    # deserializing. This is only acceptable because the file is expected to
    # be a trusted artifact shipped with the project; never point this at an
    # untrusted file.
    with open(model_path, 'rb') as f:
        meta_data = pickle.load(f)
    model = meta_data['model']

    # Get structural info from the Forest.
    # NOTE(review): this assumes `model.estimators_` holds ensemble objects
    # that each expose `n_estimators` and their own `estimators_` of fitted
    # trees (e.g. a multi-output wrapper around forests) -- confirm against
    # the training code.
    n_estimators = model.estimators_[0].n_estimators
    total_nodes = sum(
        tree.tree_.node_count
        for sub_model in model.estimators_
        for tree in sub_model.estimators_
    )

    # 4. Resource Usage
    # NOTE: `rss` is the resident set size at this instant, not a true peak,
    # even though the table label below says "Peak". The label is kept so the
    # output schema does not change.
    process = psutil.Process(os.getpid())
    memory_usage = process.memory_info().rss / (1024 * 1024)  # MB

    # 5. Output Results in Formal Table Format
    results = {
        "Metric": [
            "Inference Latency (Mean)",
            "Inference Latency (Std-Dev)",
            "Ensemble Size (Estimators)",
            "Total Structural Nodes",
            "Peak Memory Consumption",
            "Knowledge Base Samples",
            "Feature Vector Dim",
            "Framework Overhead",
        ],
        "Value": [
            f"{avg_latency:.4f} ms",
            f"{std_latency:.4f} ms",
            f"{n_estimators}",
            f"{total_nodes}",
            f"{memory_usage:.2f} MB",
            # NOTE: the next two values are hard-coded constants, not
            # measurements taken by this script.
            "120",
            "7",
            # Size of the pickled model file on disk.
            f"{(os.path.getsize(model_path))/(1024):.2f} KB",
        ],
    }

    print("\n" + "="*50)
    print("📊 FINAL IEEE BENCHMARKING RESULTS")
    print("="*50)
    for m, v in zip(results["Metric"], results["Value"]):
        print(f"{m:<30} | {v}")
    print("="*50)

    # Save to a formal file for the user. Create the target directory first:
    # without this, a missing docs/research_plots/ makes the write fail with
    # FileNotFoundError after all the benchmarking work has already run.
    output_path = Path("docs/research_plots/ieee_benchmark_results.json")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(results, f, indent=4)
    print("\n✅ Results saved to docs/research_plots/ieee_benchmark_results.json")

# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    benchmark_framework()