Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[flake8]
extend-ignore = E203
exclude = .git,__pycache__,batches,resources
max-complexity = 10
max-line-length = 88
16 changes: 3 additions & 13 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,20 +1,10 @@
/logs*.d/**/*
.env
/Cargo.lock
/tasks.d/**/*
/jobs.d/**/*
/results*.d/**/*
/scripts*.d/**/*
/inventories*.d/**/*
/backup/**/*
/test_results/**/*
/jobs*.yaml
/menage.sh
/resources
/*.tar.*
/*ipynb*
/batches/**
/batches
/.ssh_g5k.pub
/__pycache__
/*.png

# Added by cargo

Expand Down
286 changes: 286 additions & 0 deletions analysis/data_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
# IMPORTS
import os
import sys
import polars as pl
import schemas
import extract
import load
import rq1
import rq2
import rq3
import rq34
import visualization


# Static lookup keyed by the CPU marketing name found in the node
# inventories' "processor_version" column. Each entry carries the
# microarchitecture name, vendor, a generation index, and launch quarter;
# energy_for_os() uses it to enrich the nodes DataFrame.
# NOTE(review): the generation indices look hand-assigned (e.g. both
# Cascade Lake and Ice Lake map to 10) — confirm the intended scheme.
vendor_generation_map = {
    "E5-2620 v4": {
        "architecture": "Broadwell-E",
        "vendor": "Intel",
        "generation": 6,
        "launch_date": "Q1 2016",
    },
    "E5-2630L v4": {
        "architecture": "Broadwell-E",
        "vendor": "Intel",
        "generation": 6,
        "launch_date": "Q1 2016",
    },
    "E5-2698 v4": {
        "architecture": "Broadwell-E",
        "vendor": "Intel",
        "generation": 6,
        "launch_date": "Q1 2016",
    },
    "E5-2630 v3": {
        "architecture": "Haswell-E",
        "vendor": "Intel",
        "generation": 5,
        "launch_date": "Q3 2014",
    },
    "Gold 5220": {
        "architecture": "Cascade Lake-SP",
        "vendor": "Intel",
        "generation": 10,
        "launch_date": "Q2 2019",
    },
    "Gold 5218": {
        "architecture": "Cascade Lake-SP",
        "vendor": "Intel",
        "generation": 10,
        "launch_date": "Q2 2019",
    },
    "i7-9750H": {
        "architecture": "Coffee Lake",
        "vendor": "Intel",
        "generation": 9,
        "launch_date": "Q2 2019",
    },
    "Silver 4314": {
        "architecture": "Ice Lake-SP",
        "vendor": "Intel",
        "generation": 10,
        "launch_date": "Q2 2021",
    },
    "Gold 5320": {
        "architecture": "Ice Lake-SP",
        "vendor": "Intel",
        "generation": 10,
        "launch_date": "Q2 2021",
    },
    "Gold 6126": {
        "architecture": "Skylake-SP",
        "vendor": "Intel",
        "generation": 6,
        "launch_date": "Q3 2017",
    },
    "Gold 6130": {
        "architecture": "Skylake-SP",
        "vendor": "Intel",
        "generation": 6,
        "launch_date": "Q3 2017",
    },
    "E5-2620": {
        "architecture": "Sandy Bridge-EP",
        "vendor": "Intel",
        "generation": 3,
        "launch_date": "Q1 2012",
    },
    "E5-2630": {
        "architecture": "Sandy Bridge-EP",
        "vendor": "Intel",
        "generation": 3,
        "launch_date": "Q1 2012",
    },
    "E5-2630L": {
        "architecture": "Sandy Bridge-EP",
        "vendor": "Intel",
        "generation": 3,
        "launch_date": "Q1 2012",
    },
    "E5-2660": {
        "architecture": "Sandy Bridge-EP",
        "vendor": "Intel",
        "generation": 3,
        "launch_date": "Q1 2012",
    },
    "7301": {
        "architecture": "Zen",
        "vendor": "AMD",
        "generation": 1,
        "launch_date": "Q2 2017",
    },
    "7352": {
        "architecture": "Zen 2",
        "vendor": "AMD",
        "generation": 2,
        "launch_date": "Q3 2019",
    },
    "7452": {
        "architecture": "Zen 2",
        "vendor": "AMD",
        "generation": 2,
        "launch_date": "Q3 2019",
    },
    "7642": {
        "architecture": "Zen 2",
        "vendor": "AMD",
        "generation": 2,
        "launch_date": "Q3 2019",
    },
    "7742": {
        "architecture": "Zen 2",
        "vendor": "AMD",
        "generation": 2,
        "launch_date": "Q3 2019",
    },
}


def main():
    """Run the full energy-measurement analysis pipeline.

    Pass the string "test" as the first command-line argument to analyse
    the small sample CSVs instead of the full result sets.
    """
    # FIX: the original read sys.argv[1] unconditionally and crashed with
    # IndexError when the script was launched without arguments.
    test = len(sys.argv) > 1 and sys.argv[1] == "test"

    # Build (or load from cache) one energy-statistics frame per OS. The
    # regex carries four capture groups over the result-file paths; their
    # interpretation is delegated to load.load_results (see energy_for_os).
    debian11_energy_stats_df = energy_for_os(
        "debian11-5.10-0",
        r"batches/debian11-5\.10-0\.d/results-debian11-5\.10-0\.d/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )
    ubuntu2404_energy_stats_df = energy_for_os(
        "ubuntu2404nfs-6.8-0",
        r"batches/ubuntu2404nfs-6\.8-0\.d/results-ubuntu2404nfs-6\.8-0\.d/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )
    # Invoked only for its CSV-caching side effect: the returned frame was
    # never used by any research question below.
    energy_for_os(
        "powerapi",
        r"batches/powerapi\.d/results-powerapi\.d/([^/]+)/([^/]+)/([^/]+)/[^_]*_([^_]+).*",
        test,
    )

    # RQ1/RQ3: coefficient-of-variation correlations per OS and job mode.
    rq3.correlation_perf_perf_hwpc_hwpc_cv_os(
        ubuntu2404_energy_stats_df, debian11_energy_stats_df, "alone"
    )
    rq1.correlation_perf_hwpc_cv(debian11_energy_stats_df, "alone", "debian11 Kernel 5.10")
    rq1.correlation_perf_hwpc_cv(debian11_energy_stats_df, "not_alone", "debian11 Kernel 5.10")
    rq1.correlation_perf_hwpc_cv(ubuntu2404_energy_stats_df, "alone", "ubuntu2404 Kernel 6.8")
    rq1.correlation_perf_hwpc_cv(ubuntu2404_energy_stats_df, "not_alone", "ubuntu2404 Kernel 6.8")

    # RQ2: per-processor CV distributions at 25 000 operations.
    rq2.boxplots_perf_hwpc_cv_processor(
        debian11_energy_stats_df,
        "processor_detail",
        "pkg_coefficient_of_variation",
        "job",
        "25 000 Operations",
    )

    # NOTE(review): a pl.concat of both frames (filtered to
    # nb_ops_per_core > 25) was computed here and never used; the dead
    # computation has been removed.

    # Pair Ubuntu and Debian measurements of the same (node, workload) cell.
    joined_df = ubuntu2404_energy_stats_df.join(
        debian11_energy_stats_df,
        on=["node", "nb_ops_per_core", "nb_core", "job"],
        suffix="_debian",
    )

    # Get rid of 25 OPS as it may be irrelevant
    joined_df = joined_df.sql("SELECT * FROM self WHERE nb_ops_per_core > 25")

    # RQ3/4: OS-comparison plots.
    rq34.os_comparison_boxplots_processor_versions_pkg_all(
        [debian11_energy_stats_df, ubuntu2404_energy_stats_df]
    )
    rq34.os_comparison_boxplots_processor_versions_ram_all(
        [debian11_energy_stats_df, ubuntu2404_energy_stats_df]
    )
    print("Heatmaps pkg perf alone")
    rq34.os_comparison_heatmap_processor_versions_pkg_nb_ops(
        joined_df.sql("SELECT * FROM self WHERE job = 'perf_alone'"), "PERF"
    )
    print("Heatmaps pkg hwpc alone")
    rq34.os_comparison_heatmap_processor_versions_pkg_nb_ops(
        joined_df.sql("SELECT * FROM self WHERE job = 'hwpc_alone'"), "HWPC"
    )
    print("Heatmaps ram perf alone")
    rq34.os_comparison_heatmap_processor_versions_ram_nb_ops(
        joined_df.sql("SELECT * FROM self WHERE job = 'perf_alone'"), "PERF"
    )
    print("Heatmaps ram hwpc alone")
    rq34.os_comparison_heatmap_processor_versions_ram_nb_ops(
        joined_df.sql("SELECT * FROM self WHERE job = 'hwpc_alone'"), "HWPC"
    )
    rq34.os_comparison_heatmap_processor_versions_pkg_percent_used(joined_df)
    rq34.os_comparison_heatmap_processor_versions_ram_percent_used(joined_df)

    # Facet grids per OS, again excluding the 25-ops runs.
    rq34.debian_facetgrid_processor_versions_pkg_cv_nb_ops(
        debian11_energy_stats_df.sql("SELECT * FROM self WHERE nb_ops_per_core > 25")
    )
    rq34.debian_facetgrid_processor_versions_ram_cv_nb_ops(
        debian11_energy_stats_df.sql("SELECT * FROM self WHERE nb_ops_per_core > 25")
    )
    rq34.ubuntu_facetgrid_processor_versions_pkg_cv_nb_ops(
        ubuntu2404_energy_stats_df.sql("SELECT * FROM self WHERE nb_ops_per_core > 25")
    )
    rq34.ubuntu_facetgrid_processor_versions_ram_cv_nb_ops(
        ubuntu2404_energy_stats_df.sql("SELECT * FROM self WHERE nb_ops_per_core > 25")
    )


def energy_for_os(os_flavor, results_directory_match, test):
    """Return the energy-statistics DataFrame for one OS flavour.

    The frame is read from a cached CSV when one exists; otherwise it is
    rebuilt from the raw HWPC/perf result files and the node inventories,
    written back to the cache, and returned.

    Args:
        os_flavor: Batch directory stem, e.g. "debian11-5.10-0".
        results_directory_match: Regex with four capture groups applied to
            result-file paths; its interpretation is delegated to
            load.load_results.
        test: When True, use the small "*_energy_stats_sample.csv" cache.
    """
    if test:
        energy_stats_csv_file = (
            f"batches/{os_flavor}.d/{os_flavor}_energy_stats_sample.csv"
        )
    else:
        energy_stats_csv_file = f"batches/{os_flavor}.d/{os_flavor}_energy_stats.csv"
    # Fast path: reuse previously computed statistics.
    if os.path.exists(energy_stats_csv_file):
        return pl.read_csv(energy_stats_csv_file)

    results_directory: str = f"batches/{os_flavor}.d/results-{os_flavor}.d/"
    inventories_directory: str = f"batches/{os_flavor}.d/inventories-{os_flavor}.d/"
    (hwpc_files, perf_files) = extract.extract_csv_files(results_directory)

    nodes_df = extract.extract_json_files(
        directory=inventories_directory, schema=schemas.nodes_configuration_columns
    )

    # Enrich the node inventory with CPU detail/generation/vendor columns
    # derived from the static vendor_generation_map lookup.
    nodes_df = nodes_df.with_columns(
        [
            (
                pl.col("processor_version")
                .map_elements(
                    lambda x: f"{x}\n{vendor_generation_map[x]['architecture']}",
                    return_dtype=pl.String,
                )
                .alias("processor_detail")
            ),
            (
                pl.col("processor_version")
                .map_elements(
                    # FIX: the "generation" values are ints, but the declared
                    # return_dtype is pl.String — stringify them so polars
                    # does not reject the mapped values.
                    lambda x: str(vendor_generation_map[x]["generation"]),
                    return_dtype=pl.String,
                )
                .alias("processor_generation")
            ),
            (
                pl.col("processor_version")
                .map_elements(
                    lambda x: vendor_generation_map[x]["vendor"], return_dtype=pl.String
                )
                .alias("processor_vendor")
            ),
        ]
    )

    print("Nodes Configuration glimpse:\n", nodes_df.head())

    # Data Exploration
    (hwpc_results, perf_results) = load.load_results(
        hwpc_files, perf_files, results_directory_match, test
    )
    print(
        "HWPC Results glimpse:\n",
        hwpc_results.head(),
        "\nHWPC Results stats:\n",
        hwpc_results.describe(),
    )
    print(hwpc_results.sql("select energy_pkg from self").describe())
    print(
        "Perf Results glimpse:\n",
        perf_results.head(),
        "\nPerf Results stats:\n",
        perf_results.describe(),
    )

    # Aggregate per-run energy statistics and cache them for next time.
    energy_stats_df = load.load_energy(hwpc_results, perf_results, nodes_df, os_flavor)
    energy_stats_df.write_csv(energy_stats_csv_file, separator=",")

    return energy_stats_df


if __name__ == "__main__":
main()
71 changes: 71 additions & 0 deletions analysis/execution_time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import glob
import os
import statistics

import pandas as pd

def compute_mean_std(directory, nb_ops):
    """Print and return mean/std of ``time_elapsed`` for one NB_OPS value.

    Recursively collects every ``perf_*_{nb_ops}.csv`` under *directory*,
    pools their ``time_elapsed`` columns, and reports the mean and the
    population standard deviation.

    Args:
        directory: Root directory searched recursively for result CSVs.
        nb_ops: Operation count embedded in the target file names.

    Returns:
        ``(mean, std_dev)`` in seconds, or ``None`` when no matching file
        or no valid ``time_elapsed`` value was found (the original
        implementation also returned ``None`` on those paths, so callers
        that ignore the return value are unaffected).
    """
    pattern = os.path.join(directory, f"**/perf_*_{nb_ops}.csv")
    files = glob.glob(pattern, recursive=True)

    if not files:
        print(f"No files found for NB_OPS={nb_ops}")
        return None

    time_elapsed_values = []
    for file in files:
        try:
            df = pd.read_csv(file)
            time_elapsed_values.extend(df["time_elapsed"].dropna())
        except Exception as e:
            # Best-effort: a single malformed file must not abort the scan.
            print(f"Error reading file {file}: {e}")

    if not time_elapsed_values:
        print(f"No valid time_elapsed values found in files for NB_OPS={nb_ops}")
        return None

    # Use the stdlib instead of the original hand-rolled mean/variance;
    # pstdev is the population std dev, matching the original /len formula.
    mean_time = statistics.fmean(time_elapsed_values)
    std_dev_time = statistics.pstdev(time_elapsed_values, mu=mean_time)

    print(f"Results for NB_OPS={nb_ops}:")
    print(f"  Mean time_elapsed: {mean_time:.6f} seconds")
    print(f"  Standard deviation: {std_dev_time:.6f} seconds")
    return mean_time, std_dev_time

# Report timing statistics for every (platform, NB_OPS) combination.
# The original repeated the same four compute_mean_std calls per platform
# with stale "Change this ..." comments; a data-driven loop prints the
# exact same sequence of results.
NB_OPS_VALUES = (25, 250, 2500, 25000)
PLATFORM_DIRECTORIES = {
    "Ubuntu": "./batches/ubuntu2404nfs-6.8-0.d/results-ubuntu2404nfs-6.8-0.d/",
    "Debian": "./batches/debian11-5.10-0.d/results-debian11-5.10-0.d/",
    "Powerapi": "./results_powerapi2u",
}

for platform, directory in PLATFORM_DIRECTORIES.items():
    print(f"For {platform}")
    for nb_ops in NB_OPS_VALUES:
        compute_mean_std(directory, nb_ops)
Loading