-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbenchmark.py
More file actions
97 lines (80 loc) · 2.82 KB
/
benchmark.py
File metadata and controls
97 lines (80 loc) · 2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import time
import numpy as np
import torch
import matplotlib.pyplot as plt
from MPSENet import MPSENet
# Set PyTorch CUDA configuration for optimal memory management.
# NOTE(review): assumes torch reads PYTORCH_CUDA_ALLOC_CONF lazily at first
# CUDA allocation rather than at `import torch` — confirm, since the import
# happens above this line.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# Download (if needed) and load the pretrained MP-SENet model onto the GPU.
model = MPSENet.from_pretrained("JacobLinCool/MP-SENet-DNS").to("cuda")
sample_rate = 16000 # Hz
durations = range(1, 30) # From 1 second onwards (covers 1..29 s inclusive)
memory_usage = []  # peak GPU memory (MB) per measured duration
runtimes = []  # forward-pass wall time (s) per measured duration
def measure_memory_and_runtime(duration, sample_rate, model):
    """Benchmark one forward pass of *model* on random noise of a given length.

    Args:
        duration: Audio length in seconds.
        sample_rate: Sampling rate in Hz used to size the input signal.
        model: Callable MP-SENet model already placed on the CUDA device.

    Returns:
        Tuple ``(max_memory_mb, runtime_sec)``, or ``(None, None)`` when the
        forward pass runs out of GPU memory (callers use this to stop early).
    """
    # Generate random audio data in [-1, 1), mimicking normalized PCM samples.
    n_samples = int(duration * sample_rate)
    audio_data = np.random.uniform(-1, 1, n_samples).astype(np.float32)
    audio_tensor = torch.tensor(audio_data, device="cuda")
    # Reset peak-memory stats (reset_max_memory_allocated() is deprecated).
    torch.cuda.reset_peak_memory_stats()
    # CUDA kernels launch asynchronously: synchronize before starting the clock
    # and after the forward pass so the interval reflects actual GPU work.
    torch.cuda.synchronize()
    start_time = time.perf_counter()
    try:
        # Oversized segment_size forces the model to process the clip whole.
        model(audio_tensor, segment_size=99999999)
        torch.cuda.synchronize()
    except RuntimeError as e:
        if "out of memory" in str(e):
            print(f"OOM at {duration} sec; stopping early.")
            # Drop the input tensor before emptying the cache so its memory
            # is actually released.
            del audio_tensor
            torch.cuda.empty_cache()
            return None, None
        raise  # re-raise unrelated errors with the original traceback intact
    end_time = time.perf_counter()
    # Calculate runtime and peak memory usage for this pass.
    runtime = end_time - start_time
    max_memory = torch.cuda.max_memory_allocated() / 1e6  # bytes -> MB
    # Clear GPU cache to avoid memory accumulation across measurements.
    del audio_tensor
    torch.cuda.empty_cache()
    time.sleep(1)  # brief pause so measurements don't bleed into each other
    return max_memory, runtime
# Warm-up with 0.5 seconds of audio so CUDA context creation and kernel
# compilation don't inflate the first timed measurement below.
print("Warming up the model...")
warmup_duration = 0.5  # 0.5 second
# Use the shared sample_rate constant (was hard-coded to 16000 here, which
# would silently desynchronize if the constant above ever changed).
warmup_memory, warmup_runtime = measure_memory_and_runtime(
    warmup_duration, sample_rate, model
)
if warmup_memory is not None:
    print(
        f"Warm-up completed - Memory: {warmup_memory:.2f} MB - Runtime: {warmup_runtime:.4f} sec"
    )
# Run measurements for each duration, collecting results for plotting.
for duration in durations:
    max_memory, runtime = measure_memory_and_runtime(duration, sample_rate, model)
    if max_memory is None:
        break  # Stop the loop if OOM occurs; shorter durations already recorded
    print(
        f"Duration: {duration} sec - Memory: {max_memory:.2f} MB - Runtime: {runtime:.4f} sec"
    )
    memory_usage.append(max_memory)
    runtimes.append(runtime)
# Visualize the collected measurements as two side-by-side panels using the
# object-oriented matplotlib API.
fig, (mem_ax, time_ax) = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: peak GPU memory for each successfully measured duration.
mem_ax.plot(durations[: len(memory_usage)], memory_usage, marker="o", color="b")
mem_ax.set_xlabel("Audio Duration (seconds)")
mem_ax.set_ylabel("Max Memory Allocated (MB)")
mem_ax.set_title("Memory Usage vs. Audio Duration")
mem_ax.grid(True)

# Right panel: forward-pass runtime for each successfully measured duration.
time_ax.plot(durations[: len(runtimes)], runtimes, marker="o", color="r")
time_ax.set_xlabel("Audio Duration (seconds)")
time_ax.set_ylabel("Runtime (seconds)")
time_ax.set_title("Runtime vs. Audio Duration")
time_ax.grid(True)

# Display the plots.
fig.tight_layout()
plt.show()