-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbenchmark.py
More file actions
97 lines (80 loc) · 2.82 KB
/
benchmark.py
File metadata and controls
97 lines (80 loc) · 2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import time
import numpy as np
import torch
import matplotlib.pyplot as plt
from MPSENet import MPSENet
# Set PyTorch CUDA configuration for optimal memory management.
# NOTE(review): assumes torch reads PYTORCH_CUDA_ALLOC_CONF lazily at first
# CUDA allocation rather than at `import torch` — confirm, since the import
# happens above this line.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# Download (if needed) and load the pretrained MP-SENet model onto the GPU.
model = MPSENet.from_pretrained("JacobLinCool/MP-SENet-DNS").to("cuda")
sample_rate = 16000 # Hz
durations = range(1, 30) # From 1 second onwards (covers 1..29 s inclusive)
memory_usage = []  # peak GPU memory (MB) per measured duration
runtimes = []  # forward-pass wall time (s) per measured duration
def measure_memory_and_runtime(duration, sample_rate, model):
    """Benchmark one forward pass of *model* on random noise of a given length.

    Args:
        duration: Audio length in seconds.
        sample_rate: Sampling rate in Hz used to size the input signal.
        model: Callable MP-SENet model already placed on the CUDA device.

    Returns:
        Tuple ``(max_memory_mb, runtime_sec)``, or ``(None, None)`` when the
        forward pass runs out of GPU memory (callers use this to stop early).
    """
    # Generate random audio data in [-1, 1), mimicking normalized PCM samples.
    n_samples = int(duration * sample_rate)
    audio_data = np.random.uniform(-1, 1, n_samples).astype(np.float32)
    audio_tensor = torch.tensor(audio_data, device="cuda")
    # Reset peak-memory stats (reset_max_memory_allocated() is deprecated).
    torch.cuda.reset_peak_memory_stats()
    # CUDA kernels launch asynchronously: synchronize before starting the clock
    # and after the forward pass so the interval reflects actual GPU work.
    torch.cuda.synchronize()
    start_time = time.perf_counter()
    try:
        # Oversized segment_size forces the model to process the clip whole.
        model(audio_tensor, segment_size=99999999)
        torch.cuda.synchronize()
    except RuntimeError as e:
        if "out of memory" in str(e):
            print(f"OOM at {duration} sec; stopping early.")
            # Drop the input tensor before emptying the cache so its memory
            # is actually released.
            del audio_tensor
            torch.cuda.empty_cache()
            return None, None
        raise  # re-raise unrelated errors with the original traceback intact
    end_time = time.perf_counter()
    # Calculate runtime and peak memory usage for this pass.
    runtime = end_time - start_time
    max_memory = torch.cuda.max_memory_allocated() / 1e6  # bytes -> MB
    # Clear GPU cache to avoid memory accumulation across measurements.
    del audio_tensor
    torch.cuda.empty_cache()
    time.sleep(1)  # brief pause so measurements don't bleed into each other
    return max_memory, runtime
# Warm-up with 0.5 seconds of audio so CUDA context creation and kernel
# compilation don't inflate the first timed measurement below.
print("Warming up the model...")
warmup_duration = 0.5  # 0.5 second
# Use the shared sample_rate constant (was hard-coded to 16000 here, which
# would silently desynchronize if the constant above ever changed).
warmup_memory, warmup_runtime = measure_memory_and_runtime(
    warmup_duration, sample_rate, model
)
if warmup_memory is not None:
    print(
        f"Warm-up completed - Memory: {warmup_memory:.2f} MB - Runtime: {warmup_runtime:.4f} sec"
    )
# Run measurements for each duration, collecting results for plotting.
for duration in durations:
    max_memory, runtime = measure_memory_and_runtime(duration, sample_rate, model)
    if max_memory is None:
        break  # Stop the loop if OOM occurs; shorter durations already recorded
    print(
        f"Duration: {duration} sec - Memory: {max_memory:.2f} MB - Runtime: {runtime:.4f} sec"
    )
    memory_usage.append(max_memory)
    runtimes.append(runtime)
# Visualize the collected measurements as two side-by-side panels using the
# object-oriented matplotlib API.
fig, (mem_ax, time_ax) = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: peak GPU memory for each successfully measured duration.
mem_ax.plot(durations[: len(memory_usage)], memory_usage, marker="o", color="b")
mem_ax.set_xlabel("Audio Duration (seconds)")
mem_ax.set_ylabel("Max Memory Allocated (MB)")
mem_ax.set_title("Memory Usage vs. Audio Duration")
mem_ax.grid(True)

# Right panel: forward-pass runtime for each successfully measured duration.
time_ax.plot(durations[: len(runtimes)], runtimes, marker="o", color="r")
time_ax.set_xlabel("Audio Duration (seconds)")
time_ax.set_ylabel("Runtime (seconds)")
time_ax.set_title("Runtime vs. Audio Duration")
time_ax.grid(True)

# Display the plots.
fig.tight_layout()
plt.show()