Skip to content

Commit 1b29c73

Browse files
committed
优化vad检测:如果检测区间最大值的绝对值小于1则维持原样,反之则做归一化。
1 parent a7819d0 commit 1b29c73

File tree

1 file changed

+19
-7
lines changed

1 file changed

+19
-7
lines changed

sense-voice/csrc/sense-voice-frontend.h

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,13 +127,28 @@ bool load_wav_file(const char *filename, int32_t *sampling_rate,
127127

128128

129129
template<typename T>
130-
bool vad_energy_zcr(const typename std::vector<T>::const_iterator& pcmf32, size_t siz, int sample_rate, T energy_threshold = 0.01, T zcr_threshold = 0.2, bool verbose = false)
130+
bool vad_energy_zcr(const typename std::vector<T>::const_iterator& pcmf32, size_t siz, int sample_rate,
131+
T energy_threshold = 0.01, T zcr_threshold = 0.2, bool verbose = false)
131132
{
132133
const int frame_size = 256; // 16ms at 16kHz
133134
const int frame_shift = 128; // 50% overlap
134-
135+
135136
if (siz < frame_size) return false;
136137

138+
// 1. 计算输入信号的绝对最大值进行归一化
139+
T max_val = 0.0f;
140+
for (size_t i = 0; i < siz; ++i) {
141+
T val = std::abs(*(pcmf32 + i));
142+
if (val > max_val) {
143+
max_val = val;
144+
}
145+
}
146+
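// Clamp the peak to at least 1: signals whose peak is below 1 are left unscaled (scale == 1) and only louder input is normalized; this also rules out division by zero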
147+
if (max_val < 1) {
148+
max_val = 1;
149+
}
150+
T scale = 1.0f / max_val;
151+
137152
int num_frames = (siz - frame_size) / frame_shift + 1;
138153
std::vector<T> energies(num_frames);
139154
std::vector<T> zcrs(num_frames);
@@ -142,17 +157,14 @@ bool vad_energy_zcr(const typename std::vector<T>::const_iterator& pcmf32, size_
142157
for (int f = 0; f < num_frames; f++) {
143158
T energy = 0.0f;
144159
int zcr = 0;
145-
146160
int frame_start = f * frame_shift;
147-
148161
// Calculate energy
149162
for (int i = 0; i < frame_size; i++) {
150-
T sample = pcmf32[frame_start + i];
163+
T sample = pcmf32[frame_start + i] * scale;
151164
energy += sample * sample;
152165
}
153166
energy /= frame_size;
154167
energies[f] = energy;
155-
156168
// Calculate zero-crossing rate
157169
for (int i = 1; i < frame_size; i++) {
158170
if ((pcmf32[frame_start + i - 1] * pcmf32[frame_start + i]) < 0)
@@ -172,7 +184,7 @@ bool vad_energy_zcr(const typename std::vector<T>::const_iterator& pcmf32, size_
172184
avg_zcr /= num_frames;
173185

174186
if (verbose) {
175-
fprintf(stderr, "%s: avg_energy: %f, avg_zcr: %f, energy_threshold: %f, zcr_threshold: %f\n",
187+
fprintf(stderr, "%s: avg_energy: %f, avg_zcr: %f, energy_threshold: %f, zcr_threshold: %f\n",
176188
__func__, avg_energy, avg_zcr, energy_threshold, zcr_threshold);
177189
}
178190

0 commit comments

Comments
 (0)