-
Notifications
You must be signed in to change notification settings - Fork 357
Audio: MFCC: Add Voice Activity Detection based on Mel spectrum #10782
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d495cc1
1c5fb79
5537875
2fa783a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,14 +21,17 @@ | |
| #include <stddef.h> | ||
| #include <stdint.h> | ||
|
|
||
| #include <sof/audio/mfcc/mfcc_vad.h> | ||
|
|
||
| LOG_MODULE_REGISTER(mfcc_common, CONFIG_SOF_LOG_LEVEL); | ||
|
|
||
| /* | ||
| * The main processing function for MFCC | ||
| */ | ||
|
|
||
| static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data *cd) | ||
| static int mfcc_stft_process(struct processing_module *mod, struct mfcc_comp_data *cd) | ||
| { | ||
| const struct comp_dev *dev = mod->dev; | ||
| struct sof_mfcc_config *config = cd->config; | ||
| struct mfcc_state *state = &cd->state; | ||
| struct mfcc_buffer *buf = &state->buf; | ||
|
|
@@ -169,6 +172,32 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data * | |
|
|
||
| cc_count += state->dct.num_out; | ||
| } | ||
|
|
||
| /* Use hop counter for frame numbering (independent of VAD enable) */ | ||
| state->header.frame_number = state->hop_count; | ||
|
|
||
| /* Run VAD on the mel log spectrum (available in both modes) */ | ||
| if (config->enable_vad) { | ||
| mfcc_vad_update(&cd->vad, state->mel_log_32); | ||
|
|
||
| /* Populate data header for this output frame */ | ||
| state->header.energy = cd->vad.energy; | ||
| state->header.noise_energy = cd->vad.noise_energy; | ||
| state->header.vad_flag = cd->vad.is_speech ? 1 : 0; | ||
| } | ||
|
|
||
| /* Increment hop counter at end of hop processing */ | ||
| state->hop_count++; | ||
|
|
||
| /* Send notification when VAD state changes */ | ||
| if (config->enable_vad && config->update_controls) { | ||
| bool vad_now = cd->vad.is_speech; | ||
|
|
||
| if (vad_now != cd->vad_prev) { | ||
| mfcc_send_vad_notification(mod, vad_now ? 1 : 0); | ||
| cd->vad_prev = vad_now; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return cc_count; | ||
|
|
@@ -267,9 +296,8 @@ void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer | |
| struct mfcc_comp_data *cd = module_get_private_data(mod); | ||
| struct mfcc_state *state = &cd->state; | ||
| struct mfcc_buffer *buf = &cd->state.buf; | ||
| uint32_t magic = MFCC_MAGIC; | ||
| int16_t *w_ptr = audio_stream_get_wptr(sink); | ||
| const int num_magic = 2; | ||
| const int num_header_s16 = sizeof(state->header) / sizeof(int16_t); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is the division guaranteed to be exact, or do you want it to round down? |
||
| int num_ceps; | ||
| int sink_samples; | ||
| int to_copy; | ||
|
|
@@ -278,27 +306,35 @@ void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer | |
| mfcc_source_copy_s16(bsource, buf, &state->emph, frames, state->source_channel); | ||
|
|
||
| /* Run STFT and processing after FFT: Mel auditory filter and DCT. */ | ||
| num_ceps = mfcc_stft_process(mod->dev, cd); | ||
| num_ceps = mfcc_stft_process(mod, cd); | ||
|
|
||
| /* If new output produced, set up pointer into scratch data and mark magic pending */ | ||
| /* If new output produced, set up pointer into scratch data and mark header pending */ | ||
| if (num_ceps > 0) { | ||
| if (state->mel_only) | ||
| if (state->mel_only) { | ||
| state->out_data_ptr = state->mel_spectra->data; | ||
| else | ||
| } else { | ||
| state->out_data_ptr = state->cepstral_coef->data; | ||
| } | ||
|
|
||
| state->out_remain = num_ceps; | ||
| state->magic_pending = true; | ||
| state->header_pending = true; | ||
| } | ||
|
|
||
| /* Write to sink, limited by period size */ | ||
| sink_samples = frames * audio_stream_get_channels(sink); | ||
|
|
||
| /* Write magic word first if pending */ | ||
| if (state->magic_pending && sink_samples >= num_magic) { | ||
| w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_magic, (int16_t *)&magic); | ||
| sink_samples -= num_magic; | ||
| state->magic_pending = false; | ||
| /* Write data header first if pending */ | ||
| if (state->header_pending) { | ||
| if (sink_samples < num_header_s16) { | ||
| /* Not enough sink space for header, defer entire frame */ | ||
| mfcc_sink_copy_zero_s16(sink, w_ptr, sink_samples); | ||
| return; | ||
| } | ||
|
|
||
| w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_header_s16, | ||
| (int16_t *)&state->header); | ||
| sink_samples -= num_header_s16; | ||
| state->header_pending = false; | ||
|
singalsu marked this conversation as resolved.
singalsu marked this conversation as resolved.
|
||
| } | ||
|
|
||
| /* Write cepstral/mel data from scratch buffer */ | ||
|
|
@@ -363,9 +399,8 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer | |
| struct mfcc_comp_data *cd = module_get_private_data(mod); | ||
| struct mfcc_state *state = &cd->state; | ||
| struct mfcc_buffer *buf = &cd->state.buf; | ||
| uint32_t magic = MFCC_MAGIC; | ||
| int32_t *w_ptr = audio_stream_get_wptr(sink); | ||
| const int num_magic = 1; /* one int32_t word for magic */ | ||
| const int num_header_s32 = sizeof(state->header) / sizeof(int32_t); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same question about rounding here: is the division of the header size by sizeof(int32_t) guaranteed to be exact? |
||
| int num_ceps; | ||
| int sink_samples; | ||
| int remain_s32; | ||
|
|
@@ -376,7 +411,7 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer | |
| mfcc_source_copy_s24(bsource, buf, &state->emph, frames, state->source_channel); | ||
|
|
||
| /* Run STFT and processing after FFT */ | ||
| num_ceps = mfcc_stft_process(mod->dev, cd); | ||
| num_ceps = mfcc_stft_process(mod, cd); | ||
|
|
||
| /* If new output produced, set up pointer into scratch data */ | ||
| if (num_ceps > 0) { | ||
|
|
@@ -391,17 +426,24 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer | |
| } | ||
|
|
||
| state->out_remain = num_ceps; | ||
| state->magic_pending = true; | ||
| state->header_pending = true; | ||
| } | ||
|
|
||
| /* Write to sink, limited by period size */ | ||
| sink_samples = frames * audio_stream_get_channels(sink); | ||
|
|
||
| /* Write magic word first if pending */ | ||
| if (state->magic_pending && sink_samples >= num_magic) { | ||
| w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_magic, (int32_t *)&magic); | ||
| sink_samples -= num_magic; | ||
| state->magic_pending = false; | ||
| /* Write data header first if pending */ | ||
| if (state->header_pending) { | ||
| if (sink_samples < num_header_s32) { | ||
| /* Not enough sink space for header, defer entire frame */ | ||
| mfcc_sink_copy_zero_s32(sink, w_ptr, sink_samples); | ||
| return; | ||
| } | ||
|
|
||
| w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_header_s32, | ||
| (int32_t *)&state->header); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It looks like we expect the header size to be a multiple of 4 bytes, so maybe we could add a build-time assertion and a comment here to make that clear. This can be a follow-up; I am just checking that this is indeed the case.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No assertions; return an error instead. |
||
| sink_samples -= num_header_s32; | ||
| state->header_pending = false; | ||
| } | ||
|
|
||
| if (state->mel_only) { | ||
|
|
@@ -443,9 +485,8 @@ void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer | |
| struct mfcc_comp_data *cd = module_get_private_data(mod); | ||
| struct mfcc_state *state = &cd->state; | ||
| struct mfcc_buffer *buf = &cd->state.buf; | ||
| uint32_t magic = MFCC_MAGIC; | ||
| int32_t *w_ptr = audio_stream_get_wptr(sink); | ||
| const int num_magic = 1; /* one int32_t word for magic */ | ||
| const int num_header_s32 = sizeof(state->header) / sizeof(int32_t); | ||
| int num_ceps; | ||
| int sink_samples; | ||
| int remain_s32; | ||
|
|
@@ -455,7 +496,7 @@ void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer | |
| mfcc_source_copy_s32(bsource, buf, &state->emph, frames, state->source_channel); | ||
|
|
||
| /* Run STFT and processing after FFT */ | ||
| num_ceps = mfcc_stft_process(mod->dev, cd); | ||
| num_ceps = mfcc_stft_process(mod, cd); | ||
|
|
||
| /* If new output produced, set up pointer into scratch data */ | ||
| if (num_ceps > 0) { | ||
|
|
@@ -466,17 +507,24 @@ void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer | |
| } | ||
|
|
||
| state->out_remain = num_ceps; | ||
| state->magic_pending = true; | ||
| state->header_pending = true; | ||
| } | ||
|
|
||
| /* Write to sink, limited by period size */ | ||
| sink_samples = frames * audio_stream_get_channels(sink); | ||
|
|
||
| /* Write magic word first if pending */ | ||
| if (state->magic_pending && sink_samples >= num_magic) { | ||
| w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_magic, (int32_t *)&magic); | ||
| sink_samples -= num_magic; | ||
| state->magic_pending = false; | ||
| /* Write data header first if pending */ | ||
| if (state->header_pending) { | ||
| if (sink_samples < num_header_s32) { | ||
| /* Not enough sink space for header, defer entire frame */ | ||
| mfcc_sink_copy_zero_s32(sink, w_ptr, sink_samples); | ||
| return; | ||
| } | ||
|
|
||
| w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_header_s32, | ||
| (int32_t *)&state->header); | ||
| sink_samples -= num_header_s32; | ||
| state->header_pending = false; | ||
| } | ||
|
|
||
| if (state->mel_only) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.