Skip to content

Commit d06f950

Browse files
committed
Audio: Level Multiplier: Add HiFi5 processing
This patch adds the HiFi5 optimized processing. The MCPS performance reported for stereo 48 kHz audio by: process_test('level_multiplier', N, N, 48e3, 0, 1, 'xt-run --mem_model'); is 2.21 / 2.41 / 2.41 for S16_LE / S24_LE / S32_LE formats when processing with non-unity gain. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
1 parent dcd3c83 commit d06f950

File tree

4 files changed

+357
-2
lines changed

4 files changed

+357
-2
lines changed

src/audio/level_multiplier/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ else()
77
add_local_sources(sof level_multiplier.c)
88
add_local_sources(sof level_multiplier-generic.c)
99
add_local_sources(sof level_multiplier-hifi3.c)
10+
add_local_sources(sof level_multiplier-hifi5.c)
1011

1112
if(CONFIG_IPC_MAJOR_3)
1213
add_local_sources(sof level_multiplier-ipc3.c)

src/audio/level_multiplier/level_multiplier-hifi3.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
#define LEVEL_MULTIPLIER_S32_SHIFT 8 /* See explanation from level_multiplier_s32() */
1414

15-
#if SOF_USE_MIN_HIFI(3, VOLUME)
15+
#if SOF_USE_HIFI(3, VOLUME) || SOF_USE_HIFI(4, VOLUME)
1616

1717
#include <xtensa/tie/xt_hifi3.h>
1818

@@ -337,4 +337,4 @@ level_multiplier_func level_multiplier_find_proc_func(enum sof_ipc_frame src_fmt
337337
return NULL;
338338
}
339339

340-
#endif /* SOF_USE_MIN_HIFI(3, VOLUME) */
340+
#endif /* SOF_USE_HIFI(3, VOLUME) || SOF_USE_HIFI(4, VOLUME) */
Lines changed: 353 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,353 @@
1+
// SPDX-License-Identifier: BSD-3-Clause
2+
//
3+
// Copyright(c) 2025 Intel Corporation.
4+
5+
#include <sof/audio/module_adapter/module/generic.h>
6+
#include <sof/audio/component.h>
7+
#include <sof/audio/sink_api.h>
8+
#include <sof/audio/sink_source_utils.h>
9+
#include <sof/audio/source_api.h>
10+
#include <stdint.h>
11+
#include "level_multiplier.h"
12+
13+
#define LEVEL_MULTIPLIER_S32_SHIFT 8 /* See explanation from level_multiplier_s32() */
14+
15+
#if SOF_USE_MIN_HIFI(5, VOLUME)
16+
17+
#include <xtensa/tie/xt_hifi3.h>
18+
19+
#if CONFIG_FORMAT_S16LE
20+
/**
21+
* level_multiplier_s16() - Process S16_LE format.
22+
* @mod: Pointer to module data.
23+
* @source: Source for PCM samples data.
24+
* @sink: Sink for PCM samples data.
25+
* @frames: Number of audio data frames to process.
26+
*
27+
* This is the processing function for 16-bit signed integer PCM formats. The
28+
* audio samples are copied from source to sink with gain defined in cd->gain.
29+
*
30+
* Return: Value zero for success, otherwise an error code.
31+
*/
32+
static int level_multiplier_s16(const struct processing_module *mod,
33+
struct sof_source *source,
34+
struct sof_sink *sink,
35+
uint32_t frames)
36+
{
37+
struct level_multiplier_comp_data *cd = module_get_private_data(mod);
38+
ae_valignx2 x_align;
39+
ae_valignx2 y_align = AE_ZALIGN128();
40+
ae_f32x2 tmp0;
41+
ae_f32x2 tmp1;
42+
const ae_f32x2 gain = cd->gain;
43+
ae_f16x4 samples0;
44+
ae_f16x4 samples1;
45+
ae_int16x8 const *x;
46+
ae_int16x8 *y;
47+
int16_t const *x_start, *x_end;
48+
int16_t *y_start, *y_end;
49+
int x_size, y_size;
50+
int source_samples_without_wrap;
51+
int samples_without_wrap;
52+
int remaining_samples = frames * cd->channels;
53+
int bytes = frames * cd->frame_bytes;
54+
int ret;
55+
int n, i;
56+
57+
ret = source_get_data_s16(source, bytes, (const int16_t **)&x, &x_start, &x_size);
58+
if (ret)
59+
return ret;
60+
61+
/* Similarly get pointer to sink data in circular buffer, buffer start and size. */
62+
ret = sink_get_buffer_s16(sink, bytes, (int16_t **)&y, &y_start, &y_size);
63+
if (ret)
64+
return ret;
65+
66+
/* Set helper pointers to buffer end for wrap check. Then loop until all
67+
* samples are processed.
68+
*/
69+
x_end = x_start + x_size;
70+
y_end = y_start + y_size;
71+
while (remaining_samples) {
72+
/* Find out samples to process before first wrap or end of data. */
73+
source_samples_without_wrap = x_end - (int16_t *)x;
74+
samples_without_wrap = y_end - (int16_t *)y;
75+
samples_without_wrap = MIN(samples_without_wrap, source_samples_without_wrap);
76+
samples_without_wrap = MIN(samples_without_wrap, remaining_samples);
77+
x_align = AE_LA128_PP(x);
78+
79+
/* Process with 128 bit loads and stores */
80+
n = samples_without_wrap >> 3;
81+
for (i = 0; i < n; i++) {
82+
AE_LA16X4X2_IP(samples0, samples1, x_align, x);
83+
84+
AE_MULF2P32X16X4RS(tmp0, tmp1, gain, gain, samples0);
85+
/* Q9.23 to Q1.31 */
86+
tmp0 = AE_SLAI32S(tmp0, 8);
87+
tmp1 = AE_SLAI32S(tmp1, 8);
88+
samples0 = AE_ROUND16X4F32SSYM(tmp0, tmp1);
89+
90+
AE_MULF2P32X16X4RS(tmp0, tmp1, gain, gain, samples1);
91+
/* Q9.23 to Q1.31 */
92+
tmp0 = AE_SLAI32S(tmp0, 8);
93+
tmp1 = AE_SLAI32S(tmp1, 8);
94+
samples1 = AE_ROUND16X4F32SSYM(tmp0, tmp1);
95+
96+
AE_SA16X4X2_IP(samples0, samples1, y_align, y);
97+
}
98+
99+
AE_SA128POS_FP(y_align, y);
100+
n = samples_without_wrap - (n << 3);
101+
for (i = 0; i < n; i++) {
102+
AE_L16_IP(samples0, (ae_f16 *)x, sizeof(ae_f16));
103+
tmp0 = AE_MULFP32X16X2RS_H(gain, samples0);
104+
tmp0 = AE_SLAI32S(tmp0, 8);
105+
samples0 = AE_ROUND16X4F32SSYM(tmp0, tmp0);
106+
AE_S16_0_IP(samples0, (ae_f16 *)y, sizeof(ae_f16));
107+
}
108+
109+
/* One of the buffers needs a wrap (or end of data), so check for wrap */
110+
x = (x >= (ae_int16x8 *)x_end) ? x - x_size : x;
111+
y = (y >= (ae_int16x8 *)y_end) ? y - y_size : y;
112+
remaining_samples -= samples_without_wrap;
113+
}
114+
115+
/* Update the source and sink for bytes consumed and produced. Return success. */
116+
source_release_data(source, bytes);
117+
sink_commit_buffer(sink, bytes);
118+
return 0;
119+
}
120+
#endif /* CONFIG_FORMAT_S16LE */
121+
122+
#if CONFIG_FORMAT_S24LE
123+
/**
124+
* level_multiplier_s24() - Process S24_4LE format.
125+
* @mod: Pointer to module data.
126+
* @source: Source for PCM samples data.
127+
* @sink: Sink for PCM samples data.
128+
* @frames: Number of audio data frames to process.
129+
*
130+
* This is the processing function for 24-bit signed integer PCM formats. The
131+
* audio samples are copied from source to sink with gain defined in cd->gain.
132+
*
133+
* Return: Value zero for success, otherwise an error code.
134+
*/
135+
static int level_multiplier_s24(const struct processing_module *mod,
136+
struct sof_source *source,
137+
struct sof_sink *sink,
138+
uint32_t frames)
139+
{
140+
struct level_multiplier_comp_data *cd = module_get_private_data(mod);
141+
ae_valignx2 x_align;
142+
ae_valignx2 y_align = AE_ZALIGN128();
143+
const ae_f32x2 gain = cd->gain;
144+
ae_f32x2 samples0;
145+
ae_f32x2 samples1;
146+
ae_f32x2 tmp0;
147+
ae_f32x2 tmp1;
148+
ae_int32x4 const *x;
149+
ae_int32x4 *y;
150+
int32_t const *x_start, *x_end;
151+
int32_t *y_start, *y_end;
152+
int x_size, y_size;
153+
int source_samples_without_wrap;
154+
int samples_without_wrap;
155+
int remaining_samples = frames * cd->channels;
156+
int bytes = frames * cd->frame_bytes;
157+
int ret;
158+
int n, i;
159+
160+
ret = source_get_data_s32(source, bytes, (const int32_t **)&x, &x_start, &x_size);
161+
if (ret)
162+
return ret;
163+
164+
/* Similarly get pointer to sink data in circular buffer, buffer start and size. */
165+
ret = sink_get_buffer_s32(sink, bytes, (int32_t **)&y, &y_start, &y_size);
166+
if (ret)
167+
return ret;
168+
169+
/* Set helper pointers to buffer end for wrap check. Then loop until all
170+
* samples are processed.
171+
*/
172+
x_end = x_start + x_size;
173+
y_end = y_start + y_size;
174+
while (remaining_samples) {
175+
/* Find out samples to process before first wrap or end of data. */
176+
source_samples_without_wrap = x_end - (int32_t *)x;
177+
samples_without_wrap = y_end - (int32_t *)y;
178+
samples_without_wrap = MIN(samples_without_wrap, source_samples_without_wrap);
179+
samples_without_wrap = MIN(samples_without_wrap, remaining_samples);
180+
x_align = AE_LA128_PP(x);
181+
182+
/* Process with 64 bit loads and stores */
183+
n = samples_without_wrap >> 2;
184+
for (i = 0; i < n; i++) {
185+
AE_LA32X2X2_IP(samples0, samples1, x_align, x);
186+
AE_MULF2P32X4RS(tmp0, tmp1, gain, gain,
187+
AE_SLAI32(samples0, 8),
188+
AE_SLAI32(samples1, 8));
189+
samples0 = AE_SRAI32(AE_SLAI32S(tmp0, 8), 8);
190+
samples1 = AE_SRAI32(AE_SLAI32S(tmp1, 8), 8);
191+
AE_SA32X2X2_IP(samples0, samples1, y_align, y);
192+
}
193+
194+
AE_SA128POS_FP(y_align, y);
195+
n = samples_without_wrap - (n << 2);
196+
for (i = 0; i < n; i++) {
197+
AE_L32_IP(samples0, (ae_f32 *)x, sizeof(ae_f32));
198+
samples0 = AE_MULFP32X2RS(gain, AE_SLAI32(samples0, 8));
199+
samples0 = AE_SRAI32(AE_SLAI32S(samples0, 8), 8);
200+
AE_S32_L_IP(samples0, (ae_f32 *)y, sizeof(ae_f32));
201+
}
202+
203+
/* One of the buffers needs a wrap (or end of data), so check for wrap */
204+
x = (x >= (ae_int32x4 *)x_end) ? x - x_size : x;
205+
y = (y >= (ae_int32x4 *)y_end) ? y - y_size : y;
206+
remaining_samples -= samples_without_wrap;
207+
}
208+
209+
/* Update the source and sink for bytes consumed and produced. Return success. */
210+
source_release_data(source, bytes);
211+
sink_commit_buffer(sink, bytes);
212+
return 0;
213+
}
214+
#endif /* CONFIG_FORMAT_S24LE */
215+
216+
#if CONFIG_FORMAT_S32LE
217+
/**
218+
* level_multiplier_s32() - Process S32_LE format.
219+
* @mod: Pointer to module data.
220+
* @source: Source for PCM samples data.
221+
* @sink: Sink for PCM samples data.
222+
* @frames: Number of audio data frames to process.
223+
*
224+
* This is the processing function for 32-bit signed integer PCM formats. The
225+
* audio samples are copied from source to sink with gain defined in cd->gain.
226+
*
227+
* Return: Value zero for success, otherwise an error code.
228+
*/
229+
static int level_multiplier_s32(const struct processing_module *mod,
230+
struct sof_source *source,
231+
struct sof_sink *sink,
232+
uint32_t frames)
233+
{
234+
struct level_multiplier_comp_data *cd = module_get_private_data(mod);
235+
ae_valignx2 x_align;
236+
ae_valignx2 y_align = AE_ZALIGN128();
237+
ae_f64 mult0;
238+
ae_f64 mult1;
239+
const ae_f32x2 gain = cd->gain;
240+
ae_f32x2 samples0;
241+
ae_f32x2 samples1;
242+
ae_int32x4 const *x;
243+
ae_int32x4 *y;
244+
int32_t const *x_start, *x_end;
245+
int32_t *y_start, *y_end;
246+
int x_size, y_size;
247+
int source_samples_without_wrap;
248+
int samples_without_wrap;
249+
int remaining_samples = frames * cd->channels;
250+
int bytes = frames * cd->frame_bytes;
251+
int ret;
252+
int n, i;
253+
254+
ret = source_get_data_s32(source, bytes, (const int32_t **)&x, &x_start, &x_size);
255+
if (ret)
256+
return ret;
257+
258+
/* Similarly get pointer to sink data in circular buffer, buffer start and size. */
259+
ret = sink_get_buffer_s32(sink, bytes, (int32_t **)&y, &y_start, &y_size);
260+
if (ret)
261+
return ret;
262+
263+
/* Set helper pointers to buffer end for wrap check. Then loop until all
264+
* samples are processed.
265+
*/
266+
x_end = x_start + x_size;
267+
y_end = y_start + y_size;
268+
while (remaining_samples) {
269+
/* Find out samples to process before first wrap or end of data. */
270+
source_samples_without_wrap = x_end - (int32_t *)x;
271+
samples_without_wrap = y_end - (int32_t *)y;
272+
samples_without_wrap = MIN(samples_without_wrap, source_samples_without_wrap);
273+
samples_without_wrap = MIN(samples_without_wrap, remaining_samples);
274+
x_align = AE_LA128_PP(x);
275+
276+
/* Process with 64 bit loads and stores */
277+
n = samples_without_wrap >> 2;
278+
for (i = 0; i < n; i++) {
279+
AE_LA32X2X2_IP(samples0, samples1, x_align, x);
280+
281+
AE_MULF32X2R_HH_LL(mult0, mult1, gain, samples0);
282+
mult0 = AE_SLAI64(mult0, LEVEL_MULTIPLIER_S32_SHIFT);
283+
mult1 = AE_SLAI64(mult1, LEVEL_MULTIPLIER_S32_SHIFT);
284+
samples0 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */
285+
286+
AE_MULF32X2R_HH_LL(mult0, mult1, gain, samples1);
287+
mult0 = AE_SLAI64(mult0, LEVEL_MULTIPLIER_S32_SHIFT);
288+
mult1 = AE_SLAI64(mult1, LEVEL_MULTIPLIER_S32_SHIFT);
289+
samples1 = AE_ROUND32X2F48SSYM(mult0, mult1); /* Q2.47 -> Q1.31 */
290+
291+
AE_SA32X2X2_IP(samples0, samples1, y_align, y);
292+
}
293+
294+
AE_SA128POS_FP(y_align, y);
295+
n = samples_without_wrap - (n << 2);
296+
for (i = 0; i < n; i++) {
297+
AE_L32_IP(samples0, (ae_f32 *)x, sizeof(ae_f32));
298+
mult0 = AE_MULF32R_HH(gain, samples0);
299+
mult0 = AE_SLAI64(mult0, LEVEL_MULTIPLIER_S32_SHIFT);
300+
samples0 = AE_ROUND32F48SSYM(mult0);
301+
AE_S32_L_IP(samples0, (ae_f32 *)y, sizeof(ae_f32));
302+
}
303+
304+
/* One of the buffers needs a wrap (or end of data), so check for wrap */
305+
x = (x >= (ae_int32x4 *)x_end) ? x - x_size : x;
306+
y = (y >= (ae_int32x4 *)y_end) ? y - y_size : y;
307+
remaining_samples -= samples_without_wrap;
308+
}
309+
310+
/* Update the source and sink for bytes consumed and produced. Return success. */
311+
source_release_data(source, bytes);
312+
sink_commit_buffer(sink, bytes);
313+
return 0;
314+
}
315+
#endif /* CONFIG_FORMAT_S32LE */
316+
317+
/* This struct array defines the used processing functions for
318+
* the PCM formats
319+
*/
320+
const struct level_multiplier_proc_fnmap level_multiplier_proc_fnmap[] = {
321+
#if CONFIG_FORMAT_S16LE
322+
{ SOF_IPC_FRAME_S16_LE, level_multiplier_s16 },
323+
#endif
324+
#if CONFIG_FORMAT_S24LE
325+
{ SOF_IPC_FRAME_S24_4LE, level_multiplier_s24 },
326+
#endif
327+
#if CONFIG_FORMAT_S32LE
328+
{ SOF_IPC_FRAME_S32_LE, level_multiplier_s32 },
329+
#endif
330+
};
331+
332+
/**
333+
* level_multiplier_find_proc_func() - Find suitable processing function.
334+
* @src_fmt: Enum value for PCM format.
335+
*
336+
* This function finds the suitable processing function to use for
337+
* the used PCM format. If not found, return NULL.
338+
*
339+
* Return: Pointer to processing function for the requested PCM format.
340+
*/
341+
level_multiplier_func level_multiplier_find_proc_func(enum sof_ipc_frame src_fmt)
342+
{
343+
int i;
344+
345+
/* Find suitable processing function from map */
346+
for (i = 0; i < ARRAY_SIZE(level_multiplier_proc_fnmap); i++)
347+
if (src_fmt == level_multiplier_proc_fnmap[i].frame_fmt)
348+
return level_multiplier_proc_fnmap[i].level_multiplier_proc_func;
349+
350+
return NULL;
351+
}
352+
353+
#endif /* SOF_USE_MIN_HIFI(5, VOLUME) */

src/audio/level_multiplier/llext/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ sof_llext_build("template_comp"
55
SOURCES ../level_multiplier.c
66
../level_multiplier-generic.c
77
../level_multiplier-hifi3.c
8+
../level_multiplier-hifi5.c
89
../level_multiplier-ipc3.c
910
../level_multiplier-ipc4.c
1011
LIB openmodules

0 commit comments

Comments
 (0)