@@ -98,6 +98,107 @@ static void mix_s16(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples,
 		}
 	}
 }
+
+static void mix_s16_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples,
+			 const struct cir_buf_ptr *source,
+			 int32_t sample_count, uint16_t gain)
+{
+	int samples_to_mix, samples_to_copy, left_samples;
+	int n, nmax, i, m, left;
+	ae_int16x4 in_sample, in_sample1;
+	ae_int16x4 out_sample, out_sample1;
+	ae_int16x8 *in;
+	ae_int16x8 *out;
+	ae_valignx2 inu = AE_ZALIGN128();
+	ae_valignx2 outu1 = AE_ZALIGN128();
+	ae_valignx2 outu2 = AE_ZALIGN128();
+	/* cir_buf_wrap() is required and is done below in a loop */
+	ae_int16 *dst = (ae_int16 *)sink->ptr + start_sample;
+	ae_int16 *src = source->ptr;
+	ae_f16x4 gain_vec;
+
+	/* this function does not support unity gain as 1.0 cannot be represented as a Q1.15 value */
+	assert(gain < IPC4_MIXIN_UNITY_GAIN);
+
+	gain_vec = AE_L16_I((ae_int16 *)&gain, 0);
+	gain_vec = AE_SLAI16S(gain_vec, 5);	/* convert to Q1.15 */
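+	/* assuming IPC4_MIXIN_UNITY_GAIN == 1 << 10, the incoming gain is a Q10
+	 * value and the saturating left shift by 5 rescales it to Q1.15 for the
+	 * fractional multiplies below
+	 */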
+
+	assert(mixed_samples >= start_sample);
+	samples_to_mix = AE_MIN32(mixed_samples - start_sample, sample_count);
+	samples_to_copy = sample_count - samples_to_mix;
+	n = 0;
+
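+	/* two phases follow: samples before mixed_samples already hold output
+	 * from previously processed mixins and are mixed in with a saturating
+	 * add, while the rest of the sink region is written for the first time
+	 * and is only copied with gain applied (second loop below)
+	 */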
+	for (left_samples = samples_to_mix; left_samples > 0; left_samples -= n) {
+		src = cir_buf_wrap(src + n, source->buf_start, source->buf_end);
+		dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end);
+		/* calculate the remaining samples */
+		nmax = (ae_int16 *)source->buf_end - src;
+		n = AE_MIN32(left_samples, nmax);
+		nmax = (ae_int16 *)sink->buf_end - dst;
+		n = AE_MIN32(n, nmax);
+		in = (ae_int16x8 *)src;
+		out = (ae_int16x8 *)dst;
+		inu = AE_LA128_PP(in);
+		outu1 = AE_LA128_PP(out);
+		m = n >> 3;
+		left = n & 0x07;
+		/* process 8 samples per loop */
+		for (i = 0; i < m; i++) {
+			AE_LA16X4X2_IP(in_sample, in_sample1, inu, in);
+			AE_LA16X4X2_IP(out_sample, out_sample1, outu1, out);
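+			/* the loads above advanced out by 16 bytes; step back so the
+			 * gained sums are stored over the samples just read
+			 */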
+			out--;
+			in_sample = AE_MULFP16X4RS(in_sample, gain_vec);
+			in_sample1 = AE_MULFP16X4RS(in_sample1, gain_vec);
+			out_sample = AE_ADD16S(in_sample, out_sample);
+			out_sample1 = AE_ADD16S(in_sample1, out_sample1);
+			AE_SA16X4X2_IP(out_sample, out_sample1, outu2, out);
+		}
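+		/* complete the unaligned stores: flush bytes still pending in outu2 */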
+		AE_SA128POS_FP(outu2, out);
+
+		/* process the leftover samples (fewer than 8)
+		 * one by one to avoid memory access overrun
+		 */
+		for (i = 0; i < left; i++) {
+			AE_L16_IP(in_sample, (ae_int16 *)in, sizeof(ae_int16));
+			AE_L16_IP(out_sample, (ae_int16 *)out, 0);
+			in_sample = AE_MULFP16X4RS(in_sample, gain_vec);
+			out_sample = AE_ADD16S(in_sample, out_sample);
+			AE_S16_0_IP(out_sample, (ae_int16 *)out, sizeof(ae_int16));
+		}
+	}
+
+	for (left_samples = samples_to_copy; left_samples > 0; left_samples -= n) {
+		src = cir_buf_wrap(src + n, source->buf_start, source->buf_end);
+		dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end);
+		/* calculate the remaining samples */
+		nmax = (ae_int16 *)source->buf_end - src;
+		n = AE_MIN32(left_samples, nmax);
+		nmax = (ae_int16 *)sink->buf_end - dst;
+		n = AE_MIN32(n, nmax);
+		in = (ae_int16x8 *)src;
+		out = (ae_int16x8 *)dst;
+		inu = AE_LA128_PP(in);
+		m = n >> 3;
+		left = n & 0x07;
+		/* process 8 samples per loop */
+		for (i = 0; i < m; i++) {
+			AE_LA16X4X2_IP(in_sample, in_sample1, inu, in);
+			in_sample = AE_MULFP16X4RS(in_sample, gain_vec);
+			in_sample1 = AE_MULFP16X4RS(in_sample1, gain_vec);
+			AE_SA16X4X2_IP(in_sample, in_sample1, outu2, out);
+		}
+		AE_SA128POS_FP(outu2, out);
+
+		/* process the leftover samples (fewer than 8)
+		 * one by one to avoid memory access overrun
+		 */
+		for (i = 0; i < left; i++) {
+			AE_L16_IP(in_sample, (ae_int16 *)in, sizeof(ae_int16));
+			in_sample = AE_MULFP16X4RS(in_sample, gain_vec);
+			AE_S16_0_IP(in_sample, (ae_int16 *)out, sizeof(ae_int16));
+		}
+	}
+}
 #endif /* CONFIG_FORMAT_S16LE */
 
 #if CONFIG_FORMAT_S24LE
@@ -193,6 +294,102 @@ static void mix_s24(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples,
 	}
 }
 
+static void mix_s24_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples,
+			 const struct cir_buf_ptr *source,
+			 int32_t sample_count, uint16_t gain)
+{
+	int samples_to_mix, samples_to_copy, left_samples;
+	int n, nmax, i, m, left;
+	ae_int32x2 in_sample, in_sample1;
+	ae_int32x2 out_sample, out_sample1;
+	ae_int32x4 *in;
+	ae_int32x4 *out;
+	ae_valignx2 inu = AE_ZALIGN128();
+	ae_valignx2 outu1 = AE_ZALIGN128();
+	ae_valignx2 outu2 = AE_ZALIGN128();
+	/* cir_buf_wrap() is required and is done below in a loop */
+	int32_t *dst = (int32_t *)sink->ptr + start_sample;
+	int32_t *src = source->ptr;
+	ae_f24x2 gain_vec;
+	ae_int32 gain32 = (ae_int32)gain;
+
+	/* this function does not support unity gain as 1.0 cannot be represented as a Q1.23 value */
+	assert(gain < IPC4_MIXIN_UNITY_GAIN);
+
+	gain_vec = AE_MOVF24X2_FROMINT32X2(AE_L32_I(&gain32, 0));
+	gain_vec = AE_SLAI24S(gain_vec, 13);	/* convert to Q1.23 */
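+	/* same Q10 gain assumption as in mix_s16_gain: a saturating left shift
+	 * by 13 rescales it to Q1.23, matching the s24 sample format
+	 */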
+
+	assert(mixed_samples >= start_sample);
+	samples_to_mix = AE_MIN32(mixed_samples - start_sample, sample_count);
+	samples_to_copy = sample_count - samples_to_mix;
+	n = 0;
+
+	for (left_samples = samples_to_mix; left_samples > 0; left_samples -= n) {
+		src = cir_buf_wrap(src + n, source->buf_start, source->buf_end);
+		dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end);
+		/* calculate the remaining samples */
+		nmax = (int32_t *)source->buf_end - src;
+		n = AE_MIN32(left_samples, nmax);
+		nmax = (int32_t *)sink->buf_end - dst;
+		n = AE_MIN32(n, nmax);
+		in = (ae_int32x4 *)src;
+		out = (ae_int32x4 *)dst;
+		inu = AE_LA128_PP(in);
+		outu1 = AE_LA128_PP(out);
+		m = n >> 2;
+		left = n & 3;
+		/* process 4 samples at a time */
+		for (i = 0; i < m; i++) {
+			AE_LA32X2X2_IP(in_sample, in_sample1, inu, in);
+			AE_LA32X2X2_IP(out_sample, out_sample1, outu1, out);
+			out--;
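+			/* samples are Q1.23 values kept in 32-bit containers: reinterpret
+			 * the lanes as ae_f24x2 so AE_MULFP24X2R performs the fractional
+			 * gain multiply with rounding, then add with 24-bit saturation
+			 */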
+			in_sample = AE_MULFP24X2R(AE_MOVF24X2_FROMINT32X2(in_sample), gain_vec);
+			in_sample1 = AE_MULFP24X2R(AE_MOVF24X2_FROMINT32X2(in_sample1), gain_vec);
+			/* out samples are already sign extended by another mixin in the copy loop below */
+			out_sample = AE_ADD24S(in_sample, out_sample);
+			out_sample1 = AE_ADD24S(in_sample1, out_sample1);
+			AE_SA32X2X2_IP(out_sample, out_sample1, outu2, out);
+		}
+		AE_SA128POS_FP(outu2, out);
+
+		/* process the leftover samples to avoid memory access overrun */
+		for (i = 0; i < left; i++) {
+			AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));
+			AE_L32_IP(out_sample, (ae_int32 *)out, 0);
+			in_sample = AE_MULFP24X2R(AE_MOVF24X2_FROMINT32X2(in_sample), gain_vec);
+			/* out samples are already sign extended by another mixin in the copy loop below */
+			out_sample = AE_ADD24S(in_sample, out_sample);
+			AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32));
+		}
+	}
+
+	for (left_samples = samples_to_copy; left_samples > 0; left_samples -= n) {
+		src = cir_buf_wrap(src + n, source->buf_start, source->buf_end);
+		dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end);
+		nmax = (int32_t *)source->buf_end - src;
+		n = AE_MIN32(left_samples, nmax);
+		nmax = (int32_t *)sink->buf_end - dst;
+		n = AE_MIN32(n, nmax);
+		in = (ae_int32x4 *)src;
+		out = (ae_int32x4 *)dst;
+		inu = AE_LA128_PP(in);
+		m = n >> 2;
+		left = n & 3;
+		for (i = 0; i < m; i++) {
+			AE_LA32X2X2_IP(in_sample, in_sample1, inu, in);
+			in_sample = AE_MULFP24X2R(AE_MOVF24X2_FROMINT32X2(in_sample), gain_vec);
+			in_sample1 = AE_MULFP24X2R(AE_MOVF24X2_FROMINT32X2(in_sample1), gain_vec);
+			AE_SA32X2X2_IP(in_sample, in_sample1, outu2, out);
+		}
+		AE_SA128POS_FP(outu2, out);
+		/* process the leftover samples to avoid memory access overrun */
+		for (i = 0; i < left; i++) {
+			AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));
+			in_sample = AE_MULFP24X2R(AE_MOVF24X2_FROMINT32X2(in_sample), gain_vec);
+			AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32));
+		}
+	}
+}
 #endif /* CONFIG_FORMAT_S24LE */
 
 #if CONFIG_FORMAT_S32LE
@@ -277,18 +474,108 @@ static void mix_s32(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples,
 	}
 }
 
+static void mix_s32_gain(struct cir_buf_ptr *sink, int32_t start_sample, int32_t mixed_samples,
+			 const struct cir_buf_ptr *source,
+			 int32_t sample_count, uint16_t gain)
+{
+	int samples_to_mix, samples_to_copy, left_samples;
+	int n, nmax, i, m, left;
+	ae_int32x2 in_sample, in_sample1;
+	ae_int32x2 out_sample, out_sample1;
+	ae_int32x4 *in;
+	ae_int32x4 *out;
+	ae_valignx2 inu = AE_ZALIGN128();
+	ae_valignx2 outu1 = AE_ZALIGN128();
+	ae_valignx2 outu2 = AE_ZALIGN128();
+	/* cir_buf_wrap() is required and is done below in a loop */
+	int32_t *dst = (int32_t *)sink->ptr + start_sample;
+	int32_t *src = source->ptr;
+	ae_f16x4 gain_vec;
+
+	/* this function does not support unity gain as 1.0 cannot be represented as a Q1.15 value */
+	assert(gain < IPC4_MIXIN_UNITY_GAIN);
+
+	gain_vec = AE_L16_I((ae_int16 *)&gain, 0);
+	gain_vec = AE_SLAI16S(gain_vec, 5);	/* convert to Q1.15 */
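+	/* the gain stays in Q1.15 even for 32-bit samples: the 32x16-bit fractional
+	 * multiplies below take the gain directly from gain_vec's 16-bit lanes
+	 */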
+
+	assert(mixed_samples >= start_sample);
+	samples_to_mix = AE_MIN32(mixed_samples - start_sample, sample_count);
+	samples_to_copy = sample_count - samples_to_mix;
+	n = 0;
+
+	for (left_samples = samples_to_mix; left_samples > 0; left_samples -= n) {
+		src = cir_buf_wrap(src + n, source->buf_start, source->buf_end);
+		dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end);
+		/* calculate the remaining samples */
+		nmax = (int32_t *)source->buf_end - src;
+		n = AE_MIN32(left_samples, nmax);
+		nmax = (int32_t *)sink->buf_end - dst;
+		n = AE_MIN32(n, nmax);
+		in = (ae_int32x4 *)src;
+		out = (ae_int32x4 *)dst;
+		inu = AE_LA128_PP(in);
+		outu1 = AE_LA128_PP(out);
+		m = n >> 2;
+		left = n & 3;
+		for (i = 0; i < m; i++) {
+			AE_LA32X2X2_IP(in_sample, in_sample1, inu, in);
+			AE_LA32X2X2_IP(out_sample, out_sample1, outu1, out);
+			out--;
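+			/* fused gain and mix: in_sample is multiplied by the low 16-bit
+			 * lanes of gain_vec and accumulated into out_sample with rounding
+			 * and saturation in a single multiply-accumulate
+			 */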
+			AE_MULAFP32X16X2RS_L(out_sample, in_sample, gain_vec);
+			AE_MULAFP32X16X2RS_L(out_sample1, in_sample1, gain_vec);
+			AE_SA32X2X2_IP(out_sample, out_sample1, outu2, out);
+		}
+		AE_SA128POS_FP(outu2, out);
+
+		/* process the leftover samples to avoid memory access overrun */
+		for (i = 0; i < left; i++) {
+			AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));
+			AE_L32_IP(out_sample, (ae_int32 *)out, 0);
+			AE_MULAFP32X16X2RS_L(out_sample, in_sample, gain_vec);
+			AE_S32_L_IP(out_sample, (ae_int32 *)out, sizeof(ae_int32));
+		}
+	}
+
+	for (left_samples = samples_to_copy; left_samples > 0; left_samples -= n) {
+		src = cir_buf_wrap(src + n, source->buf_start, source->buf_end);
+		dst = cir_buf_wrap(dst + n, sink->buf_start, sink->buf_end);
+		/* calculate the remaining samples */
+		nmax = (int32_t *)source->buf_end - src;
+		n = AE_MIN32(left_samples, nmax);
+		nmax = (int32_t *)sink->buf_end - dst;
+		n = AE_MIN32(n, nmax);
+		in = (ae_int32x4 *)src;
+		out = (ae_int32x4 *)dst;
+		inu = AE_LA128_PP(in);
+		m = n >> 2;
+		left = n & 3;
+		for (i = 0; i < m; i++) {
+			AE_LA32X2X2_IP(in_sample, in_sample1, inu, in);
+			in_sample = AE_MULFP32X16X2RS_L(in_sample, gain_vec);
+			in_sample1 = AE_MULFP32X16X2RS_L(in_sample1, gain_vec);
+			AE_SA32X2X2_IP(in_sample, in_sample1, outu2, out);
+		}
+		AE_SA128POS_FP(outu2, out);
+		/* process the leftover samples to avoid memory access overrun */
+		for (i = 0; i < left; i++) {
+			AE_L32_IP(in_sample, (ae_int32 *)in, sizeof(ae_int32));
+			in_sample = AE_MULFP32X16X2RS_L(in_sample, gain_vec);
+			AE_S32_L_IP(in_sample, (ae_int32 *)out, sizeof(ae_int32));
+		}
+	}
+}
 #endif /* CONFIG_FORMAT_S32LE */
 
 /* TODO: implement mixing functions with gain support!*/
 __cold_rodata const struct mix_func_map mix_func_map[] = {
 #if CONFIG_FORMAT_S16LE
-	{ SOF_IPC_FRAME_S16_LE, mix_s16, mix_s16 },
+	{ SOF_IPC_FRAME_S16_LE, mix_s16, mix_s16_gain },
 #endif
 #if CONFIG_FORMAT_S24LE
-	{ SOF_IPC_FRAME_S24_4LE, mix_s24, mix_s24 },
+	{ SOF_IPC_FRAME_S24_4LE, mix_s24, mix_s24_gain },
 #endif
 #if CONFIG_FORMAT_S32LE
-	{ SOF_IPC_FRAME_S32_LE, mix_s32, mix_s32 }
+	{ SOF_IPC_FRAME_S32_LE, mix_s32, mix_s32_gain }
 #endif
 };
 