Commit 04a376f9 authored by David Geldreich's avatar David Geldreich Committed by Jean-Baptiste Kempf

add ARM/NEON version of simple channel mixer

Signed-off-by: default avatarJean-Baptiste Kempf <jb@videolan.org>
Signed-off-by: default avatarRafaël Carré <funman@videolan.org>
parent c311b5fa
...@@ -289,6 +289,7 @@ $Id$ ...@@ -289,6 +289,7 @@ $Id$
* shm: Shared memory framebuffer access module * shm: Shared memory framebuffer access module
* sid: Sidplay demuxer * sid: Sidplay demuxer
* simple_channel_mixer: channel mixer * simple_channel_mixer: channel mixer
* simple_channel_mixer_neon: channel mixer using NEON assembly
* skins2: Skinnable interface, new generation * skins2: Skinnable interface, new generation
* smf: Standard MIDI file demuxer * smf: Standard MIDI file demuxer
* smooth: Microsoft Smooth Streaming input * smooth: Microsoft Smooth Streaming input
......
...@@ -8,6 +8,13 @@ libaudio_format_neon_plugin_la_SOURCES = \ ...@@ -8,6 +8,13 @@ libaudio_format_neon_plugin_la_SOURCES = \
libaudio_format_neon_plugin_la_CFLAGS = $(AM_CFLAGS) libaudio_format_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
libaudio_format_neon_plugin_la_LIBADD = $(AM_LIBADD) libaudio_format_neon_plugin_la_LIBADD = $(AM_LIBADD)
libsimple_channel_mixer_neon_plugin_la_SOURCES = \
simple_channel_mixer.S \
simple_channel_mixer.c
libsimple_channel_mixer_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
libsimple_channel_mixer_neon_plugin_la_LIBADD = $(AM_LIBADD)
libsimple_channel_mixer_neon_plugin_la_DEPENDENCIES =
libchroma_yuv_neon_plugin_la_SOURCES = \ libchroma_yuv_neon_plugin_la_SOURCES = \
i420_yuyv.S \ i420_yuyv.S \
i422_yuyv.S \ i422_yuyv.S \
...@@ -30,6 +37,7 @@ libyuv_rgb_neon_plugin_la_LIBADD = $(AM_LIBADD) ...@@ -30,6 +37,7 @@ libyuv_rgb_neon_plugin_la_LIBADD = $(AM_LIBADD)
libvlc_LTLIBRARIES += \ libvlc_LTLIBRARIES += \
libaudio_format_neon_plugin.la \ libaudio_format_neon_plugin.la \
libsimple_channel_mixer_neon_plugin.la \
libchroma_yuv_neon_plugin.la \ libchroma_yuv_neon_plugin.la \
libvolume_neon_plugin.la \ libvolume_neon_plugin.la \
libyuv_rgb_neon_plugin.la \ libyuv_rgb_neon_plugin.la \
......
@*****************************************************************************
@ simple_channel_mixer.S : ARM NEON channel mixer
@*****************************************************************************
@ Copyright (C) 2012 David Geldreich <david.geldreich@free.fr>
@ Sébastien Toque
@
@ This program is free software; you can redistribute it and/or modify
@ it under the terms of the GNU General Public License as published by
@ the Free Software Foundation; either version 2 of the License, or
@ (at your option) any later version.
@
@ This program is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ GNU General Public License for more details.
@
@ You should have received a copy of the GNU General Public License
@ along with this program; if not, write to the Free Software Foundation,
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/
.fpu neon
.text
.align
#define DST r0
#define SRC r1
#define NUM r2
#define LFE r3
#define COEFF r4
coeff_7to2:
.float 0.5
.float 0.5
.float 0.25
.float 0.25
.global convert_7to2_neon_asm
.type convert_7to2_neon_asm, %function
convert_7to2_neon_asm:
push {r4,lr}
adr COEFF, coeff_7to2
vld1.32 {q0},[COEFF]
0: @ use local label
vld1.32 {q2},[SRC]! @ load 0,1,2,3
vmul.f32 q2,q2,q0 @ 0.5*src[0] 0.5*src[1] 0.25*src[2] 0.25*src[3]
vld1.32 {d6},[SRC]! @ load 4,5
vmul.f32 d6,d6,d1 @ 0.25*src[4] 0.25*src[5]
vadd.f32 d4,d4,d5 @ 0.5*src[0] + 0.25*src[2]
@ 0.5*src[1] + 0.25*src[3]
vadd.f32 d4,d4,d6 @ 0.5*src[0] + 0.25*src[2] + 0.25*src[4]
@ 0.5*src[1] + 0.25*src[3] + 0.25*src[5]
flds s14,[SRC] @ load 6
vdup.32 d7,d7[0]
teq LFE,#0
ite eq
addeq SRC,SRC,#4
addne SRC,SRC,#8 @ skip the lfe channel
vadd.f32 d4,d4,d7 @ 0.5*src[0] + 0.25*src[2] + 0.25*src[4] + src[6]
@ 0.5*src[1] + 0.25*src[3] + 0.25*src[5] + src[6]
vst1.32 d4, [DST]!
subs NUM,NUM,#1
bne 0b
pop {r4,pc}
coeff_5to2:
.float 0.5
.float 0.5
.float 0.33
.float 0.33
.global convert_5to2_neon_asm
.type convert_5to2_neon_asm, %function
convert_5to2_neon_asm:
push {r4,lr}
adr COEFF, coeff_5to2
vld1.32 {q0},[COEFF] @ load constants
0: @ use local label
vld1.32 {q1},[SRC]! @ load 0,1,2,3
flds s8,[SRC] @ load 4
vdup.32 d4,d4[0]
teq LFE,#0
ite eq
addeq SRC,SRC,#4
addne SRC,SRC,#8 @ skip the lfe channel
vmul.f32 q1,q1,q0 @ 0.5*src[0] 0.5*src[1] 0.33*src[2] 0.33*src[3]/3
vadd.f32 d2,d2,d3 @ 0.5*src[0] + 0.33*src[2]
@ 0.5*src[1] + 0.33*src[3]
vadd.f32 d2,d2,d4 @ 0.5*src[0] + 0.33*src[2] + src[4]
@ 0.5*src[1] + 0.33*src[3] + src[4]
vst1.32 d2,[DST]!
subs NUM,NUM,#1
bne 0b
pop {r4,pc}
coeff_4to2:
.float 0.5
.float 0.5
.global convert_4to2_neon_asm
.type convert_4to2_neon_asm, %function
convert_4to2_neon_asm:
push {r4,lr}
adr COEFF, coeff_4to2
vld1.32 {d0},[COEFF] @ load constants
0: @ use local label
vld1.32 {q1},[SRC]!
vmul.f32 d2,d2,d0 @ 0.5*src[0] 0.5*src[1]
vdup.32 d4,d3[0] @ dup src[2]
vdup.32 d3,d3[1] @ dup src[3]
vadd.f32 d2,d2,d3 @ +src[3]
vadd.f32 d2,d2,d4 @ +src[2]
vst1.32 d2,[DST]!
subs NUM,NUM,#1
bne 0b
pop {r4,pc}
coeff_3to2:
.float 0.5
.float 0.5
.global convert_3to2_neon_asm
.type convert_3to2_neon_asm, %function
convert_3to2_neon_asm:
push {r4,lr}
adr COEFF, coeff_3to2
vld1.32 {d0},[COEFF] @ load constants
0: @ use local label
vld1.32 {d1},[SRC]! @ load 0,1
flds s4,[SRC] @ load 2
vdup.32 d2,d2[0]
teq LFE,#0
ite eq
addeq SRC,SRC,#4
addne SRC,SRC,#8 @ skip the lfe channel
vmul.f32 d1,d1,d0 @ 0.5*src[0] 0.5*src[1]
vadd.f32 d1,d1,d2 @ 0.5*src[0] + src[2]
@ 0.5*src[1] + src[2]
vst1.32 d1,[DST]!
subs NUM,NUM,#1
bne 0b
pop {r4,pc}
coeff_7to1:
.float 0.25
.float 0.25
.float 0.125
.float 0.125
.global convert_7to1_neon_asm
.type convert_7to1_neon_asm, %function
convert_7to1_neon_asm:
push {r4,lr}
adr COEFF, coeff_7to1
vld1.32 {q0},[COEFF]
0: @ use local label
vld1.32 {q1},[SRC]! @ load 0,1,2,3
vmul.f32 q1,q1,q0 @ 0.25*src[0] 0.25*src[1] 0.125*src[2] 0.125*src[3]
vld1.32 {d4},[SRC]! @ load 4,5
vmul.f32 d4,d4,d1 @ 0.125*src[4] 0.125*src[5]
vadd.f32 d2,d2,d3
vadd.f32 d2,d2,d4
flds s10,[SRC] @ load 6
teq LFE,#0
ite eq
addeq SRC,SRC,#4
addne SRC,SRC,#8 @ skip the lfe channel
vadd.f32 s4,s4,s5
vadd.f32 s4,s4,s10
fsts s4,[DST]
add DST,DST,#4
subs NUM,NUM,#1
bne 0b
pop {r4,pc}
coeff_5to1:
.float 0.25
.float 0.25
.float 0.16666667
.float 0.16666667
.global convert_5to1_neon_asm
.type convert_5to1_neon_asm, %function
convert_5to1_neon_asm:
push {r4,lr}
adr COEFF, coeff_5to1
vld1.32 {q0},[COEFF]
0: @ use local label
vld1.32 {q1},[SRC]! @ load 0,1,2,3
vmul.f32 q1,q1,q0 @ 0.25*src[0] 0.25*src[1] src[2]/6 src[3]/6
vadd.f32 d2,d2,d3
flds s10,[SRC] @ load 4
teq LFE,#0
ite eq
addeq SRC,SRC,#4
addne SRC,SRC,#8 @ skip the lfe channel
vadd.f32 s4,s4,s5
vadd.f32 s4,s4,s10
fsts s4,[DST]
add DST,DST,#4
subs NUM,NUM,#1
bne 0b
pop {r4,pc}
coeff_7to4:
.float 0.5
.float 0.5
.float 0.16666667
.float 0.16666667
.global convert_7to4_neon_asm
.type convert_7to4_neon_asm, %function
convert_7to4_neon_asm:
push {r4,lr}
adr COEFF, coeff_7to4
vld1.32 {q0},[COEFF]
0: @ use local label
vld1.32 {q1},[SRC]! @ load 0,1,2,3
vmul.f32 q1,q1,q0 @ 0.5*src[0] 0.5*src[1] src[2]/6 src[3]/6
vld1.32 {d5},[SRC]! @ load 4,5
flds s14,[SRC] @ load 6
vadd.f32 d2,d2,d3 @ 0.5*src[0] + src[2]/6
@ 0.5*src[1] + src[3]/6
vdup.32 d4,d7[0] @ so q2 : src[6] src[6] src[4] src[5]
vadd.f32 q2,q2,q1 @ src[6] + 0.5*src[0] + src[2]/6
@ src[6] + 0.5*src[1] + src[3]/6
@ src[4] + src[2]/6
@ src[5] + src[3]/6
teq LFE,#0
ite eq
addeq SRC,SRC,#4
addne SRC,SRC,#8 @ skip the lfe channel
vst1.32 {q2}, [DST]!
subs NUM,NUM,#1
bne 0b
pop {r4,pc}
coeff_5to4:
.float 0.5
.float 0.5
.global convert_5to4_neon_asm
.type convert_5to4_neon_asm, %function
convert_5to4_neon_asm:
push {r4,lr}
adr COEFF, coeff_5to4
vld1.32 {d0},[COEFF]
0: @ use local label
vld1.32 {q1},[SRC]! @ load 0,1,2,3
vmul.f32 d2,d2,d0 @ 0.5*src[0] 0.5*src[1]
flds s8,[SRC] @ load 4
vdup.32 d4,d4[0]
vadd.f32 d2,d2,d4 @ 0.5*src[0] + src[4]
@ 0.5*src[1] + src[4]
@ src[2]
@ src[3]
teq LFE,#0
ite eq
addeq SRC,SRC,#4
addne SRC,SRC,#8 @ skip the lfe channel
vst1.32 {q1}, [DST]!
subs NUM,NUM,#1
bne 0b
pop {r4,pc}
/*****************************************************************************
* simple_channel_mixer.c : simple channel mixer plug-in using NEON assembly
*****************************************************************************
* Copyright (C) 2002, 2004, 2006-2009, 2012 the VideoLAN team
* $Id$
*
* Authors: Gildas Bazin <gbazin@videolan.org>
* David Geldreich <david.geldreich@free.fr>
* Sébastien Toque
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
/*****************************************************************************
* Preamble
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_aout.h>
#include <vlc_filter.h>
#include <vlc_block.h>
#include <vlc_cpu.h>
#include <assert.h>
/*****************************************************************************
* Module descriptor
*****************************************************************************/
static int OpenFilter( vlc_object_t * );
vlc_module_begin ()
set_description( N_("Audio filter for simple channel mixing using NEON assembly") )
set_category( CAT_AUDIO )
set_subcategory( SUBCAT_AUDIO_MISC )
set_capability( "audio filter", 20 )
set_callbacks( OpenFilter, NULL )
vlc_module_end ()
#define FILTER_WRAPPER(in, out) \
void convert_##in##to##out##_neon_asm(float *dst, const float *src, int num, bool lfeChannel); \
static block_t *Filter_##in##to##out (filter_t *p_filter, block_t *p_block) \
{ \
block_t *p_out; \
if (!FilterInit( p_filter, p_block, &p_out )) \
return NULL; \
const float *p_src = (const float *)p_block->p_buffer; \
float *p_dest = (float *)p_out->p_buffer; \
convert_##in##to##out##_neon_asm( p_dest, p_src, p_block->i_nb_samples, \
p_filter->fmt_in.audio.i_physical_channels & AOUT_CHAN_LFE ); \
block_Release( p_block ); \
return p_out; \
}
#define TRY_FILTER(in, out) \
if ( b_input_##in && b_output_##out ) \
{ \
p_filter->pf_audio_filter = Filter_##in##to##out ; \
return VLC_SUCCESS; \
}
/*****************************************************************************
* Filter:
*****************************************************************************/
static bool FilterInit( filter_t *p_filter, block_t *p_block, block_t **pp_out )
{
if( !p_block || !p_block->i_nb_samples )
{
if( p_block )
block_Release( p_block );
return false;
}
size_t i_out_size = p_block->i_nb_samples *
p_filter->fmt_out.audio.i_bitspersample *
p_filter->fmt_out.audio.i_channels / 8;
block_t *p_out = filter_NewAudioBuffer( p_filter, i_out_size );
if( !p_out )
{
msg_Warn( p_filter, "can't get output buffer" );
block_Release( p_block );
return false;
}
p_out->i_nb_samples = p_block->i_nb_samples;
p_out->i_dts = p_block->i_dts;
p_out->i_pts = p_block->i_pts;
p_out->i_length = p_block->i_length;
int i_input_nb = aout_FormatNbChannels( &p_filter->fmt_in.audio );
int i_output_nb = aout_FormatNbChannels( &p_filter->fmt_out.audio );
p_out->i_buffer = p_block->i_buffer * i_output_nb / i_input_nb;
*pp_out = p_out;
return true;
}
FILTER_WRAPPER(7,2)
FILTER_WRAPPER(5,2)
FILTER_WRAPPER(4,2)
FILTER_WRAPPER(3,2)
FILTER_WRAPPER(7,1)
FILTER_WRAPPER(5,1)
FILTER_WRAPPER(7,4)
FILTER_WRAPPER(5,4)
/*****************************************************************************
* OpenFilter:
*****************************************************************************/
static int OpenFilter( vlc_object_t *p_this )
{
filter_t *p_filter = (filter_t *)p_this;
if (!vlc_CPU_ARM_NEON())
return VLC_EGENERIC;
audio_format_t fmt_in = p_filter->fmt_in.audio;
audio_format_t fmt_out = p_filter->fmt_out.audio;
fmt_in.i_format = p_filter->fmt_in.i_codec;
fmt_out.i_format = p_filter->fmt_out.i_codec;
if( fmt_in.i_format != VLC_CODEC_FL32 ||
fmt_in.i_format != fmt_out.i_format ||
fmt_in.i_rate != fmt_out.i_rate )
{
return VLC_EGENERIC;
}
if( fmt_in.i_physical_channels == fmt_out.i_physical_channels &&
fmt_in.i_original_channels == fmt_out.i_original_channels )
{
return VLC_EGENERIC;
}
const bool b_input_7 = (fmt_in.i_physical_channels & ~AOUT_CHAN_LFE) == AOUT_CHANS_7_0;
const bool b_input_5 = ( (fmt_in.i_physical_channels & AOUT_CHANS_5_0) == AOUT_CHANS_5_0 ||
(fmt_in.i_physical_channels & AOUT_CHANS_5_0_MIDDLE) == AOUT_CHANS_5_0_MIDDLE );
const bool b_input_4 = (fmt_in.i_physical_channels & ~AOUT_CHAN_LFE) == AOUT_CHANS_4_CENTER_REAR;
const bool b_input_3 = (fmt_in.i_physical_channels & ~AOUT_CHAN_LFE) == AOUT_CHANS_3_0;
const bool b_output_1 = fmt_out.i_physical_channels == AOUT_CHAN_CENTER;
const bool b_output_2 = fmt_out.i_physical_channels == AOUT_CHANS_2_0;
const bool b_output_4 = fmt_out.i_physical_channels == AOUT_CHANS_4_0;
/* Only conversion to Mono, Stereo and 4.0 right now */
/* Only from 7/7.1/5/5.1/3/3.1/2.0
* XXX 5.X rear and middle are handled the same way */
TRY_FILTER(7,2)
TRY_FILTER(5,2)
TRY_FILTER(4,2)
TRY_FILTER(3,2)
TRY_FILTER(7,1)
TRY_FILTER(5,1)
TRY_FILTER(7,4)
TRY_FILTER(5,4)
return VLC_EGENERIC;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment