Commit db7b0421 authored by Renaud Dartus's avatar Renaud Dartus

* Beginning of SSE/3DNow! support for imdct and downmix

If you have a PIII or an Athlon and want to try this, comment out the "#if 0" / "#endif" lines
in ac3_downmix.c and ac3_imdct.c, and add the following to the AC3_DECODER section of the Makefile:
	src/ac3_decoder/ac3_imdct_sse.o \
	src/ac3_decoder/ac3_srfft_sse.o \
	src/ac3_decoder/ac3_downmix_sse.o \
	src/ac3_decoder/ac3_downmix_3dn.o \
parent c1df8159
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* tests.h: several test functions needed by the plugins * tests.h: several test functions needed by the plugins
***************************************************************************** *****************************************************************************
* Copyright (C) 1996, 1997, 1998, 1999, 2000 VideoLAN * Copyright (C) 1996, 1997, 1998, 1999, 2000 VideoLAN
* $Id: tests.h,v 1.9 2001/03/21 13:42:33 sam Exp $ * $Id: tests.h,v 1.10 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Samuel Hocevar <sam@zoy.org> * Authors: Samuel Hocevar <sam@zoy.org>
* *
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#define CPU_CAPABILITY_MMX 1<<3 #define CPU_CAPABILITY_MMX 1<<3
#define CPU_CAPABILITY_3DNOW 1<<4 #define CPU_CAPABILITY_3DNOW 1<<4
#define CPU_CAPABILITY_MMXEXT 1<<5 #define CPU_CAPABILITY_MMXEXT 1<<5
#define CPU_CAPABILITY_SSE 1<<6
#define CPU_CAPABILITY_ALTIVEC 1<<16 #define CPU_CAPABILITY_ALTIVEC 1<<16
/***************************************************************************** /*****************************************************************************
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_bit_allocate.c: ac3 allocation tables * ac3_bit_allocate.c: ac3 allocation tables
***************************************************************************** *****************************************************************************
* Copyright (C) 2000 VideoLAN * Copyright (C) 2000 VideoLAN
* $Id: ac3_bit_allocate.c,v 1.20 2001/05/06 04:32:02 sam Exp $ * $Id: ac3_bit_allocate.c,v 1.21 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
#include "input_ext-dec.h" #include "input_ext-dec.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_internal.h" #include "ac3_internal.h" /* DELTA_BIT_REUSE */
static void ba_compute_psd (bit_allocate_t * p_bit, s16 start, s16 end, s16 exps[]); static void ba_compute_psd (bit_allocate_t * p_bit, s16 start, s16 end, s16 exps[]);
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder.c: core ac3 decoder * ac3_decoder.c: core ac3 decoder
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder.c,v 1.32 2001/05/07 03:14:09 stef Exp $ * $Id: ac3_decoder.c,v 1.33 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Michel Lespinasse <walken@zoy.org> * Michel Lespinasse <walken@zoy.org>
...@@ -40,21 +40,14 @@ ...@@ -40,21 +40,14 @@
#include "audio_output.h" #include "audio_output.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_decoder_thread.h" #include "ac3_decoder_thread.h" /* ac3dec_thread_t */
#include "ac3_internal.h" #include "ac3_internal.h"
#include <stdio.h> static const float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
static const float smixlev_lut[4] = { 0.707, 0.500, 0.0 , 0.500 };
void imdct_init (imdct_t * p_imdct);
void downmix_init (downmix_t * p_downmix);
static float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
static float smixlev_lut[4] = { 0.707, 0.500, 0.0 , 0.500 };
int ac3_init (ac3dec_t * p_ac3dec) int ac3_init (ac3dec_t * p_ac3dec)
{ {
// p_ac3dec->bit_stream.buffer = 0;
// p_ac3dec->bit_stream.i_available = 0;
p_ac3dec->mantissa.lfsr_state = 1; /* dither_gen initialization */ p_ac3dec->mantissa.lfsr_state = 1; /* dither_gen initialization */
imdct_init(&p_ac3dec->imdct); imdct_init(&p_ac3dec->imdct);
downmix_init(&p_ac3dec->downmix); downmix_init(&p_ac3dec->downmix);
...@@ -69,7 +62,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -69,7 +62,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
if (parse_bsi (p_ac3dec)) if (parse_bsi (p_ac3dec))
{ {
intf_WarnMsg (3,"Error during ac3parsing"); intf_WarnMsg (3,"ac3dec warn: error during parsing");
parse_auxdata (p_ac3dec); parse_auxdata (p_ac3dec);
return 1; return 1;
} }
...@@ -102,7 +95,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -102,7 +95,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
if (parse_audblk (p_ac3dec, i)) if (parse_audblk (p_ac3dec, i))
{ {
intf_WarnMsg (3,"Error during ac3audioblock"); intf_WarnMsg (3,"ac3dec warn: error during audioblock");
parse_auxdata (p_ac3dec); parse_auxdata (p_ac3dec);
return 1; return 1;
} }
...@@ -114,7 +107,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -114,7 +107,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
if (exponent_unpack (p_ac3dec)) if (exponent_unpack (p_ac3dec))
{ {
intf_WarnMsg (3,"Error during ac3unpack"); intf_WarnMsg (3,"ac3dec warn: error during unpack");
parse_auxdata (p_ac3dec); parse_auxdata (p_ac3dec);
return 1; return 1;
} }
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder.h : ac3 decoder interface * ac3_decoder.h : ac3 decoder interface
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder.h,v 1.7 2001/04/30 21:04:20 reno Exp $ * $Id: ac3_decoder.h,v 1.8 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Renaud Dartus <reno@videolan.org> * Renaud Dartus <reno@videolan.org>
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder_thread.h : ac3 decoder thread interface * ac3_decoder_thread.h : ac3 decoder thread interface
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder_thread.h,v 1.6 2001/05/01 04:18:18 sam Exp $ * $Id: ac3_decoder_thread.h,v 1.7 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* *
...@@ -30,14 +30,11 @@ typedef struct ac3dec_thread_s ...@@ -30,14 +30,11 @@ typedef struct ac3dec_thread_s
* Thread properties * Thread properties
*/ */
vlc_thread_t thread_id; /* id for thread functions */ vlc_thread_t thread_id; /* id for thread functions */
// bit_stream_t bit_stream;
/* /*
* Input properties * Input properties
*/ */
decoder_fifo_t * p_fifo; /* stores the PES stream data */ decoder_fifo_t * p_fifo; /* stores the PES stream data */
// data_packet_t * p_data;
int sync_ptr; /* sync ptr from ac3 magic header */ int sync_ptr; /* sync ptr from ac3 magic header */
adec_config_t * p_config; adec_config_t * p_config;
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_downmix.c: ac3 downmix functions * ac3_downmix.c: ac3 downmix functions
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_downmix.c,v 1.22 2001/05/06 04:32:02 sam Exp $ * $Id: ac3_downmix.c,v 1.23 2001/05/14 15:58:03 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -31,29 +31,41 @@ ...@@ -31,29 +31,41 @@
#include "threads.h" #include "threads.h"
#include "mtime.h" #include "mtime.h"
#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */
#include "tests.h" #include "tests.h"
#include "stream_control.h" #include "stream_control.h"
#include "input_ext-dec.h" #include "input_ext-dec.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_internal.h"
#include "ac3_downmix.h" #include "ac3_downmix.h"
void downmix_init (downmix_t * p_downmix) void downmix_init (downmix_t * p_downmix)
{ {
#if 0 #if 0
if ( TestCPU (CPU_CAPABILITY_MMX) ) if ( TestCPU (CPU_CAPABILITY_SSE) )
{ {
fprintf(stderr,"Using MMX for downmix\n"); intf_WarnMsg (1,"ac3dec: using MMX_SSE for downmix");
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_kni; p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_sse;
p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_kni; p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_sse;
p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_kni; p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_sse;
p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_kni; p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_sse;
p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_kni; p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_sse;
p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_kni; p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_sse;
p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_kni; p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_sse;
} else }
else if ( TestCPU (CPU_CAPABILITY_3DNOW) )
{
intf_WarnMsg (1,"ac3dec: using MMX_3DNOW for downmix");
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_3dn;
p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_3dn;
p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_3dn;
p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_3dn;
p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_3dn;
p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_3dn;
p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_3dn;
}
else
#endif #endif
{ {
p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c; p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c;
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_downmix.h: ac3 downmix functions * ac3_downmix.h: ac3 downmix functions
***************************************************************************** *****************************************************************************
* Copyright (C) 2000, 2001 VideoLAN * Copyright (C) 2000, 2001 VideoLAN
* $Id: ac3_downmix.h,v 1.6 2001/04/30 21:04:20 reno Exp $ * $Id: ac3_downmix.h,v 1.7 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* *
...@@ -30,13 +30,22 @@ void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par); ...@@ -30,13 +30,22 @@ void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par);
void stream_sample_2ch_to_s16_c(s16 *s16_samples, float *left, float *right); void stream_sample_2ch_to_s16_c(s16 *s16_samples, float *left, float *right);
void stream_sample_1ch_to_s16_c(s16 *s16_samples, float *center); void stream_sample_1ch_to_s16_c(s16 *s16_samples, float *center);
#if 0 /* SSE functions */
/* Kni functions */ void downmix_3f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_3f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par); void downmix_3f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_3f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par); void downmix_2f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_2f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par); void downmix_2f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_2f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par); void downmix_3f_0r_to_2ch_sse(float *samples, dm_par_t * dm_par);
void downmix_3f_0r_to_2ch_kni(float *samples, dm_par_t * dm_par); void stream_sample_2ch_to_s16_sse(s16 *s16_samples, float *left, float *right);
void stream_sample_2ch_to_s16_kni(s16 *s16_samples, float *left, float *right); void stream_sample_1ch_to_s16_sse(s16 *s16_samples, float *center);
void stream_sample_1ch_to_s16_kni(s16 *s16_samples, float *center);
#endif /* 3DNow! functions */
void downmix_3f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_3f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_2f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_2f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void downmix_3f_0r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
void stream_sample_2ch_to_s16_3dn(s16 *s16_samples, float *left, float *right);
void stream_sample_1ch_to_s16_3dn(s16 *s16_samples, float *center);
/*****************************************************************************
* ac3_downmix_3dn.c: ac3 downmix functions
*****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_downmix_3dn.c,v 1.1 2001/05/14 15:58:04 reno Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include "defs.h"
#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#include "tests.h"
#include "stream_control.h"
#include "input_ext-dec.h"
#include "ac3_decoder.h"
/*****************************************************************************
 * downmix_3f_2r_to_2ch_3dn: 3 front + 2 rear channels to 2 channels (3DNow!)
 *****************************************************************************
 * samples: five consecutive planes of 256 floats (1024 bytes each), read as
 *          left, center, right, left surround, right surround.
 * dm_par:  the 32-bit floats at byte offsets 0, 4 and 8 are read as the
 *          unit, clev and slev gains.
 * The mix is done in place: the new left plane overwrites plane 0 and the
 * new right plane overwrites plane 1 (the former center plane).
 *   left'  = unit * left  + clev * center + slev * left surround
 *   right' = unit * right + clev * center + slev * right surround
 *****************************************************************************/
void downmix_3f_2r_to_2ch_3dn (float * samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "movl      $128, %%ecx\n"           /* 128 iterations x 2 floats */
    "movd      (%%ebx), %%mm5\n"        /* unit */
    "punpckldq %%mm5, %%mm5\n"          /* unit | unit */
    "movd      4(%%ebx), %%mm6\n"       /* clev */
    "punpckldq %%mm6, %%mm6\n"          /* clev | clev */
    "movd      8(%%ebx), %%mm7\n"       /* slev */
    "punpckldq %%mm7, %%mm7\n"          /* slev | slev */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movq      (%%eax), %%mm0\n"        /* left */
    "movq      2048(%%eax), %%mm1\n"    /* right */
    "movq      1024(%%eax), %%mm2\n"    /* center */
    "movq      3072(%%eax), %%mm3\n"    /* left surround */
    "movq      4096(%%eax), %%mm4\n"    /* right surround */
    "pfmul     %%mm5, %%mm0\n"
    "pfmul     %%mm5, %%mm1\n"
    "pfmul     %%mm6, %%mm2\n"
    "pfadd     %%mm2, %%mm0\n"
    "pfadd     %%mm2, %%mm1\n"
    "pfmul     %%mm7, %%mm3\n"
    "pfmul     %%mm7, %%mm4\n"
    "pfadd     %%mm3, %%mm0\n"
    "pfadd     %%mm4, %%mm1\n"
    "movq      %%mm0, (%%eax)\n"        /* center was already loaded, so */
    "movq      %%mm1, 1024(%%eax)\n"    /* overwriting plane 1 is safe   */
    "addl      $8, %%eax\n"
    "decl      %%ecx\n"
    "jnz       1b\n"
    "femms\n"                           /* leave MMX state */
    : "=a" (samples)
    : "0" (samples), "b" (dm_par)
    /* ecx is the loop counter; "memory" because the sample buffer is read
     * and written behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * downmix_2f_2r_to_2ch_3dn: 2 front + 2 rear channels to 2 channels (3DNow!)
 *****************************************************************************
 * samples: four consecutive planes of 256 floats (1024 bytes each), read as
 *          left, right, left surround, right surround.
 * dm_par:  the 32-bit floats at byte offsets 0 and 8 are read as the unit
 *          and slev gains.
 * The result is written in place over planes 0 (left) and 1 (right).
 *   left'  = unit * left  + slev * left surround
 *   right' = unit * right + slev * right surround
 *****************************************************************************/
void downmix_2f_2r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "movl      $128, %%ecx\n"           /* 128 iterations x 2 floats */
    "movd      (%%ebx), %%mm5\n"        /* unit */
    "punpckldq %%mm5, %%mm5\n"          /* unit | unit */
    "movd      8(%%ebx), %%mm7\n"       /* slev */
    "punpckldq %%mm7, %%mm7\n"          /* slev | slev */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movq      (%%eax), %%mm0\n"        /* left */
    "movq      1024(%%eax), %%mm1\n"    /* right */
    "movq      2048(%%eax), %%mm3\n"    /* left surround */
    "movq      3072(%%eax), %%mm4\n"    /* right surround */
    "pfmul     %%mm5, %%mm0\n"
    "pfmul     %%mm5, %%mm1\n"
    "pfmul     %%mm7, %%mm3\n"
    "pfmul     %%mm7, %%mm4\n"
    "pfadd     %%mm3, %%mm0\n"
    "pfadd     %%mm4, %%mm1\n"
    "movq      %%mm0, (%%eax)\n"
    "movq      %%mm1, 1024(%%eax)\n"
    "addl      $8, %%eax\n"
    "decl      %%ecx\n"
    "jnz       1b\n"
    "femms\n"                           /* leave MMX state */
    : "=a" (samples)
    : "0" (samples), "b" (dm_par)
    /* ecx is the loop counter; "memory" because the sample buffer is read
     * and written behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * downmix_3f_1r_to_2ch_3dn: 3 front + 1 rear channel to 2 channels (3DNow!)
 *****************************************************************************
 * samples: four consecutive planes of 256 floats (1024 bytes each), read as
 *          left, center, right, surround.
 * dm_par:  the 32-bit floats at byte offsets 0, 4 and 8 are read as the
 *          unit, clev and slev gains.
 * The result is written in place over planes 0 (left) and 1 (right).
 *   left'  = unit * left  + clev * center - slev * surround
 *   right' = unit * right + clev * center + slev * surround
 *****************************************************************************/
void downmix_3f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "movl      $128, %%ecx\n"           /* 128 iterations x 2 floats */
    "movd      (%%ebx), %%mm5\n"        /* unit */
    "punpckldq %%mm5, %%mm5\n"          /* unit | unit */
    "movd      4(%%ebx), %%mm6\n"       /* clev */
    "punpckldq %%mm6, %%mm6\n"          /* clev | clev */
    "movd      8(%%ebx), %%mm7\n"       /* slev */
    "punpckldq %%mm7, %%mm7\n"          /* slev | slev */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movq      (%%eax), %%mm0\n"        /* left */
    "movq      2048(%%eax), %%mm1\n"    /* right */
    "movq      1024(%%eax), %%mm2\n"    /* center */
    "movq      3072(%%eax), %%mm3\n"    /* surround */
    "pfmul     %%mm5, %%mm0\n"
    "pfmul     %%mm5, %%mm1\n"
    "pfmul     %%mm6, %%mm2\n"
    "pfadd     %%mm2, %%mm0\n"
    "pfmul     %%mm7, %%mm3\n"
    "pfadd     %%mm2, %%mm1\n"
    "pfsub     %%mm3, %%mm0\n"          /* surround is subtracted on the left */
    "pfadd     %%mm3, %%mm1\n"          /* and added on the right             */
    "movq      %%mm0, (%%eax)\n"
    "movq      %%mm1, 1024(%%eax)\n"
    "addl      $8, %%eax\n"
    "decl      %%ecx\n"
    "jnz       1b\n"
    "femms\n"                           /* leave MMX state */
    : "=a" (samples)
    : "0" (samples), "b" (dm_par)
    /* ecx is the loop counter; "memory" because the sample buffer is read
     * and written behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * downmix_2f_1r_to_2ch_3dn: 2 front + 1 rear channel to 2 channels (3DNow!)
 *****************************************************************************
 * samples: three consecutive planes of 256 floats (1024 bytes each), read as
 *          left, right, surround.
 * dm_par:  the 32-bit floats at byte offsets 0 and 8 are read as the unit
 *          and slev gains.
 * The result is written in place over planes 0 (left) and 1 (right).
 *   left'  = unit * left  - slev * surround
 *   right' = unit * right + slev * surround
 *****************************************************************************/
void downmix_2f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "movl      $128, %%ecx\n"           /* 128 iterations x 2 floats */
    "movd      (%%ebx), %%mm5\n"        /* unit */
    "punpckldq %%mm5, %%mm5\n"          /* unit | unit */
    "movd      8(%%ebx), %%mm7\n"       /* slev */
    "punpckldq %%mm7, %%mm7\n"          /* slev | slev */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movq      (%%eax), %%mm0\n"        /* left */
    "movq      1024(%%eax), %%mm1\n"    /* right */
    "movq      2048(%%eax), %%mm3\n"    /* surround */
    "pfmul     %%mm5, %%mm0\n"
    "pfmul     %%mm5, %%mm1\n"
    "pfmul     %%mm7, %%mm3\n"
    "pfsub     %%mm3, %%mm0\n"          /* surround is subtracted on the left */
    "pfadd     %%mm3, %%mm1\n"          /* and added on the right             */
    "movq      %%mm0, (%%eax)\n"
    "movq      %%mm1, 1024(%%eax)\n"
    "addl      $8, %%eax\n"
    "decl      %%ecx\n"
    "jnz       1b\n"
    "femms\n"                           /* leave MMX state */
    : "=a" (samples)
    : "0" (samples), "b" (dm_par)
    /* ecx is the loop counter; "memory" because the sample buffer is read
     * and written behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * downmix_3f_0r_to_2ch_3dn: 3 front channels to 2 channels (3DNow!)
 *****************************************************************************
 * samples: three consecutive planes of 256 floats (1024 bytes each), read as
 *          left, center, right.
 * dm_par:  the 32-bit floats at byte offsets 0 and 4 are read as the unit
 *          and clev gains.
 * The result is written in place over planes 0 (left) and 1 (right, the
 * former center plane).
 *   left'  = unit * left  + clev * center
 *   right' = unit * right + clev * center
 *****************************************************************************/
void downmix_3f_0r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "movl      $128, %%ecx\n"           /* 128 iterations x 2 floats */
    "movd      (%%ebx), %%mm5\n"        /* unit */
    "punpckldq %%mm5, %%mm5\n"          /* unit | unit */
    "movd      4(%%ebx), %%mm6\n"       /* clev */
    "punpckldq %%mm6, %%mm6\n"          /* clev | clev */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movq      (%%eax), %%mm0\n"        /* left */
    "movq      2048(%%eax), %%mm1\n"    /* right */
    "movq      1024(%%eax), %%mm2\n"    /* center */
    "pfmul     %%mm5, %%mm0\n"
    "pfmul     %%mm5, %%mm1\n"
    "pfmul     %%mm6, %%mm2\n"
    "pfadd     %%mm2, %%mm0\n"
    "pfadd     %%mm2, %%mm1\n"
    "movq      %%mm0, (%%eax)\n"        /* center was already loaded, so */
    "movq      %%mm1, 1024(%%eax)\n"    /* overwriting plane 1 is safe   */
    "addl      $8, %%eax\n"
    "decl      %%ecx\n"
    "jnz       1b\n"
    "femms\n"                           /* leave MMX state */
    : "=a" (samples)
    : "0" (samples), "b" (dm_par)
    /* ecx is the loop counter; "memory" because the sample buffer is read
     * and written behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * stream_sample_1ch_to_s16_3dn: one float channel to interleaved s16 (3DNow!)
 *****************************************************************************
 * left:        256 input floats (the single source channel).
 * s16_samples: receives 512 s16 values; every input sample is scaled by
 *              0.7071068, converted to a saturated 16-bit integer and
 *              written twice in a row (c0 c0 c1 c1 ...).
 * The scale factor is a local constant handed to the asm as a memory
 * operand, so this function needs no symbol from another object file.
 *****************************************************************************/
void stream_sample_1ch_to_s16_3dn (s16 *s16_samples, float *left)
{
    static const float f_scale = 0.7071068;

    __asm__ __volatile__ (
    "movd      %4, %%mm7\n"
    "punpckldq %%mm7, %%mm7\n"          /* scale | scale */
    "movl      $128, %%ecx\n"           /* 128 iterations x 2 floats */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movq      (%%ebx), %%mm0\n"        /* c1 | c0 */
    "pfmul     %%mm7, %%mm0\n"
    "pf2id     %%mm0, %%mm0\n"          /* c1 c0 --> mm0, int_32 */
    "packssdw  %%mm0, %%mm0\n"          /* c1 c0 c1 c0 --> mm0, int_16 */
    /* packing a register with itself yields c1 c0 c1 c0; interleave the
     * low words with themselves to get each sample twice in sequence */
    "punpcklwd %%mm0, %%mm0\n"          /* c1 c1 c0 c0 --> mm0, int_16 */
    "movq      %%mm0, (%%eax)\n"
    "addl      $8, %%eax\n"
    "addl      $8, %%ebx\n"
    "decl      %%ecx\n"
    "jnz       1b\n"
    "femms\n"                           /* leave MMX state */
    : "=a" (s16_samples), "=b" (left)
    : "0" (s16_samples), "1" (left), "m" (f_scale)
    /* ecx is the loop counter; "memory" because both buffers are accessed
     * behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * stream_sample_2ch_to_s16_3dn: two float channels to interleaved s16 (3DNow!)
 *****************************************************************************
 * left, right: 256 input floats each.
 * s16_samples: receives 512 s16 values (1024 bytes), interleaved as
 *              l0 r0 l1 r1 ..., each converted with saturation.
 * Exactly 8 output bytes are produced per iteration; nothing is written
 * beyond the 1024th byte of s16_samples.
 *****************************************************************************/
void stream_sample_2ch_to_s16_3dn (s16 *s16_samples, float *left, float *right)
{
    __asm__ __volatile__ (
    "movl      $128, %%ecx\n"           /* 128 iterations x 2 samples/channel */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movq      (%%ebx), %%mm0\n"        /* l1 | l0 */
    "movq      (%%edx), %%mm1\n"        /* r1 | r0 */
    "movq      %%mm0, %%mm2\n"          /* l1 | l0 */
    "punpckldq %%mm1, %%mm0\n"          /* r0 | l0 */
    "punpckhdq %%mm1, %%mm2\n"          /* r1 | l1 */
    "pf2id     %%mm0, %%mm0\n"          /* r0 l0 --> mm0, int_32 */
    "pf2id     %%mm2, %%mm2\n"          /* r1 l1 --> mm2, int_32 */
    "packssdw  %%mm2, %%mm0\n"          /* r1 l1 r0 l0 --> mm0, int_16 */
    /* mm0 holds the complete 8-byte result; mm2 still contains 32-bit
     * intermediates and must not be stored. */
    "movq      %%mm0, (%%eax)\n"
    "addl      $8, %%eax\n"
    "addl      $8, %%ebx\n"
    "addl      $8, %%edx\n"
    "decl      %%ecx\n"
    "jnz       1b\n"
    "femms\n"                           /* leave MMX state */
    : "=a" (s16_samples), "=b" (left), "=d" (right)
    : "0" (s16_samples), "1" (left), "2" (right)
    /* ecx is the loop counter; "memory" because all three buffers are
     * accessed behind the compiler's back. */
    : "ecx", "cc", "memory");
}
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_downmix_c.c: ac3 downmix functions * ac3_downmix_c.c: ac3 downmix functions
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN * Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_downmix_c.c,v 1.7 2001/05/06 04:32:02 sam Exp $ * $Id: ac3_downmix_c.c,v 1.8 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -35,11 +35,8 @@ ...@@ -35,11 +35,8 @@
#include "input_ext-dec.h" #include "input_ext-dec.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_internal.h"
#include "ac3_downmix.h" void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
void __inline__ downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
{ {
int i; int i;
float *left, *right, *center, *left_sur, *right_sur; float *left, *right, *center, *left_sur, *right_sur;
...@@ -59,7 +56,7 @@ void __inline__ downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par) ...@@ -59,7 +56,7 @@ void __inline__ downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
} }
} }
void __inline__ downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par) void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
{ {
int i; int i;
float *left, *right, *left_sur, *right_sur; float *left, *right, *left_sur, *right_sur;
...@@ -78,7 +75,7 @@ void __inline__ downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par) ...@@ -78,7 +75,7 @@ void __inline__ downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
} }
} }
void __inline__ downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par) void downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
{ {
int i; int i;
float *left, *right, *center, *right_sur; float *left, *right, *center, *right_sur;
...@@ -98,7 +95,7 @@ void __inline__ downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par) ...@@ -98,7 +95,7 @@ void __inline__ downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
} }
void __inline__ downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par) void downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
{ {
int i; int i;
float *left, *right, *right_sur; float *left, *right, *right_sur;
...@@ -117,7 +114,7 @@ void __inline__ downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par) ...@@ -117,7 +114,7 @@ void __inline__ downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
} }
void __inline__ downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par) void downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
{ {
int i; int i;
float *left, *right, *center; float *left, *right, *center;
...@@ -136,7 +133,7 @@ void __inline__ downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par) ...@@ -136,7 +133,7 @@ void __inline__ downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
} }
void __inline__ stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *right) void stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *right)
{ {
int i; int i;
for (i=0; i < 256; i++) { for (i=0; i < 256; i++) {
...@@ -146,7 +143,7 @@ void __inline__ stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *ri ...@@ -146,7 +143,7 @@ void __inline__ stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *ri
} }
void __inline__ stream_sample_1ch_to_s16_c (s16 *out_buf, float *center) void stream_sample_1ch_to_s16_c (s16 *out_buf, float *center)
{ {
int i; int i;
float tmp; float tmp;
......
/*****************************************************************************
* ac3_downmix_sse.c: ac3 downmix functions
*****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_downmix_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
#include "defs.h"
#include "config.h"
#include "common.h"
#include "threads.h"
#include "mtime.h"
#include "tests.h"
#include "stream_control.h"
#include "input_ext-dec.h"
#include "ac3_decoder.h"
/*****************************************************************************
 * sqrt2: storage for the single-precision constant 0.7071068
 *****************************************************************************
 * This is not a callable routine: its body only emits a 32-bit float through
 * an assembler directive, so that the symbol "sqrt2" can be used as the
 * address of that float from inline assembly (e.g. "movl $sqrt2, %edx").
 * Despite the name, the stored value is 0.7071068 (about 1/sqrt(2)).
 *
 * NOTE(review): the float sits at the symbol's address only if the compiler
 * emits no instructions ahead of the asm statement (for instance a frame
 * pointer prologue) -- presumably this relies on the build flags; confirm,
 * or move the constant to a real data object.
 *****************************************************************************/
void sqrt2 (void)
{
__asm__ (".float 0f0.7071068");
}
/*****************************************************************************
 * downmix_3f_2r_to_2ch_sse: 3 front + 2 rear channels to 2 channels (SSE)
 *****************************************************************************
 * samples: five consecutive planes of 256 floats (1024 bytes each), read as
 *          left, center, right, left surround, right surround.
 * dm_par:  the 32-bit floats at byte offsets 0, 4 and 8 are read as the
 *          unit, clev and slev gains.
 * The mix is done in place: the new left plane overwrites plane 0 and the
 * new right plane overwrites plane 1 (the former center plane).
 *   left'  = unit * left  + clev * center + slev * left surround
 *   right' = unit * right + clev * center + slev * right surround
 *****************************************************************************/
void downmix_3f_2r_to_2ch_sse (float * samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "movl   $64, %%ecx\n"               /* 64 iterations x 4 floats */
    "movss  (%%ebx), %%xmm5\n"          /* unit */
    "shufps $0, %%xmm5, %%xmm5\n"       /* unit | unit | unit | unit */
    "movss  4(%%ebx), %%xmm6\n"         /* clev */
    "shufps $0, %%xmm6, %%xmm6\n"       /* clev | clev | clev | clev */
    "movss  8(%%ebx), %%xmm7\n"         /* slev */
    "shufps $0, %%xmm7, %%xmm7\n"       /* slev | slev | slev | slev */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movups (%%eax), %%xmm0\n"          /* left */
    "movups 2048(%%eax), %%xmm1\n"      /* right */
    "movups 1024(%%eax), %%xmm2\n"      /* center */
    "movups 3072(%%eax), %%xmm3\n"      /* left surround */
    "movups 4096(%%eax), %%xmm4\n"      /* right surround */
    "mulps  %%xmm5, %%xmm0\n"
    "mulps  %%xmm5, %%xmm1\n"
    "mulps  %%xmm6, %%xmm2\n"
    "addps  %%xmm2, %%xmm0\n"
    "addps  %%xmm2, %%xmm1\n"
    "mulps  %%xmm7, %%xmm3\n"
    "mulps  %%xmm7, %%xmm4\n"
    "addps  %%xmm3, %%xmm0\n"
    "addps  %%xmm4, %%xmm1\n"
    "movups %%xmm0, (%%eax)\n"          /* center was already loaded, so */
    "movups %%xmm1, 1024(%%eax)\n"      /* overwriting plane 1 is safe   */
    "addl   $16, %%eax\n"
    "decl   %%ecx\n"
    "jnz    1b\n"
    : "=a" (samples)
    : "0" (samples), "b" (dm_par)
    /* ecx is the loop counter; "memory" because the sample buffer is read
     * and written behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * downmix_2f_2r_to_2ch_sse: 2 front + 2 rear channels to 2 channels (SSE)
 *****************************************************************************
 * samples: four consecutive planes of 256 floats (1024 bytes each), read as
 *          left, right, left surround, right surround.
 * dm_par:  the 32-bit floats at byte offsets 0 and 8 are read as the unit
 *          and slev gains.
 * The result is written in place over planes 0 (left) and 1 (right).
 *   left'  = unit * left  + slev * left surround
 *   right' = unit * right + slev * right surround
 *****************************************************************************/
void downmix_2f_2r_to_2ch_sse (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "movl   $64, %%ecx\n"               /* 64 iterations x 4 floats */
    "movss  (%%ebx), %%xmm5\n"          /* unit */
    "shufps $0, %%xmm5, %%xmm5\n"       /* unit | unit | unit | unit */
    "movss  8(%%ebx), %%xmm7\n"         /* slev */
    "shufps $0, %%xmm7, %%xmm7\n"       /* slev | slev | slev | slev */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movups (%%eax), %%xmm0\n"          /* left */
    "movups 1024(%%eax), %%xmm1\n"      /* right */
    "movups 2048(%%eax), %%xmm3\n"      /* left surround */
    "movups 3072(%%eax), %%xmm4\n"      /* right surround */
    "mulps  %%xmm5, %%xmm0\n"
    "mulps  %%xmm5, %%xmm1\n"
    "mulps  %%xmm7, %%xmm3\n"
    "mulps  %%xmm7, %%xmm4\n"
    "addps  %%xmm3, %%xmm0\n"
    "addps  %%xmm4, %%xmm1\n"
    "movups %%xmm0, (%%eax)\n"
    "movups %%xmm1, 1024(%%eax)\n"
    "addl   $16, %%eax\n"
    "decl   %%ecx\n"
    "jnz    1b\n"
    : "=a" (samples)
    : "0" (samples), "b" (dm_par)
    /* ecx is the loop counter; "memory" because the sample buffer is read
     * and written behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * downmix_3f_1r_to_2ch_sse: 3 front + 1 rear channel to 2 channels (SSE)
 *****************************************************************************
 * samples: four consecutive planes of 256 floats (1024 bytes each), read as
 *          left, center, right, surround.
 * dm_par:  the 32-bit floats at byte offsets 0, 4 and 8 are read as the
 *          unit, clev and slev gains.
 * The result is written in place over planes 0 (left) and 1 (right).
 *   left'  = unit * left  + clev * center - slev * surround
 *   right' = unit * right + clev * center + slev * surround
 *****************************************************************************/
void downmix_3f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "movl   $64, %%ecx\n"               /* 64 iterations x 4 floats */
    "movss  (%%ebx), %%xmm5\n"          /* unit */
    "shufps $0, %%xmm5, %%xmm5\n"       /* unit | unit | unit | unit */
    "movss  4(%%ebx), %%xmm6\n"         /* clev */
    "shufps $0, %%xmm6, %%xmm6\n"       /* clev | clev | clev | clev */
    "movss  8(%%ebx), %%xmm7\n"         /* slev */
    "shufps $0, %%xmm7, %%xmm7\n"       /* slev | slev | slev | slev */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movups (%%eax), %%xmm0\n"          /* left */
    "movups 2048(%%eax), %%xmm1\n"      /* right */
    "movups 1024(%%eax), %%xmm2\n"      /* center */
    "movups 3072(%%eax), %%xmm3\n"      /* surround */
    "mulps  %%xmm5, %%xmm0\n"
    "mulps  %%xmm5, %%xmm1\n"
    "mulps  %%xmm6, %%xmm2\n"
    "addps  %%xmm2, %%xmm0\n"
    "mulps  %%xmm7, %%xmm3\n"
    "addps  %%xmm2, %%xmm1\n"
    "subps  %%xmm3, %%xmm0\n"           /* surround is subtracted on the left */
    "addps  %%xmm3, %%xmm1\n"           /* and added on the right             */
    "movups %%xmm0, (%%eax)\n"
    "movups %%xmm1, 1024(%%eax)\n"
    "addl   $16, %%eax\n"
    "decl   %%ecx\n"
    "jnz    1b\n"
    : "=a" (samples)
    : "0" (samples), "b" (dm_par)
    /* ecx is the loop counter; "memory" because the sample buffer is read
     * and written behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * downmix_2f_1r_to_2ch_sse: 2 front + 1 rear channel to 2 channels (SSE)
 *****************************************************************************
 * samples: three consecutive planes of 256 floats (1024 bytes each), read as
 *          left, right, surround.
 * dm_par:  the 32-bit floats at byte offsets 0 and 8 are read as the unit
 *          and slev gains.
 * The result is written in place over planes 0 (left) and 1 (right).
 *   left'  = unit * left  - slev * surround
 *   right' = unit * right + slev * surround
 *****************************************************************************/
void downmix_2f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "movl   $64, %%ecx\n"               /* 64 iterations x 4 floats */
    "movss  (%%ebx), %%xmm5\n"          /* unit */
    "shufps $0, %%xmm5, %%xmm5\n"       /* unit | unit | unit | unit */
    "movss  8(%%ebx), %%xmm7\n"         /* slev */
    "shufps $0, %%xmm7, %%xmm7\n"       /* slev | slev | slev | slev */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movups (%%eax), %%xmm0\n"          /* left */
    "movups 1024(%%eax), %%xmm1\n"      /* right */
    "movups 2048(%%eax), %%xmm3\n"      /* surround */
    "mulps  %%xmm5, %%xmm0\n"
    "mulps  %%xmm5, %%xmm1\n"
    "mulps  %%xmm7, %%xmm3\n"
    "subps  %%xmm3, %%xmm0\n"           /* surround is subtracted on the left */
    "addps  %%xmm3, %%xmm1\n"           /* and added on the right             */
    "movups %%xmm0, (%%eax)\n"
    "movups %%xmm1, 1024(%%eax)\n"
    "addl   $16, %%eax\n"
    "decl   %%ecx\n"
    "jnz    1b\n"
    : "=a" (samples)
    : "0" (samples), "b" (dm_par)
    /* ecx is the loop counter; "memory" because the sample buffer is read
     * and written behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * downmix_3f_0r_to_2ch_sse: 3 front channels to 2 channels (SSE)
 *****************************************************************************
 * samples: three consecutive planes of 256 floats (1024 bytes each), read as
 *          left, center, right.
 * dm_par:  the 32-bit floats at byte offsets 0 and 4 are read as the unit
 *          and clev gains.
 * The result is written in place over planes 0 (left) and 1 (right, the
 * former center plane).
 *   left'  = unit * left  + clev * center
 *   right' = unit * right + clev * center
 *****************************************************************************/
void downmix_3f_0r_to_2ch_sse (float *samples, dm_par_t * dm_par)
{
    __asm__ __volatile__ (
    "movl   $64, %%ecx\n"               /* 64 iterations x 4 floats */
    "movss  (%%ebx), %%xmm5\n"          /* unit */
    "shufps $0, %%xmm5, %%xmm5\n"       /* unit | unit | unit | unit */
    "movss  4(%%ebx), %%xmm6\n"         /* clev */
    "shufps $0, %%xmm6, %%xmm6\n"       /* clev | clev | clev | clev */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movups (%%eax), %%xmm0\n"          /* left */
    "movups 2048(%%eax), %%xmm1\n"      /* right */
    "movups 1024(%%eax), %%xmm2\n"      /* center */
    "mulps  %%xmm5, %%xmm0\n"
    "mulps  %%xmm5, %%xmm1\n"
    "mulps  %%xmm6, %%xmm2\n"
    "addps  %%xmm2, %%xmm0\n"
    "addps  %%xmm2, %%xmm1\n"
    "movups %%xmm0, (%%eax)\n"          /* center was already loaded, so */
    "movups %%xmm1, 1024(%%eax)\n"      /* overwriting plane 1 is safe   */
    "addl   $16, %%eax\n"
    "decl   %%ecx\n"
    "jnz    1b\n"
    : "=a" (samples)
    : "0" (samples), "b" (dm_par)
    /* ecx is the loop counter; "memory" because the sample buffer is read
     * and written behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * stream_sample_1ch_to_s16_sse: one float channel to interleaved s16 (SSE)
 *****************************************************************************
 * left:        256 input floats (the single source channel).
 * s16_samples: receives 512 s16 values; every input sample is scaled by
 *              0.7071068, converted to a saturated 16-bit integer and
 *              written twice in a row (c0 c0 c1 c1 ...).
 * The scale factor is a local constant handed to the asm as a memory
 * operand, so the value does not depend on how any other symbol is laid
 * out by the compiler.
 *****************************************************************************/
void stream_sample_1ch_to_s16_sse (s16 *s16_samples, float *left)
{
    static const float f_scale = 0.7071068;

    __asm__ __volatile__ (
    "movss     %4, %%xmm7\n"
    "shufps    $0, %%xmm7, %%xmm7\n"    /* scale | scale | scale | scale */
    "movl      $64, %%ecx\n"            /* 64 iterations x 4 floats */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movups    (%%ebx), %%xmm0\n"       /* c3 | c2 | c1 | c0 */
    "mulps     %%xmm7, %%xmm0\n"
    "movhlps   %%xmm0, %%xmm2\n"        /* c3 | c2 */
    "cvtps2pi  %%xmm0, %%mm0\n"         /* c1 c0 --> mm0, int_32 */
    "cvtps2pi  %%xmm2, %%mm1\n"         /* c3 c2 --> mm1, int_32 */
    "packssdw  %%mm0, %%mm0\n"          /* c1 c0 c1 c0 --> mm0, int_16 */
    "packssdw  %%mm1, %%mm1\n"          /* c3 c2 c3 c2 --> mm1, int_16 */
    /* packing a register with itself yields cB cA cB cA; interleave the
     * low words with themselves to get each sample twice in sequence */
    "punpcklwd %%mm0, %%mm0\n"          /* c1 c1 c0 c0 --> mm0, int_16 */
    "punpcklwd %%mm1, %%mm1\n"          /* c3 c3 c2 c2 --> mm1, int_16 */
    "movq      %%mm0, (%%eax)\n"
    "movq      %%mm1, 8(%%eax)\n"
    "addl      $16, %%eax\n"
    "addl      $16, %%ebx\n"
    "decl      %%ecx\n"
    "jnz       1b\n"
    "emms\n"                            /* leave MMX state */
    : "=a" (s16_samples), "=b" (left)
    : "0" (s16_samples), "1" (left), "m" (f_scale)
    /* ecx is the loop counter; "memory" because both buffers are accessed
     * behind the compiler's back. */
    : "ecx", "cc", "memory");
}
/*****************************************************************************
 * stream_sample_2ch_to_s16_sse: two float channels to interleaved s16 (SSE)
 *****************************************************************************
 * left, right: 256 input floats each.
 * s16_samples: receives 512 s16 values (1024 bytes), interleaved as
 *              l0 r0 l1 r1 ..., each converted with saturation.
 * Four samples per channel are handled per iteration (16 output bytes).
 *****************************************************************************/
void stream_sample_2ch_to_s16_sse (s16 *s16_samples, float *left, float *right)
{
    __asm__ __volatile__ (
    "movl     $64, %%ecx\n"             /* 64 iterations x 4 samples/channel */
    "1:\n"                              /* numeric label: safe if duplicated */
    "movups   (%%ebx), %%xmm0\n"        /* l3 | l2 | l1 | l0 */
    "movups   (%%edx), %%xmm1\n"        /* r3 | r2 | r1 | r0 */
    "movhlps  %%xmm0, %%xmm2\n"         /* l3 | l2 */
    "movhlps  %%xmm1, %%xmm3\n"         /* r3 | r2 */
    "unpcklps %%xmm1, %%xmm0\n"         /* r1 | l1 | r0 | l0 */
    "unpcklps %%xmm3, %%xmm2\n"         /* r3 | l3 | r2 | l2 */
    "cvtps2pi %%xmm0, %%mm0\n"          /* r0 l0 --> mm0, int_32 */
    "movhlps  %%xmm0, %%xmm0\n"         /* bring r1 | l1 to the low half */
    "cvtps2pi %%xmm0, %%mm1\n"          /* r1 l1 --> mm1, int_32 */
    "cvtps2pi %%xmm2, %%mm2\n"          /* r2 l2 --> mm2, int_32 */
    "movhlps  %%xmm2, %%xmm2\n"         /* bring r3 | l3 to the low half */
    "cvtps2pi %%xmm2, %%mm3\n"          /* r3 l3 --> mm3, int_32 */
    "packssdw %%mm1, %%mm0\n"           /* r1 l1 r0 l0 --> mm0, int_16 */
    "packssdw %%mm3, %%mm2\n"           /* r3 l3 r2 l2 --> mm2, int_16 */
    "movq     %%mm0, (%%eax)\n"
    "movq     %%mm2, 8(%%eax)\n"
    "addl     $16, %%eax\n"
    "addl     $16, %%ebx\n"
    "addl     $16, %%edx\n"
    "decl     %%ecx\n"
    "jnz      1b\n"
    "emms\n"                            /* leave MMX state */
    : "=a" (s16_samples), "=b" (left), "=d" (right)
    : "0" (s16_samples), "1" (left), "2" (right)
    /* ecx is the loop counter; "memory" because all three buffers are
     * accessed behind the compiler's back. */
    : "ecx", "cc", "memory");
}
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_exponent.c: ac3 exponent calculations * ac3_exponent.c: ac3 exponent calculations
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_exponent.c,v 1.23 2001/04/20 12:14:34 reno Exp $ * $Id: ac3_exponent.c,v 1.24 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Michel Lespinasse <walken@zoy.org> * Michel Lespinasse <walken@zoy.org>
...@@ -31,15 +31,14 @@ ...@@ -31,15 +31,14 @@
#include "threads.h" #include "threads.h"
#include "mtime.h" #include "mtime.h"
#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */
#include "stream_control.h" #include "stream_control.h"
#include "input_ext-dec.h" #include "input_ext-dec.h"
#include "audio_output.h" #include "audio_output.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_decoder_thread.h"
#include "intf_msg.h"
#include "ac3_internal.h" #include "ac3_internal.h"
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_imdct.c: ac3 DCT * ac3_imdct.c: ac3 DCT
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_imdct.c,v 1.18 2001/05/06 04:32:02 sam Exp $ * $Id: ac3_imdct.c,v 1.19 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -39,15 +39,11 @@ ...@@ -39,15 +39,11 @@
#include "input_ext-dec.h" #include "input_ext-dec.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_internal.h"
#include "ac3_downmix.h" #include "ac3_imdct_c.h" /* imdct_init_c */
#include "ac3_imdct_c.h" #include "ac3_imdct_sse.h" /* imdct_init_sse */
#if 0
#include "ac3_imdct_kni.h"
#endif
#include "tests.h" #include "tests.h" /* TestCPU */
#ifndef M_PI #ifndef M_PI
# define M_PI 3.14159265358979323846 # define M_PI 3.14159265358979323846
...@@ -57,13 +53,13 @@ ...@@ -57,13 +53,13 @@
void imdct_init(imdct_t * p_imdct) void imdct_init(imdct_t * p_imdct)
{ {
int i; int i;
float scale = 255.99609372; float scale = 181.019;
#if 0 #if 0
if ( TestCPU (CPU_CAPABILITY_MMX) ) if ( TestCPU (CPU_CAPABILITY_SSE) )
{ {
imdct_init_kni (p_imdct); imdct_init_sse (p_imdct);
} else }
else
#endif #endif
{ {
imdct_init_c (p_imdct); imdct_init_c (p_imdct);
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_imdct_c.c: ac3 DCT * ac3_imdct_c.c: ac3 DCT
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_imdct_c.c,v 1.2 2001/05/06 04:32:02 sam Exp $ * $Id: ac3_imdct_c.c,v 1.3 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#include "input_ext-dec.h" #include "input_ext-dec.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_internal.h" #include "ac3_imdct_c.h"
#ifndef M_PI #ifndef M_PI
# define M_PI 3.14159265358979323846 # define M_PI 3.14159265358979323846
...@@ -46,9 +46,6 @@ ...@@ -46,9 +46,6 @@
void fft_64p_c (complex_t *x); void fft_64p_c (complex_t *x);
void fft_128p_c (complex_t *x); void fft_128p_c (complex_t *x);
void imdct_do_512_c (imdct_t * p_imdct, float data[], float delay[]);
void imdct_do_512_nol_c (imdct_t * p_imdct, float data[], float delay[]);
static float window[] = { static float window[] = {
0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
...@@ -112,7 +109,7 @@ static const int pm64[64] = ...@@ -112,7 +109,7 @@ static const int pm64[64] =
int imdct_init_c (imdct_t * p_imdct) int imdct_init_c (imdct_t * p_imdct)
{ {
int i; int i;
float scale = 255.99609372; float scale = 181.019;
p_imdct->imdct_do_512 = imdct_do_512_c; p_imdct->imdct_do_512 = imdct_do_512_c;
p_imdct->imdct_do_512_nol = imdct_do_512_nol_c; p_imdct->imdct_do_512_nol = imdct_do_512_nol_c;
......
This diff is collapsed.
/* SSE entry points of the IMDCT. The names and signatures mirror the
 * imdct_init_c / imdct_do_512_c / imdct_do_512_nol_c functions of the C
 * implementation; the SSE bodies are not visible here. */

/* Called by imdct_init() with the decoder's imdct_t when
 * TestCPU (CPU_CAPABILITY_SSE) reports SSE support.
 * NOTE(review): presumably installs the two transforms below into p_imdct,
 * as imdct_init_c does with its own pair -- confirm against the definition. */
int imdct_init_sse (imdct_t * p_imdct);
/* NOTE(review): presumably the SSE version of imdct_do_512_c -- confirm. */
void imdct_do_512_sse(imdct_t * p_imdct, float data[], float delay[]);
/* NOTE(review): presumably the SSE version of imdct_do_512_nol_c -- confirm. */
void imdct_do_512_nol_sse(imdct_t * p_imdct, float data[], float delay[]);
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_internals.h: needed by the ac3 decoder * ac3_internals.h: needed by the ac3 decoder
***************************************************************************** *****************************************************************************
* Copyright (C) 2000 VideoLAN * Copyright (C) 2000 VideoLAN
* $Id: ac3_internal.h,v 1.8 2001/03/21 13:42:34 sam Exp $ * $Id: ac3_internal.h,v 1.9 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Michel Lespinasse <walken@zoy.org> * Authors: Michel Lespinasse <walken@zoy.org>
* *
...@@ -37,12 +37,13 @@ ...@@ -37,12 +37,13 @@
void bit_allocate (ac3dec_t *); void bit_allocate (ac3dec_t *);
/* ac3_downmix.c */ /* ac3_downmix.c */
int downmix (ac3dec_t *, float *, s16 *); void downmix_init (downmix_t * p_downmix);
/* ac3_exponent.c */ /* ac3_exponent.c */
int exponent_unpack (ac3dec_t *); int exponent_unpack (ac3dec_t *);
/* ac3_imdct.c */ /* ac3_imdct.c */
void imdct_init (imdct_t * p_imdct);
void imdct (ac3dec_t * p_ac3dec, s16 * buffer); void imdct (ac3dec_t * p_ac3dec, s16 * buffer);
/* ac3_mantissa.c */ /* ac3_mantissa.c */
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_mantissa.c: ac3 mantissa computation * ac3_mantissa.c: ac3 mantissa computation
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN * Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_mantissa.c,v 1.27 2001/05/07 03:14:09 stef Exp $ * $Id: ac3_mantissa.c,v 1.28 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -38,9 +38,6 @@ ...@@ -38,9 +38,6 @@
#include "audio_output.h" #include "audio_output.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_decoder_thread.h"
#include "ac3_internal.h"
#include "intf_msg.h" #include "intf_msg.h"
...@@ -291,7 +288,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf ...@@ -291,7 +288,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf
p_ac3dec->total_bits_read += 5; p_ac3dec->total_bits_read += 5;
if ((group_code = GetBits (&p_ac3dec->bit_stream,5)) > 26) if ((group_code = GetBits (&p_ac3dec->bit_stream,5)) > 26)
{ {
intf_WarnMsg ( 3, "ac3dec error: invalid mantissa (1)" ); intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (1)" );
return 0; return 0;
} }
...@@ -312,7 +309,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf ...@@ -312,7 +309,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf
p_ac3dec->total_bits_read += 7; p_ac3dec->total_bits_read += 7;
if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 124) if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 124)
{ {
intf_WarnMsg ( 3, "ac3dec error: invalid mantissa (2)" ); intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (2)" );
return 0; return 0;
} }
...@@ -327,7 +324,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf ...@@ -327,7 +324,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf
p_ac3dec->total_bits_read += 3; p_ac3dec->total_bits_read += 3;
if ((group_code = GetBits (&p_ac3dec->bit_stream,3)) > 6) if ((group_code = GetBits (&p_ac3dec->bit_stream,3)) > 6)
{ {
intf_WarnMsg ( 3, "ac3dec error: invalid mantissa (3)" ); intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (3)" );
return 0; return 0;
} }
...@@ -343,7 +340,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf ...@@ -343,7 +340,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf
p_ac3dec->total_bits_read += 7; p_ac3dec->total_bits_read += 7;
if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 120) if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 120)
{ {
intf_WarnMsg ( 3, "ac3dec error: invalid mantissa (4)" ); intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (4)" );
return 0; return 0;
} }
...@@ -357,7 +354,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf ...@@ -357,7 +354,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf
p_ac3dec->total_bits_read += 4; p_ac3dec->total_bits_read += 4;
if ((group_code = GetBits (&p_ac3dec->bit_stream,4)) > 14) if ((group_code = GetBits (&p_ac3dec->bit_stream,4)) > 14)
{ {
intf_WarnMsg ( 3, "ac3dec error: invalid mantissa (5)" ); intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (5)" );
return 0; return 0;
} }
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_parse.c: ac3 parsing procedures * ac3_parse.c: ac3 parsing procedures
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN * Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_parse.c,v 1.21 2001/05/07 04:42:42 sam Exp $ * $Id: ac3_parse.c,v 1.22 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -40,9 +40,9 @@ ...@@ -40,9 +40,9 @@
#include "intf_msg.h" #include "intf_msg.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_decoder_thread.h" #include "ac3_decoder_thread.h" /* ac3dec_thread_t */
#include "ac3_internal.h" #include "ac3_internal.h" /* EXP_REUSE */
/* Misc LUT */ /* Misc LUT */
static const u16 nfchans[] = { 2, 1, 2, 3, 3, 4, 4, 5 }; static const u16 nfchans[] = { 2, 1, 2, 3, 3, 4, 4, 5 };
...@@ -97,8 +97,10 @@ static const struct frmsize_s frmsizecod_tbl[] = ...@@ -97,8 +97,10 @@ static const struct frmsize_s frmsizecod_tbl[] =
static const int fscod_tbl[] = {48000, 44100, 32000}; static const int fscod_tbl[] = {48000, 44100, 32000};
/* Some internal functions */ /* Some internal functions */
void parse_bsi_stats (ac3dec_t * p_ac3dec); #ifdef STATS
void parse_audblk_stats (ac3dec_t * p_ac3dec); static void parse_bsi_stats (ac3dec_t * p_ac3dec);
static void parse_audblk_stats (ac3dec_t * p_ac3dec);
#endif
/* Parse a syncinfo structure */ /* Parse a syncinfo structure */
int ac3_sync_frame (ac3dec_t * p_ac3dec, ac3_sync_info_t * p_sync_info) int ac3_sync_frame (ac3dec_t * p_ac3dec, ac3_sync_info_t * p_sync_info)
...@@ -778,7 +780,7 @@ int parse_audblk (ac3dec_t * p_ac3dec, int blknum) ...@@ -778,7 +780,7 @@ int parse_audblk (ac3dec_t * p_ac3dec, int blknum)
} }
#ifdef STATS #ifdef STATS
// parse_audblk_stats(p_ac3dec); parse_audblk_stats(p_ac3dec);
#endif #endif
return 0; return 0;
...@@ -806,7 +808,8 @@ void parse_auxdata (ac3dec_t * p_ac3dec) ...@@ -806,7 +808,8 @@ void parse_auxdata (ac3dec_t * p_ac3dec)
RemoveBits (&p_ac3dec->bit_stream,16); RemoveBits (&p_ac3dec->bit_stream,16);
} }
void parse_bsi_stats (ac3dec_t * p_ac3dec) /*Some stats */ #ifdef STATS
static void parse_bsi_stats (ac3dec_t * p_ac3dec) /* Some stats */
{ {
struct mixlev_s struct mixlev_s
{ {
...@@ -850,7 +853,7 @@ void parse_bsi_stats (ac3dec_t * p_ac3dec) /*Some stats */ ...@@ -850,7 +853,7 @@ void parse_bsi_stats (ac3dec_t * p_ac3dec) /*Some stats */
i = 0; i = 0;
} }
void parse_audblk_stats (ac3dec_t * p_ac3dec) static void parse_audblk_stats (ac3dec_t * p_ac3dec)
{ {
char *exp_strat_tbl[4] = {"R ","D15 ","D25 ","D45 "}; char *exp_strat_tbl[4] = {"R ","D15 ","D25 ","D45 "};
u32 i; u32 i;
...@@ -871,3 +874,4 @@ void parse_audblk_stats (ac3dec_t * p_ac3dec) ...@@ -871,3 +874,4 @@ void parse_audblk_stats (ac3dec_t * p_ac3dec)
intf_ErrMsg ("\n"); intf_ErrMsg ("\n");
} }
#endif
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_rematrix.c: ac3 audio rematrixing * ac3_rematrix.c: ac3 audio rematrixing
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_rematrix.c,v 1.16 2001/05/06 04:32:02 sam Exp $ * $Id: ac3_rematrix.c,v 1.17 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
#include "input_ext-dec.h" #include "input_ext-dec.h"
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_internal.h"
struct rematrix_band_s { struct rematrix_band_s {
u32 start; u32 start;
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_srfft.c: ac3 FFT * ac3_srfft.c: ac3 FFT
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_srfft.c,v 1.3 2001/05/06 04:32:02 sam Exp $ * $Id: ac3_srfft.c,v 1.4 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -40,9 +40,9 @@ ...@@ -40,9 +40,9 @@
#include "ac3_decoder.h" #include "ac3_decoder.h"
#include "ac3_srfft.h" #include "ac3_srfft.h"
void fft_8 (complex_t *x); static void fft_8 (complex_t *x);
void fft_4(complex_t *x) static void fft_4(complex_t *x)
{ {
/* delta_p = 1 here */ /* delta_p = 1 here */
/* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4}
...@@ -90,7 +90,7 @@ void fft_4(complex_t *x) ...@@ -90,7 +90,7 @@ void fft_4(complex_t *x)
} }
void fft_8 (complex_t *x) static void fft_8 (complex_t *x)
{ {
/* delta_p = diag{1, sqrt(i)} here */ /* delta_p = diag{1, sqrt(i)} here */
/* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8} /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8}
...@@ -205,7 +205,7 @@ void fft_8 (complex_t *x) ...@@ -205,7 +205,7 @@ void fft_8 (complex_t *x)
} }
void fft_asmb(int k, complex_t *x, complex_t *wTB, static void fft_asmb(int k, complex_t *x, complex_t *wTB,
const complex_t *d, const complex_t *d_3) const complex_t *d, const complex_t *d_3)
{ {
register complex_t *x2k, *x3k, *x4k, *wB; register complex_t *x2k, *x3k, *x4k, *wB;
...@@ -236,7 +236,7 @@ void fft_asmb(int k, complex_t *x, complex_t *wTB, ...@@ -236,7 +236,7 @@ void fft_asmb(int k, complex_t *x, complex_t *wTB,
} }
void fft_asmb16(complex_t *x, complex_t *wTB) static void fft_asmb16(complex_t *x, complex_t *wTB)
{ {
register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i; register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
int k = 2; int k = 2;
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_srfft.h: ac3 FFT * ac3_srfft.h: ac3 FFT
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_srfft.h,v 1.2 2001/04/30 21:10:25 reno Exp $ * $Id: ac3_srfft.h,v 1.3 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -22,19 +22,19 @@ ...@@ -22,19 +22,19 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/ *****************************************************************************/
static complex_t delta16[4] = static const complex_t delta16[4] =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.92387953251129, -0.38268343236509}, {0.92387953251129, -0.38268343236509},
{0.70710678118655, -0.70710678118655}, {0.70710678118655, -0.70710678118655},
{0.38268343236509, -0.92387953251129}}; {0.38268343236509, -0.92387953251129}};
static complex_t delta16_3[4] = static const complex_t delta16_3[4] =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.38268343236509, -0.92387953251129}, {0.38268343236509, -0.92387953251129},
{-0.70710678118655, -0.70710678118655}, {-0.70710678118655, -0.70710678118655},
{-0.92387953251129, 0.38268343236509}}; {-0.92387953251129, 0.38268343236509}};
static complex_t delta32[8] = static const complex_t delta32[8] =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.98078528040323, -0.19509032201613}, {0.98078528040323, -0.19509032201613},
{0.92387953251129, -0.38268343236509}, {0.92387953251129, -0.38268343236509},
...@@ -44,7 +44,7 @@ static complex_t delta32[8] = ...@@ -44,7 +44,7 @@ static complex_t delta32[8] =
{0.38268343236509, -0.92387953251129}, {0.38268343236509, -0.92387953251129},
{0.19509032201613, -0.98078528040323}}; {0.19509032201613, -0.98078528040323}};
static complex_t delta32_3[8] = static const complex_t delta32_3[8] =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.83146961230255, -0.55557023301960}, {0.83146961230255, -0.55557023301960},
{0.38268343236509, -0.92387953251129}, {0.38268343236509, -0.92387953251129},
...@@ -54,7 +54,7 @@ static complex_t delta32_3[8] = ...@@ -54,7 +54,7 @@ static complex_t delta32_3[8] =
{-0.92387953251129, 0.38268343236509}, {-0.92387953251129, 0.38268343236509},
{-0.55557023301960, 0.83146961230255}}; {-0.55557023301960, 0.83146961230255}};
static complex_t delta64[16] = static const complex_t delta64[16] =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.99518472667220, -0.09801714032956}, {0.99518472667220, -0.09801714032956},
{0.98078528040323, -0.19509032201613}, {0.98078528040323, -0.19509032201613},
...@@ -72,7 +72,7 @@ static complex_t delta64[16] = ...@@ -72,7 +72,7 @@ static complex_t delta64[16] =
{0.19509032201613, -0.98078528040323}, {0.19509032201613, -0.98078528040323},
{0.09801714032956, -0.99518472667220}}; {0.09801714032956, -0.99518472667220}};
static complex_t delta64_3[16] = static const complex_t delta64_3[16] =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.95694033573221, -0.29028467725446}, {0.95694033573221, -0.29028467725446},
{0.83146961230255, -0.55557023301960}, {0.83146961230255, -0.55557023301960},
...@@ -90,7 +90,7 @@ static complex_t delta64_3[16] = ...@@ -90,7 +90,7 @@ static complex_t delta64_3[16] =
{-0.55557023301960, 0.83146961230255}, {-0.55557023301960, 0.83146961230255},
{-0.29028467725446, 0.95694033573221}}; {-0.29028467725446, 0.95694033573221}};
static complex_t delta128[32] = static const complex_t delta128[32] =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.99879545620517, -0.04906767432742}, {0.99879545620517, -0.04906767432742},
{0.99518472667220, -0.09801714032956}, {0.99518472667220, -0.09801714032956},
...@@ -124,7 +124,7 @@ static complex_t delta128[32] = ...@@ -124,7 +124,7 @@ static complex_t delta128[32] =
{0.09801714032956, -0.99518472667220}, {0.09801714032956, -0.99518472667220},
{0.04906767432742, -0.99879545620517}}; {0.04906767432742, -0.99879545620517}};
static complex_t delta128_3[32] = static const complex_t delta128_3[32] =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.98917650996478, -0.14673047445536}, {0.98917650996478, -0.14673047445536},
{0.95694033573221, -0.29028467725446}, {0.95694033573221, -0.29028467725446},
......
This diff is collapsed.
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* and spawn threads. * and spawn threads.
***************************************************************************** *****************************************************************************
* Copyright (C) 1998, 1999, 2000 VideoLAN * Copyright (C) 1998, 1999, 2000 VideoLAN
* $Id: main.c,v 1.93 2001/05/07 03:14:09 stef Exp $ * $Id: main.c,v 1.94 2001/05/14 15:58:04 reno Exp $
* *
* Authors: Vincent Seguin <seguin@via.ecp.fr> * Authors: Vincent Seguin <seguin@via.ecp.fr>
* Samuel Hocevar <sam@zoy.org> * Samuel Hocevar <sam@zoy.org>
...@@ -974,6 +974,7 @@ static int CPUCapabilities( void ) ...@@ -974,6 +974,7 @@ static int CPUCapabilities( void )
if( i_edx & 0x02000000 ) if( i_edx & 0x02000000 )
{ {
i_capabilities |= CPU_CAPABILITY_MMXEXT; i_capabilities |= CPU_CAPABILITY_MMXEXT;
i_capabilities |= CPU_CAPABILITY_SSE;
} }
/* test for additional capabilities */ /* test for additional capabilities */
...@@ -996,7 +997,6 @@ static int CPUCapabilities( void ) ...@@ -996,7 +997,6 @@ static int CPUCapabilities( void )
{ {
i_capabilities |= CPU_CAPABILITY_MMXEXT; i_capabilities |= CPU_CAPABILITY_MMXEXT;
} }
#else #else
/* default behaviour */ /* default behaviour */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment