* Begin of SSE/3DNow! support for imdct and downmix

If you have a PIII or an Athlon and want to try this, comment out the `#if 0` / `#endif` guards in ac3_downmix.c and ac3_imdct.c, and add the following objects to the AC3_DECODER section of the Makefile: src/ac3_decoder/ac3_imdct_sse.o \ src/ac3_decoder/ac3_srfft_sse.o \ src/ac3_decoder/ac3_downmix_sse.o \ src/ac3_decoder/ac3_downmix_3dn.o \

* Begin of SSE/3DNow! support for imdct and downmix
If you have a PIII or an Athlon and want to try this, comment out the `#if 0` / `#endif` guards in ac3_downmix.c and ac3_imdct.c, and add the following objects to the AC3_DECODER section of the Makefile: src/ac3_decoder/ac3_imdct_sse.o \ src/ac3_decoder/ac3_srfft_sse.o \ src/ac3_decoder/ac3_downmix_sse.o \ src/ac3_decoder/ac3_downmix_3dn.o \
db7b0421 · Renaud Dartus · c1df8159 · db7b0421 · db7b0421 · db7b0421
Commit db7b0421 authored May 14, 2001 by Renaud Dartus
24 changed files
--- a/Makefile
+++ b/Makefile
--- a/include/tests.h
+++ b/include/tests.h
@@ -2,7 +2,7 @@
 * tests.h: several test functions needed by the plugins
 *****************************************************************************
 * Copyright (C) 1996, 1997, 1998, 1999, 2000 VideoLAN
- * $Id: tests.h,v 1.9 2001/03/21 13:42:33 sam Exp $
+ * $Id: tests.h,v 1.10 2001/05/14 15:58:03 reno Exp $
 *
 * Authors: Samuel Hocevar <sam@zoy.org>
 *
@@ -28,6 +28,7 @@
 #define CPU_CAPABILITY_MMX     1<<3
 #define CPU_CAPABILITY_3DNOW   1<<4
 #define CPU_CAPABILITY_MMXEXT  1<<5
+#define CPU_CAPABILITY_SSE     1<<6
 #define CPU_CAPABILITY_ALTIVEC 1<<16
 /*****************************************************************************

--- a/src/ac3_decoder/ac3_bit_allocate.c
+++ b/src/ac3_decoder/ac3_bit_allocate.c
@@ -2,7 +2,7 @@
 * ac3_bit_allocate.c: ac3 allocation tables
 *****************************************************************************
 * Copyright (C) 2000 VideoLAN
- * $Id: ac3_bit_allocate.c,v 1.20 2001/05/06 04:32:02 sam Exp $
+ * $Id: ac3_bit_allocate.c,v 1.21 2001/05/14 15:58:03 reno Exp $
 *
 * Authors: Michel Kaempf <maxx@via.ecp.fr>
 *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -37,7 +37,7 @@
 #include "input_ext-dec.h"
 #include "ac3_decoder.h"
-#include "ac3_internal.h"
+#include "ac3_internal.h"                                 /* DELTA_BIT_REUSE */
 static void ba_compute_psd (bit_allocate_t * p_bit, s16 start, s16 end, s16 exps[]);

--- a/src/ac3_decoder/ac3_decoder.c
+++ b/src/ac3_decoder/ac3_decoder.c
@@ -2,7 +2,7 @@
 * ac3_decoder.c: core ac3 decoder
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder.c,v 1.32 2001/05/07 03:14:09 stef Exp $
+ * $Id: ac3_decoder.c,v 1.33 2001/05/14 15:58:03 reno Exp $
 *
 * Authors: Michel Kaempf <maxx@via.ecp.fr>
 *          Michel Lespinasse <walken@zoy.org>
@@ -40,21 +40,14 @@
 #include "audio_output.h"
 #include "ac3_decoder.h"
-#include "ac3_decoder_thread.h"
+#include "ac3_decoder_thread.h"                           /* ac3dec_thread_t */
 #include "ac3_internal.h"
-#include <stdio.h>
+static const float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
+static const float smixlev_lut[4] = { 0.707, 0.500, 0.0  , 0.500 };
-void imdct_init (imdct_t * p_imdct);
-void downmix_init (downmix_t * p_downmix);
-static float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
-static float smixlev_lut[4] = { 0.707, 0.500, 0.0  , 0.500 };
 int ac3_init (ac3dec_t * p_ac3dec)
 {
-//    p_ac3dec->bit_stream.buffer = 0;
-//    p_ac3dec->bit_stream.i_available = 0;
    p_ac3dec->mantissa.lfsr_state = 1;          /* dither_gen initialization */
    imdct_init(&p_ac3dec->imdct);
    downmix_init(&p_ac3dec->downmix);
@@ -69,7 +62,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
    if (parse_bsi (p_ac3dec))
    {
-        intf_WarnMsg (3,"Error during ac3parsing");
+        intf_WarnMsg (3,"ac3dec warn: error during parsing");
        parse_auxdata (p_ac3dec);
        return 1;
    }
@@ -102,7 +95,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
        if (parse_audblk (p_ac3dec, i))
        {
-            intf_WarnMsg (3,"Error during ac3audioblock");
+            intf_WarnMsg (3,"ac3dec warn: error during audioblock");
            parse_auxdata (p_ac3dec);
            return 1;
        }
@@ -114,7 +107,7 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
        if (exponent_unpack (p_ac3dec))
        {
-            intf_WarnMsg (3,"Error during ac3unpack");
+            intf_WarnMsg (3,"ac3dec warn: error during unpack");
            parse_auxdata (p_ac3dec);
            return 1;
        }

--- a/src/ac3_decoder/ac3_decoder.h
+++ b/src/ac3_decoder/ac3_decoder.h
@@ -2,7 +2,7 @@
 * ac3_decoder.h : ac3 decoder interface
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder.h,v 1.7 2001/04/30 21:04:20 reno Exp $
+ * $Id: ac3_decoder.h,v 1.8 2001/05/14 15:58:03 reno Exp $
 *
 * Authors: Michel Kaempf <maxx@via.ecp.fr>
 *          Renaud Dartus <reno@videolan.org>

--- a/src/ac3_decoder/ac3_decoder_thread.h
+++ b/src/ac3_decoder/ac3_decoder_thread.h
@@ -2,7 +2,7 @@
 * ac3_decoder_thread.h : ac3 decoder thread interface
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder_thread.h,v 1.6 2001/05/01 04:18:18 sam Exp $
+ * $Id: ac3_decoder_thread.h,v 1.7 2001/05/14 15:58:03 reno Exp $
 *
 * Authors: Michel Kaempf <maxx@via.ecp.fr>
 *
@@ -30,14 +30,11 @@ typedef struct ac3dec_thread_s
     * Thread properties
     */
    vlc_thread_t        thread_id;                /* id for thread functions */
-//    bit_stream_t        bit_stream;
    /*
     * Input properties
     */
    decoder_fifo_t *    p_fifo;                /* stores the PES stream data */
-//    data_packet_t *     p_data;
    int                 sync_ptr;          /* sync ptr from ac3 magic header */
    adec_config_t *     p_config;

--- a/src/ac3_decoder/ac3_downmix.c
+++ b/src/ac3_decoder/ac3_downmix.c
@@ -2,7 +2,7 @@
 * ac3_downmix.c: ac3 downmix functions
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_downmix.c,v 1.22 2001/05/06 04:32:02 sam Exp $
+ * $Id: ac3_downmix.c,v 1.23 2001/05/14 15:58:03 reno Exp $
 *
 * Authors: Michel Kaempf <maxx@via.ecp.fr>
 *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -31,29 +31,41 @@
 #include "threads.h"
 #include "mtime.h"
+#include "intf_msg.h"                        /* intf_DbgMsg(), intf_ErrMsg() */
 #include "tests.h"
 #include "stream_control.h"
 #include "input_ext-dec.h"
 #include "ac3_decoder.h"
-#include "ac3_internal.h"
 #include "ac3_downmix.h"
 void downmix_init (downmix_t * p_downmix)
 {
 #if 0
-    if ( TestCPU (CPU_CAPABILITY_MMX) )
+    if ( TestCPU (CPU_CAPABILITY_SSE) )
    {
-		fprintf(stderr,"Using MMX for downmix\n");
+		intf_WarnMsg (1,"ac3dec: using MMX_SSE for downmix");
-		p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_kni;
+		p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_sse;
-		p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_kni;
+		p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_sse;
-		p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_kni;
+		p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_sse;
-		p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_kni;
+		p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_sse;
-		p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_kni;
+		p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_sse;
-		p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_kni;
+		p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_sse;
-    	p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_kni;
+    	p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_sse;
-    } else 
+    } 
+    else if ( TestCPU (CPU_CAPABILITY_3DNOW) )
+    {
+		intf_WarnMsg (1,"ac3dec: using MMX_3DNOW for downmix");
+		p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_3dn;
+		p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_3dn;
+		p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_3dn;
+		p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_3dn;
+		p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_3dn;
+		p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_3dn;
+    	p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_3dn;
+    } 
+    else
 #endif
    {
 		p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c;

--- a/src/ac3_decoder/ac3_downmix.h
+++ b/src/ac3_decoder/ac3_downmix.h
@@ -2,7 +2,7 @@
 * ac3_downmix.h: ac3 downmix functions
 *****************************************************************************
 * Copyright (C) 2000, 2001 VideoLAN
- * $Id: ac3_downmix.h,v 1.6 2001/04/30 21:04:20 reno Exp $
+ * $Id: ac3_downmix.h,v 1.7 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Renaud Dartus <reno@videolan.org>
 *
@@ -30,13 +30,22 @@ void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par);
 void stream_sample_2ch_to_s16_c(s16 *s16_samples, float *left, float *right);
 void stream_sample_1ch_to_s16_c(s16 *s16_samples, float *center); 
-#if 0
+/* SSE functions */
-/* Kni functions */
+void downmix_3f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_3f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par);
+void downmix_3f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_3f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par);
+void downmix_2f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_2f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par);
+void downmix_2f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_2f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par);
+void downmix_3f_0r_to_2ch_sse(float *samples, dm_par_t * dm_par);            
-void downmix_3f_0r_to_2ch_kni(float *samples, dm_par_t * dm_par);            
+void stream_sample_2ch_to_s16_sse(s16 *s16_samples, float *left, float *right);
-void stream_sample_2ch_to_s16_kni(s16 *s16_samples, float *left, float *right);
+void stream_sample_1ch_to_s16_sse(s16 *s16_samples, float *center);  
-void stream_sample_1ch_to_s16_kni(s16 *s16_samples, float *center);  
-#endif
+/* 3DNow! functions */
+void downmix_3f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
+void downmix_3f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
+void downmix_2f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
+void downmix_2f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
+void downmix_3f_0r_to_2ch_3dn(float *samples, dm_par_t * dm_par);            
+void stream_sample_2ch_to_s16_3dn(s16 *s16_samples, float *left, float *right);
+void stream_sample_1ch_to_s16_3dn(s16 *s16_samples, float *center);  
--- a/src/ac3_decoder/ac3_downmix_3dn.c
+++ b/src/ac3_decoder/ac3_downmix_3dn.c
+/*****************************************************************************
+ * ac3_downmix_3dn.c: ac3 downmix functions
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_downmix_3dn.c,v 1.1 2001/05/14 15:58:04 reno Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+#include "defs.h"
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+#include "stream_control.h"
+#include "input_ext-dec.h"
+#include "ac3_decoder.h"
+/*****************************************************************************
+ * downmix_3f_2r_to_2ch_3dn: 3 front + 2 rear channels to stereo (3DNow!)
+ *****************************************************************************
+ * Works in place on samples, read as consecutive 1024-byte channel blocks
+ * (left, center, right, left surround, right surround according to the
+ * register comments below). dm_par is read at byte offsets 0, 4 and 8 for
+ * the unit, clev and slev coefficients. Each of the 128 iterations handles
+ * two floats per channel; the mixed left and right pairs are stored back at
+ * offsets 0 and 1024.
+ *****************************************************************************/
+void downmix_3f_2r_to_2ch_3dn (float * samples, dm_par_t * dm_par)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $128, %%ecx\n"          /* loop counter */
+    "movd   (%%ebx), %%mm5\n"       /* unit */
+    "punpckldq %%mm5, %%mm5\n"      /* unit | unit */
+    "movd   4(%%ebx), %%mm6\n"      /* clev */
+    "punpckldq %%mm6, %%mm6\n"      /* clev | clev */
+    "movd   8(%%ebx), %%mm7\n"      /* slev */
+    "punpckldq %%mm7, %%mm7\n"      /* slev | slev */
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movq   (%%eax),     %%mm0\n"   /* left */
+    "movq   2048(%%eax), %%mm1\n"   /* right */
+    "movq   1024(%%eax), %%mm2\n"   /* center */
+    "movq   3072(%%eax), %%mm3\n"   /* leftsur */
+    "movq   4096(%%eax), %%mm4\n"   /* rightsur */
+    "pfmul  %%mm5, %%mm0\n"
+    "pfmul  %%mm5, %%mm1\n"
+    "pfmul  %%mm6, %%mm2\n"
+    "pfadd  %%mm2, %%mm0\n"
+    "pfadd  %%mm2, %%mm1\n"
+    "pfmul  %%mm7, %%mm3\n"
+    "pfmul  %%mm7, %%mm4\n"
+    "pfadd  %%mm3, %%mm0\n"
+    "pfadd  %%mm4, %%mm1\n"
+    "movq   %%mm0, (%%eax)\n"
+    "movq   %%mm1, 1024(%%eax)\n"
+    "addl   $8, %%eax\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    "femms\n"
+    : "=a" (samples)
+    : "0" (samples), "b" (dm_par)
+    : "memory", "cc" );       /* the loop stores to samples and sets flags */
+}
+/*****************************************************************************
+ * downmix_2f_2r_to_2ch_3dn: 2 front + 2 rear channels to stereo (3DNow!)
+ *****************************************************************************
+ * Works in place on samples, read as consecutive 1024-byte channel blocks
+ * (left, right, left surround, right surround according to the register
+ * comments below). dm_par is read at byte offsets 0 and 8 for the unit and
+ * slev coefficients. Each of the 128 iterations handles two floats per
+ * channel; the results are stored back at offsets 0 and 1024.
+ *****************************************************************************/
+void downmix_2f_2r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $128, %%ecx\n"          /* loop counter */
+    "movd   (%%ebx), %%mm5\n"       /* unit */
+    "punpckldq %%mm5, %%mm5\n"      /* unit | unit */
+    "movd   8(%%ebx), %%mm7\n"      /* slev */
+    "punpckldq %%mm7, %%mm7\n"      /* slev | slev */
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movq   (%%eax), %%mm0\n"       /* left */
+    "movq   1024(%%eax), %%mm1\n"   /* right */
+    "movq   2048(%%eax), %%mm3\n"   /* leftsur */
+    "movq   3072(%%eax), %%mm4\n"   /* rightsur */
+    "pfmul  %%mm5, %%mm0\n"
+    "pfmul  %%mm5, %%mm1\n"
+    "pfmul  %%mm7, %%mm3\n"
+    "pfmul  %%mm7, %%mm4\n"
+    "pfadd  %%mm3, %%mm0\n"
+    "pfadd  %%mm4, %%mm1\n"
+    "movq   %%mm0, (%%eax)\n"
+    "movq   %%mm1, 1024(%%eax)\n"
+    "addl   $8, %%eax\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    "femms\n"
+    : "=a" (samples)
+    : "0" (samples), "b" (dm_par)
+    : "memory", "cc" );       /* the loop stores to samples and sets flags */
+}
+/*****************************************************************************
+ * downmix_3f_1r_to_2ch_3dn: 3 front + 1 rear channels to stereo (3DNow!)
+ *****************************************************************************
+ * Works in place on samples, read as consecutive 1024-byte channel blocks
+ * (left, center, right, surround according to the register comments below).
+ * dm_par is read at byte offsets 0, 4 and 8 for the unit, clev and slev
+ * coefficients. The scaled surround is subtracted from the left output and
+ * added to the right output. Each of the 128 iterations handles two floats
+ * per channel; the results are stored back at offsets 0 and 1024.
+ *****************************************************************************/
+void downmix_3f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $128, %%ecx\n"          /* loop counter */
+    "movd   (%%ebx), %%mm5\n"       /* unit */
+    "punpckldq %%mm5, %%mm5\n"      /* unit | unit */
+    "movd   4(%%ebx), %%mm6\n"      /* clev */
+    "punpckldq %%mm6, %%mm6\n"      /* clev | clev */
+    "movd   8(%%ebx), %%mm7\n"      /* slev */
+    "punpckldq %%mm7, %%mm7\n"      /* slev | slev */
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movq   (%%eax), %%mm0\n"       /* left */
+    "movq   2048(%%eax), %%mm1\n"   /* right */
+    "movq   1024(%%eax), %%mm2\n"   /* center */
+    "movq   3072(%%eax), %%mm3\n"   /* sur */
+    "pfmul  %%mm5, %%mm0\n"
+    "pfmul  %%mm5, %%mm1\n"
+    "pfmul  %%mm6, %%mm2\n"
+    "pfadd  %%mm2, %%mm0\n"
+    "pfmul  %%mm7, %%mm3\n"
+    "pfadd  %%mm2, %%mm1\n"
+    "pfsub  %%mm3, %%mm0\n"         /* left  -= slev * sur */
+    "pfadd  %%mm3, %%mm1\n"         /* right += slev * sur */
+    "movq   %%mm0, (%%eax)\n"
+    "movq   %%mm1, 1024(%%eax)\n"
+    "addl   $8, %%eax\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    "femms\n"
+    : "=a" (samples)
+    : "0" (samples), "b" (dm_par)
+    : "memory", "cc" );       /* the loop stores to samples and sets flags */
+}
+/*****************************************************************************
+ * downmix_2f_1r_to_2ch_3dn: 2 front + 1 rear channels to stereo (3DNow!)
+ *****************************************************************************
+ * Works in place on samples, read as consecutive 1024-byte channel blocks
+ * (left, right, surround according to the register comments below). dm_par
+ * is read at byte offsets 0 and 8 for the unit and slev coefficients. The
+ * scaled surround is subtracted from the left output and added to the right
+ * output. Each of the 128 iterations handles two floats per channel; the
+ * results are stored back at offsets 0 and 1024.
+ *****************************************************************************/
+void downmix_2f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $128, %%ecx\n"          /* loop counter */
+    "movd   (%%ebx), %%mm5\n"       /* unit */
+    "punpckldq %%mm5, %%mm5\n"      /* unit | unit */
+    "movd   8(%%ebx), %%mm7\n"      /* slev */
+    "punpckldq %%mm7, %%mm7\n"      /* slev | slev */
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movq   (%%eax), %%mm0\n"       /* left */
+    "movq   1024(%%eax), %%mm1\n"   /* right */
+    "movq   2048(%%eax), %%mm3\n"   /* sur */
+    "pfmul  %%mm5, %%mm0\n"
+    "pfmul  %%mm5, %%mm1\n"
+    "pfmul  %%mm7, %%mm3\n"
+    "pfsub  %%mm3, %%mm0\n"         /* left  -= slev * sur */
+    "pfadd  %%mm3, %%mm1\n"         /* right += slev * sur */
+    "movq   %%mm0, (%%eax)\n"
+    "movq   %%mm1, 1024(%%eax)\n"
+    "addl   $8, %%eax\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    "femms\n"
+    : "=a" (samples)
+    : "0" (samples), "b" (dm_par)
+    : "memory", "cc" );       /* the loop stores to samples and sets flags */
+}
+/*****************************************************************************
+ * downmix_3f_0r_to_2ch_3dn: 3 front channels to stereo (3DNow!)
+ *****************************************************************************
+ * Works in place on samples, read as consecutive 1024-byte channel blocks
+ * (left, center, right according to the register comments below). dm_par is
+ * read at byte offsets 0 and 4 for the unit and clev coefficients. Each of
+ * the 128 iterations handles two floats per channel; the results are stored
+ * back at offsets 0 and 1024.
+ *****************************************************************************/
+void downmix_3f_0r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $128, %%ecx\n"          /* loop counter */
+    "movd   (%%ebx), %%mm5\n"       /* unit */
+    "punpckldq %%mm5, %%mm5\n"      /* unit | unit */
+    "movd   4(%%ebx), %%mm6\n"      /* clev */
+    "punpckldq %%mm6, %%mm6\n"      /* clev | clev */
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movq   (%%eax), %%mm0\n"       /* left */
+    "movq   2048(%%eax), %%mm1\n"   /* right */
+    "movq   1024(%%eax), %%mm2\n"   /* center */
+    "pfmul  %%mm5, %%mm0\n"
+    "pfmul  %%mm5, %%mm1\n"
+    "pfmul  %%mm6, %%mm2\n"
+    "pfadd  %%mm2, %%mm0\n"
+    "pfadd  %%mm2, %%mm1\n"
+    "movq   %%mm0, (%%eax)\n"
+    "movq   %%mm1, 1024(%%eax)\n"
+    "addl   $8, %%eax\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    "femms\n"
+    : "=a" (samples)
+    : "0" (samples), "b" (dm_par)
+    : "memory", "cc" );       /* the loop stores to samples and sets flags */
+}
+/*****************************************************************************
+ * stream_sample_1ch_to_s16_3dn: mono float samples to s16 pairs (3DNow!)
+ *****************************************************************************
+ * Reads 256 floats from left (two per iteration, 128 iterations), scales
+ * them by 0.7071068, converts them to saturated 16-bit integers and writes
+ * each converted sample twice in a row to s16_samples (512 values in all).
+ *****************************************************************************/
+void stream_sample_1ch_to_s16_3dn (s16 *s16_samples, float *left)
+{
+    /* Scale factor, roughly 1/sqrt(2). Kept local so that this file does not
+     * depend on a symbol defined in another object file. */
+    static const float f_sqrt2 = 0.7071068f;
+
+    __asm__ __volatile__ (
+    /* load the constant first, before the stack or any register is touched */
+    "movd   %4, %%mm7\n"
+    "punpckldq %%mm7, %%mm7\n"      /* sqrt2 | sqrt2 */
+    "pushl  %%ecx\n"
+    "movl   $128, %%ecx\n"
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movq   (%%ebx), %%mm0\n"       /* c1 | c0 */
+    "pfmul  %%mm7, %%mm0\n"
+    "pf2id  %%mm0, %%mm0\n"         /* c1 c0 --> mm0, int_32 */
+    "packssdw %%mm0, %%mm0\n"       /* c1 c0 c1 c0 --> mm0, int_16 */
+    /* packing a register with itself repeats the pair, so interleave the
+     * low words with themselves to get every sample twice in a row */
+    "punpcklwd %%mm0, %%mm0\n"      /* c1 c1 c0 c0 --> mm0, int_16 */
+    "movq   %%mm0, (%%eax)\n"
+    "addl   $8, %%eax\n"
+    "addl   $8, %%ebx\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    "femms\n"
+    : "=a" (s16_samples), "=b" (left)
+    : "0" (s16_samples), "1" (left), "m" (f_sqrt2)
+    : "memory", "cc" );   /* the loop stores to s16_samples and sets flags */
+}
+/*****************************************************************************
+ * stream_sample_2ch_to_s16_3dn: stereo float samples to interleaved s16
+ *****************************************************************************
+ * Reads 256 floats from each of left and right (two per channel per
+ * iteration, 128 iterations), converts them to saturated 16-bit integers and
+ * writes them interleaved (l0 r0 l1 r1 ...) to s16_samples. Exactly 8 bytes
+ * are written per iteration, so nothing is stored past the 512th value.
+ *****************************************************************************/
+void stream_sample_2ch_to_s16_3dn (s16 *s16_samples, float *left, float *right)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $128, %%ecx\n"
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movq   (%%ebx), %%mm0\n"       /* l1 | l0 */
+    "movq   (%%edx), %%mm1\n"       /* r1 | r0 */
+    "movq   %%mm0,  %%mm2\n"        /* l1 | l0 */
+    "punpckldq %%mm1, %%mm0\n"      /* r0 | l0 */
+    "punpckhdq %%mm1, %%mm2\n"      /* r1 | l1 */
+    "pf2id  %%mm0, %%mm0\n"         /* r0 l0 --> mm0, int_32 */
+    "pf2id  %%mm2, %%mm2\n"         /* r1 l1 --> mm2, int_32 */
+    "packssdw %%mm2, %%mm0\n"       /* r1 l1 r0 l0 --> mm0, int_16 */
+    /* mm0 already holds all four results of this iteration: a single
+     * 8-byte store is enough */
+    "movq   %%mm0, (%%eax)\n"
+    "addl   $8, %%eax\n"
+    "addl   $8, %%ebx\n"
+    "addl   $8, %%edx\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    "femms\n"
+    : "=a" (s16_samples), "=b" (left), "=d" (right)
+    : "0" (s16_samples), "1" (left), "2" (right)
+    : "memory", "cc" );   /* the loop stores to s16_samples and sets flags */
+}
--- a/src/ac3_decoder/ac3_downmix_c.c
+++ b/src/ac3_decoder/ac3_downmix_c.c
@@ -2,7 +2,7 @@
 * ac3_downmix_c.c: ac3 downmix functions
 *****************************************************************************
 * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_downmix_c.c,v 1.7 2001/05/06 04:32:02 sam Exp $
+ * $Id: ac3_downmix_c.c,v 1.8 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Renaud Dartus <reno@videolan.org>
 *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -35,11 +35,8 @@
 #include "input_ext-dec.h"
 #include "ac3_decoder.h"
-#include "ac3_internal.h"
-#include "ac3_downmix.h"
+void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
-void __inline__ downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
 {
    int i;
    float *left, *right, *center, *left_sur, *right_sur;
@@ -59,7 +56,7 @@ void __inline__ downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
    }
 }
-void __inline__ downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
 {
    int i;
    float *left, *right, *left_sur, *right_sur;
@@ -78,7 +75,7 @@ void __inline__ downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
    }
 }
-void __inline__ downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
 {
    int i;
    float *left, *right, *center, *right_sur;
@@ -98,7 +95,7 @@ void __inline__ downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
 }
-void __inline__ downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
 {
    int i;
    float *left, *right, *right_sur;
@@ -117,7 +114,7 @@ void __inline__ downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
 }
-void __inline__ downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
 {
    int i;
    float *left, *right, *center;
@@ -136,7 +133,7 @@ void __inline__ downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
 }
-void __inline__ stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *right)
+void stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *right)
 {
    int i;
    for (i=0; i < 256; i++) {
@@ -146,7 +143,7 @@ void __inline__ stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *ri
 }
-void __inline__ stream_sample_1ch_to_s16_c (s16 *out_buf, float *center)
+void stream_sample_1ch_to_s16_c (s16 *out_buf, float *center)
 {
    int i;
    float tmp;

--- a/src/ac3_decoder/ac3_downmix_sse.c
+++ b/src/ac3_decoder/ac3_downmix_sse.c
+/*****************************************************************************
+ * ac3_downmix_sse.c: ac3 downmix functions
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_downmix_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ *          Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+#include "defs.h"
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+#include "stream_control.h"
+#include "input_ext-dec.h"
+#include "ac3_decoder.h"
+/*****************************************************************************
+ * sqrt2: the 0.7071068 constant loaded by the 1ch_to_s16 assembly routines
+ *****************************************************************************
+ * Those routines read a float from the address of the "sqrt2" symbol. The
+ * label is emitted from a file-scope asm so that it sits directly on the
+ * data: inside a C function body the compiler may place prologue code
+ * between the symbol and the value, and the data would be callable as code.
+ *****************************************************************************/
+__asm__ (
+    ".globl sqrt2\n"
+"sqrt2:\n"
+    ".float 0f0.7071068\n" );
+/*****************************************************************************
+ * downmix_3f_2r_to_2ch_sse: 3 front + 2 rear channels to stereo (SSE)
+ *****************************************************************************
+ * Works in place on samples, read as consecutive 1024-byte channel blocks
+ * (left, center, right, left surround, right surround according to the
+ * register comments below). dm_par is read at byte offsets 0, 4 and 8 for
+ * the unit, clev and slev coefficients. Each of the 64 iterations handles
+ * four floats per channel with unaligned loads and stores; the mixed left
+ * and right values are stored back at offsets 0 and 1024.
+ *****************************************************************************/
+void downmix_3f_2r_to_2ch_sse (float * samples, dm_par_t * dm_par)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $64, %%ecx\n"           /* loop counter */
+    "movss  (%%ebx), %%xmm5\n"      /* unit */
+    "shufps $0, %%xmm5, %%xmm5\n"   /* unit | unit | unit | unit */
+    "movss  4(%%ebx), %%xmm6\n"     /* clev */
+    "shufps $0, %%xmm6, %%xmm6\n"   /* clev | clev | clev | clev */
+    "movss  8(%%ebx), %%xmm7\n"     /* slev */
+    "shufps $0, %%xmm7, %%xmm7\n"   /* slev | slev | slev | slev */
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movups (%%eax),     %%xmm0\n"  /* left */
+    "movups 2048(%%eax), %%xmm1\n"  /* right */
+    "movups 1024(%%eax), %%xmm2\n"  /* center */
+    "movups 3072(%%eax), %%xmm3\n"  /* leftsur */
+    "movups 4096(%%eax), %%xmm4\n"  /* rightsur */
+    "mulps  %%xmm5, %%xmm0\n"
+    "mulps  %%xmm5, %%xmm1\n"
+    "mulps  %%xmm6, %%xmm2\n"
+    "addps  %%xmm2, %%xmm0\n"
+    "addps  %%xmm2, %%xmm1\n"
+    "mulps  %%xmm7, %%xmm3\n"
+    "mulps  %%xmm7, %%xmm4\n"
+    "addps  %%xmm3, %%xmm0\n"
+    "addps  %%xmm4, %%xmm1\n"
+    "movups %%xmm0, (%%eax)\n"
+    "movups %%xmm1, 1024(%%eax)\n"
+    "addl   $16, %%eax\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    : "=a" (samples)
+    : "0" (samples), "b" (dm_par)
+    : "memory", "cc" );       /* the loop stores to samples and sets flags */
+}
+/*****************************************************************************
+ * downmix_2f_2r_to_2ch_sse: 2 front + 2 rear channels to stereo (SSE)
+ *****************************************************************************
+ * Works in place on samples, read as consecutive 1024-byte channel blocks
+ * (left, right, left surround, right surround according to the register
+ * comments below). dm_par is read at byte offsets 0 and 8 for the unit and
+ * slev coefficients. Each of the 64 iterations handles four floats per
+ * channel; the results are stored back at offsets 0 and 1024.
+ *****************************************************************************/
+void downmix_2f_2r_to_2ch_sse (float *samples, dm_par_t * dm_par)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $64, %%ecx\n"           /* loop counter */
+    "movss  (%%ebx), %%xmm5\n"      /* unit */
+    "shufps $0, %%xmm5, %%xmm5\n"   /* unit | unit | unit | unit */
+    "movss  8(%%ebx), %%xmm7\n"     /* slev */
+    "shufps $0, %%xmm7, %%xmm7\n"   /* slev | slev | slev | slev */
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movups (%%eax), %%xmm0\n"      /* left */
+    "movups 1024(%%eax), %%xmm1\n"  /* right */
+    "movups 2048(%%eax), %%xmm3\n"  /* leftsur */
+    "movups 3072(%%eax), %%xmm4\n"  /* rightsur */
+    "mulps  %%xmm5, %%xmm0\n"
+    "mulps  %%xmm5, %%xmm1\n"
+    "mulps  %%xmm7, %%xmm3\n"
+    "mulps  %%xmm7, %%xmm4\n"
+    "addps  %%xmm3, %%xmm0\n"
+    "addps  %%xmm4, %%xmm1\n"
+    "movups %%xmm0, (%%eax)\n"
+    "movups %%xmm1, 1024(%%eax)\n"
+    "addl   $16, %%eax\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    : "=a" (samples)
+    : "0" (samples), "b" (dm_par)
+    : "memory", "cc" );       /* the loop stores to samples and sets flags */
+}
+/*****************************************************************************
+ * downmix_3f_1r_to_2ch_sse: 3 front + 1 rear channels to stereo (SSE)
+ *****************************************************************************
+ * Works in place on samples, read as consecutive 1024-byte channel blocks
+ * (left, center, right, surround according to the register comments below).
+ * dm_par is read at byte offsets 0, 4 and 8 for the unit, clev and slev
+ * coefficients. The scaled surround is subtracted from the left output and
+ * added to the right output. Each of the 64 iterations handles four floats
+ * per channel; the results are stored back at offsets 0 and 1024.
+ *****************************************************************************/
+void downmix_3f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $64, %%ecx\n"           /* loop counter */
+    "movss  (%%ebx), %%xmm5\n"      /* unit */
+    "shufps $0, %%xmm5, %%xmm5\n"   /* unit | unit | unit | unit */
+    "movss  4(%%ebx), %%xmm6\n"     /* clev */
+    "shufps $0, %%xmm6, %%xmm6\n"   /* clev | clev | clev | clev */
+    "movss  8(%%ebx), %%xmm7\n"     /* slev */
+    "shufps $0, %%xmm7, %%xmm7\n"   /* slev | slev | slev | slev */
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movups (%%eax), %%xmm0\n"      /* left */
+    "movups 2048(%%eax), %%xmm1\n"  /* right */
+    "movups 1024(%%eax), %%xmm2\n"  /* center */
+    "movups 3072(%%eax), %%xmm3\n"  /* sur */
+    "mulps  %%xmm5, %%xmm0\n"
+    "mulps  %%xmm5, %%xmm1\n"
+    "mulps  %%xmm6, %%xmm2\n"
+    "addps  %%xmm2, %%xmm0\n"
+    "mulps  %%xmm7, %%xmm3\n"
+    "addps  %%xmm2, %%xmm1\n"
+    "subps  %%xmm3, %%xmm0\n"       /* left  -= slev * sur */
+    "addps  %%xmm3, %%xmm1\n"       /* right += slev * sur */
+    "movups %%xmm0, (%%eax)\n"
+    "movups %%xmm1, 1024(%%eax)\n"
+    "addl   $16, %%eax\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    : "=a" (samples)
+    : "0" (samples), "b" (dm_par)
+    : "memory", "cc" );       /* the loop stores to samples and sets flags */
+}
+/*****************************************************************************
+ * downmix_2f_1r_to_2ch_sse: 2 front + 1 rear channels to stereo (SSE)
+ *****************************************************************************
+ * Works in place on samples, read as consecutive 1024-byte channel blocks
+ * (left, right, surround according to the register comments below). dm_par
+ * is read at byte offsets 0 and 8 for the unit and slev coefficients. The
+ * scaled surround is subtracted from the left output and added to the right
+ * output. Each of the 64 iterations handles four floats per channel; the
+ * results are stored back at offsets 0 and 1024.
+ *****************************************************************************/
+void downmix_2f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $64, %%ecx\n"           /* loop counter */
+    "movss  (%%ebx), %%xmm5\n"      /* unit */
+    "shufps $0, %%xmm5, %%xmm5\n"   /* unit | unit | unit | unit */
+    "movss  8(%%ebx), %%xmm7\n"     /* slev */
+    "shufps $0, %%xmm7, %%xmm7\n"   /* slev | slev | slev | slev */
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movups (%%eax), %%xmm0\n"      /* left */
+    "movups 1024(%%eax), %%xmm1\n"  /* right */
+    "movups 2048(%%eax), %%xmm3\n"  /* sur */
+    "mulps  %%xmm5, %%xmm0\n"
+    "mulps  %%xmm5, %%xmm1\n"
+    "mulps  %%xmm7, %%xmm3\n"
+    "subps  %%xmm3, %%xmm0\n"       /* left  -= slev * sur */
+    "addps  %%xmm3, %%xmm1\n"       /* right += slev * sur */
+    "movups %%xmm0, (%%eax)\n"
+    "movups %%xmm1, 1024(%%eax)\n"
+    "addl   $16, %%eax\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    : "=a" (samples)
+    : "0" (samples), "b" (dm_par)
+    : "memory", "cc" );       /* the loop stores to samples and sets flags */
+}
+/*****************************************************************************
+ * downmix_3f_0r_to_2ch_sse: 3 front channels to stereo (SSE)
+ *****************************************************************************
+ * Works in place on samples, read as consecutive 1024-byte channel blocks
+ * (left, center, right according to the register comments below). dm_par is
+ * read at byte offsets 0 and 4 for the unit and clev coefficients. Each of
+ * the 64 iterations handles four floats per channel; the results are stored
+ * back at offsets 0 and 1024.
+ *****************************************************************************/
+void downmix_3f_0r_to_2ch_sse (float *samples, dm_par_t * dm_par)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $64, %%ecx\n"           /* loop counter */
+    "movss  (%%ebx), %%xmm5\n"      /* unit */
+    "shufps $0, %%xmm5, %%xmm5\n"   /* unit | unit | unit | unit */
+    "movss  4(%%ebx), %%xmm6\n"     /* clev */
+    "shufps $0, %%xmm6, %%xmm6\n"   /* clev | clev | clev | clev */
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movups (%%eax), %%xmm0\n"      /* left */
+    "movups 2048(%%eax), %%xmm1\n"  /* right */
+    "movups 1024(%%eax), %%xmm2\n"  /* center */
+    "mulps  %%xmm5, %%xmm0\n"
+    "mulps  %%xmm5, %%xmm1\n"
+    "mulps  %%xmm6, %%xmm2\n"
+    "addps  %%xmm2, %%xmm0\n"
+    "addps  %%xmm2, %%xmm1\n"
+    "movups %%xmm0, (%%eax)\n"
+    "movups %%xmm1, 1024(%%eax)\n"
+    "addl   $16, %%eax\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    : "=a" (samples)
+    : "0" (samples), "b" (dm_par)
+    : "memory", "cc" );       /* the loop stores to samples and sets flags */
+}
+/*****************************************************************************
+ * stream_sample_1ch_to_s16_sse: mono float samples to s16 pairs (SSE)
+ *****************************************************************************
+ * Reads 256 floats from left (four per iteration, 64 iterations), scales
+ * them by 0.7071068, converts them to saturated 16-bit integers and writes
+ * each converted sample twice in a row to s16_samples (512 values in all).
+ *****************************************************************************/
+void stream_sample_1ch_to_s16_sse (s16 *s16_samples, float *left)
+{
+    /* Scale factor, roughly 1/sqrt(2). Passed as a memory operand so the
+     * value is read from real data instead of from a code symbol. */
+    static const float f_sqrt2 = 0.7071068f;
+
+    __asm__ __volatile__ (
+    /* load the constant first, before the stack or any register is touched */
+    "movss  %4, %%xmm7\n"
+    "shufps $0, %%xmm7, %%xmm7\n"   /* sqrt2 | sqrt2 | sqrt2 | sqrt2 */
+    "pushl  %%ecx\n"
+    "movl   $64, %%ecx\n"
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movups (%%ebx), %%xmm0\n"      /* c3 | c2 | c1 | c0 */
+    "mulps  %%xmm7, %%xmm0\n"
+    "movhlps %%xmm0, %%xmm2\n"      /* c3 | c2 */
+    "cvtps2pi %%xmm0, %%mm0\n"      /* c1 c0 --> mm0, int_32 */
+    "cvtps2pi %%xmm2, %%mm1\n"      /* c3 c2 --> mm1, int_32 */
+    "packssdw %%mm0, %%mm0\n"       /* c1 c0 c1 c0 --> mm0, int_16 */
+    "packssdw %%mm1, %%mm1\n"       /* c3 c2 c3 c2 --> mm1, int_16 */
+    /* packing a register with itself repeats the pair, so interleave the
+     * low words with themselves to get every sample twice in a row */
+    "punpcklwd %%mm0, %%mm0\n"      /* c1 c1 c0 c0 --> mm0, int_16 */
+    "punpcklwd %%mm1, %%mm1\n"      /* c3 c3 c2 c2 --> mm1, int_16 */
+    "movq   %%mm0, (%%eax)\n"
+    "movq   %%mm1, 8(%%eax)\n"
+    "addl   $16, %%eax\n"
+    "addl   $16, %%ebx\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    "emms\n"
+    : "=a" (s16_samples), "=b" (left)
+    : "0" (s16_samples), "1" (left), "m" (f_sqrt2)
+    : "memory", "cc" );   /* the loop stores to s16_samples and sets flags */
+}
+/*****************************************************************************
+ * stream_sample_2ch_to_s16_sse: stereo float samples to interleaved s16
+ *****************************************************************************
+ * Reads 256 floats from each of left and right (four per channel per
+ * iteration, 64 iterations), converts them to saturated 16-bit integers and
+ * writes them interleaved (l0 r0 l1 r1 ...) to s16_samples, 16 bytes per
+ * iteration.
+ *****************************************************************************/
+void stream_sample_2ch_to_s16_sse (s16 *s16_samples, float *left, float *right)
+{
+    __asm__ __volatile__ (
+    "pushl  %%ecx\n"
+    "movl   $64, %%ecx\n"
+    /* numeric local label: cannot clash with a label in another asm block */
+"1:\n"
+    "movups (%%ebx), %%xmm0\n"      /* l3 | l2 | l1 | l0 */
+    "movups (%%edx), %%xmm1\n"      /* r3 | r2 | r1 | r0 */
+    "movhlps %%xmm0, %%xmm2\n"      /* l3 | l2 */
+    "movhlps %%xmm1, %%xmm3\n"      /* r3 | r2 */
+    "unpcklps %%xmm1, %%xmm0\n"     /* r1 | l1 | r0 | l0 */
+    "unpcklps %%xmm3, %%xmm2\n"     /* r3 | l3 | r2 | l2 */
+    "cvtps2pi %%xmm0, %%mm0\n"      /* r0 l0 --> mm0, int_32 */
+    "movhlps %%xmm0, %%xmm0\n"
+    "cvtps2pi %%xmm0, %%mm1\n"      /* r1 l1 --> mm1, int_32 */
+    "cvtps2pi %%xmm2, %%mm2\n"      /* r2 l2 --> mm2, int_32 */
+    "movhlps %%xmm2, %%xmm2\n"
+    "cvtps2pi %%xmm2, %%mm3\n"      /* r3 l3 --> mm3, int_32 */
+    "packssdw %%mm1, %%mm0\n"       /* r1 l1 r0 l0 --> mm0, int_16 */
+    "packssdw %%mm3, %%mm2\n"       /* r3 l3 r2 l2 --> mm2, int_16 */
+    "movq   %%mm0, (%%eax)\n"
+    "movq   %%mm2, 8(%%eax)\n"
+    "addl   $16, %%eax\n"
+    "addl   $16, %%ebx\n"
+    "addl   $16, %%edx\n"
+    "decl   %%ecx\n"
+    "jnz    1b\n"
+    "popl   %%ecx\n"
+    "emms\n"
+    : "=a" (s16_samples), "=b" (left), "=d" (right)
+    : "0" (s16_samples), "1" (left), "2" (right)
+    : "memory", "cc" );   /* the loop stores to s16_samples and sets flags */
+}
--- a/src/ac3_decoder/ac3_exponent.c
+++ b/src/ac3_decoder/ac3_exponent.c
@@ -2,7 +2,7 @@
 * ac3_exponent.c: ac3 exponent calculations
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_exponent.c,v 1.23 2001/04/20 12:14:34 reno Exp $
+ * $Id: ac3_exponent.c,v 1.24 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Michel Kaempf <maxx@via.ecp.fr>
 *          Michel Lespinasse <walken@zoy.org>
@@ -31,15 +31,14 @@
 #include "threads.h"
 #include "mtime.h"
+#include "intf_msg.h"                        /* intf_DbgMsg(), intf_ErrMsg() */
 #include "stream_control.h"
 #include "input_ext-dec.h"
 #include "audio_output.h"
 #include "ac3_decoder.h"
-#include "ac3_decoder_thread.h"
-#include "intf_msg.h"
 #include "ac3_internal.h"

--- a/src/ac3_decoder/ac3_imdct.c
+++ b/src/ac3_decoder/ac3_imdct.c
@@ -2,7 +2,7 @@
 * ac3_imdct.c: ac3 DCT
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct.c,v 1.18 2001/05/06 04:32:02 sam Exp $
+ * $Id: ac3_imdct.c,v 1.19 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Michel Kaempf <maxx@via.ecp.fr>
 *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -39,15 +39,11 @@
 #include "input_ext-dec.h"
 #include "ac3_decoder.h"
-#include "ac3_internal.h"
-#include "ac3_downmix.h"
+#include "ac3_imdct_c.h"                                     /* imdct_init_c */
-#include "ac3_imdct_c.h"
+#include "ac3_imdct_sse.h"                                 /* imdct_init_sse */
-#if 0
-#include "ac3_imdct_kni.h"
-#endif
-#include "tests.h"
+#include "tests.h"                                                /* TestCPU */
 #ifndef M_PI
 #   define M_PI 3.14159265358979323846
@@ -57,13 +53,13 @@
 void imdct_init(imdct_t * p_imdct)
 {
 	int i;
-	float scale = 255.99609372;
+	float scale = 181.019;
 #if 0
-	if ( TestCPU (CPU_CAPABILITY_MMX) )
+	if ( TestCPU (CPU_CAPABILITY_SSE) )
    {
-        imdct_init_kni (p_imdct);
+        imdct_init_sse (p_imdct);
-    } else 
+    }
+    else
 #endif
    {
        imdct_init_c (p_imdct);

--- a/src/ac3_decoder/ac3_imdct_c.c
+++ b/src/ac3_decoder/ac3_imdct_c.c
@@ -2,7 +2,7 @@
 * ac3_imdct_c.c: ac3 DCT
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct_c.c,v 1.2 2001/05/06 04:32:02 sam Exp $
+ * $Id: ac3_imdct_c.c,v 1.3 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Renaud Dartus <reno@videolan.org>
 *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -38,7 +38,7 @@
 #include "input_ext-dec.h"
 #include "ac3_decoder.h"
-#include "ac3_internal.h"
+#include "ac3_imdct_c.h"
 #ifndef M_PI
 #   define M_PI 3.14159265358979323846
@@ -46,9 +46,6 @@
 void fft_64p_c (complex_t *x);
 void fft_128p_c (complex_t *x);
-void imdct_do_512_c (imdct_t * p_imdct, float data[], float delay[]);
-void imdct_do_512_nol_c (imdct_t * p_imdct, float data[], float delay[]);
 static float window[] = {
 	0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
@@ -112,7 +109,7 @@ static const int pm64[64] =
 int imdct_init_c (imdct_t * p_imdct)
 {
 	int i;
-	float scale = 255.99609372;
+	float scale = 181.019;
 	p_imdct->imdct_do_512 = imdct_do_512_c;
 	p_imdct->imdct_do_512_nol = imdct_do_512_nol_c;

--- a/src/ac3_decoder/ac3_imdct_sse.c
+++ b/src/ac3_decoder/ac3_imdct_sse.c
--- a/src/ac3_decoder/ac3_imdct_sse.h
+++ b/src/ac3_decoder/ac3_imdct_sse.h
+int  imdct_init_sse (imdct_t * p_imdct);
+void imdct_do_512_sse(imdct_t * p_imdct, float data[], float delay[]);
+void imdct_do_512_nol_sse(imdct_t * p_imdct, float data[], float delay[]);
--- a/src/ac3_decoder/ac3_internal.h
+++ b/src/ac3_decoder/ac3_internal.h
@@ -2,7 +2,7 @@
 * ac3_internals.h: needed by the ac3 decoder
 *****************************************************************************
 * Copyright (C) 2000 VideoLAN
- * $Id: ac3_internal.h,v 1.8 2001/03/21 13:42:34 sam Exp $
+ * $Id: ac3_internal.h,v 1.9 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Michel Lespinasse <walken@zoy.org>
 *
@@ -37,12 +37,13 @@
 void bit_allocate (ac3dec_t *);
 /* ac3_downmix.c */
-int downmix (ac3dec_t *, float *, s16 *);
+void downmix_init (downmix_t * p_downmix);
 /* ac3_exponent.c */
 int exponent_unpack (ac3dec_t *);
 /* ac3_imdct.c */
+void imdct_init (imdct_t * p_imdct);
 void imdct (ac3dec_t * p_ac3dec, s16 * buffer);
 /* ac3_mantissa.c */

--- a/src/ac3_decoder/ac3_mantissa.c
+++ b/src/ac3_decoder/ac3_mantissa.c
@@ -2,7 +2,7 @@
 * ac3_mantissa.c: ac3 mantissa computation
 *****************************************************************************
 * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_mantissa.c,v 1.27 2001/05/07 03:14:09 stef Exp $
+ * $Id: ac3_mantissa.c,v 1.28 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Michel Kaempf <maxx@via.ecp.fr>
 *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -38,9 +38,6 @@
 #include "audio_output.h"
 #include "ac3_decoder.h"
-#include "ac3_decoder_thread.h"
-#include "ac3_internal.h"
 #include "intf_msg.h"
@@ -291,7 +288,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf
            p_ac3dec->total_bits_read += 5;
            if ((group_code = GetBits (&p_ac3dec->bit_stream,5)) > 26)
            {
-                intf_WarnMsg ( 3, "ac3dec error: invalid mantissa (1)" );
+                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (1)" );
                return 0;
            }
@@ -312,7 +309,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf
            p_ac3dec->total_bits_read += 7;
            if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 124)
            {
-                intf_WarnMsg ( 3, "ac3dec error: invalid mantissa (2)" );
+                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (2)" );
                return 0;
            }
@@ -327,7 +324,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf
            p_ac3dec->total_bits_read += 3;
            if ((group_code = GetBits (&p_ac3dec->bit_stream,3)) > 6)
            {
-                intf_WarnMsg ( 3, "ac3dec error: invalid mantissa (3)" );
+                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (3)" );
                return 0;
            }
@@ -343,7 +340,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf
            p_ac3dec->total_bits_read += 7;
            if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 120)
            {
-                intf_WarnMsg ( 3, "ac3dec error: invalid mantissa (4)" );
+                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (4)" );
                return 0;
            }
@@ -357,7 +354,7 @@ static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithf
            p_ac3dec->total_bits_read += 4;
            if ((group_code = GetBits (&p_ac3dec->bit_stream,4)) > 14)
            {
-                intf_WarnMsg ( 3, "ac3dec error: invalid mantissa (5)" );
+                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (5)" );
                return 0;
            }

--- a/src/ac3_decoder/ac3_parse.c
+++ b/src/ac3_decoder/ac3_parse.c
@@ -2,7 +2,7 @@
 * ac3_parse.c: ac3 parsing procedures
 *****************************************************************************
 * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_parse.c,v 1.21 2001/05/07 04:42:42 sam Exp $
+ * $Id: ac3_parse.c,v 1.22 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Michel Kaempf <maxx@via.ecp.fr>
 *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -40,9 +40,9 @@
 #include "intf_msg.h"
 #include "ac3_decoder.h"
-#include "ac3_decoder_thread.h"
+#include "ac3_decoder_thread.h"                           /* ac3dec_thread_t */
-#include "ac3_internal.h"
+#include "ac3_internal.h"                                       /* EXP_REUSE */
 /* Misc LUT */
 static const u16 nfchans[] = { 2, 1, 2, 3, 3, 4, 4, 5 };
@@ -97,8 +97,10 @@ static const struct frmsize_s frmsizecod_tbl[] =
 static const int fscod_tbl[] = {48000, 44100, 32000};
 /* Some internal functions */
-void parse_bsi_stats (ac3dec_t * p_ac3dec);
+#ifdef STATS
-void parse_audblk_stats (ac3dec_t * p_ac3dec);
+static void parse_bsi_stats (ac3dec_t * p_ac3dec);
+static void parse_audblk_stats (ac3dec_t * p_ac3dec);
+#endif
 /* Parse a syncinfo structure */
 int ac3_sync_frame (ac3dec_t * p_ac3dec, ac3_sync_info_t * p_sync_info) 
@@ -778,7 +780,7 @@ int parse_audblk (ac3dec_t * p_ac3dec, int blknum)
    }
 #ifdef STATS
-//    parse_audblk_stats(p_ac3dec);
+    parse_audblk_stats(p_ac3dec);
 #endif
    return 0;
@@ -806,7 +808,8 @@ void parse_auxdata (ac3dec_t * p_ac3dec)
    RemoveBits (&p_ac3dec->bit_stream,16);
 }
-void parse_bsi_stats (ac3dec_t * p_ac3dec) /*Some stats */
+#ifdef STATS
+static void parse_bsi_stats (ac3dec_t * p_ac3dec) /* Some stats */
 {  
    struct mixlev_s
    {
@@ -850,7 +853,7 @@ void parse_bsi_stats (ac3dec_t * p_ac3dec) /*Some stats */
        i = 0;
 }
-void parse_audblk_stats (ac3dec_t * p_ac3dec)
+static void parse_audblk_stats (ac3dec_t * p_ac3dec)
 {
    char *exp_strat_tbl[4] = {"R   ","D15 ","D25 ","D45 "};
    u32 i;
@@ -871,3 +874,4 @@ void parse_audblk_stats (ac3dec_t * p_ac3dec)
    intf_ErrMsg ("\n");
 }
+#endif
--- a/src/ac3_decoder/ac3_rematrix.c
+++ b/src/ac3_decoder/ac3_rematrix.c
@@ -2,7 +2,7 @@
 * ac3_rematrix.c: ac3 audio rematrixing
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_rematrix.c,v 1.16 2001/05/06 04:32:02 sam Exp $
+ * $Id: ac3_rematrix.c,v 1.17 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Michel Kaempf <maxx@via.ecp.fr>
 *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -34,7 +34,6 @@
 #include "input_ext-dec.h"
 #include "ac3_decoder.h"
-#include "ac3_internal.h"
 struct rematrix_band_s {
    u32 start;

--- a/src/ac3_decoder/ac3_srfft.c
+++ b/src/ac3_decoder/ac3_srfft.c
@@ -2,7 +2,7 @@
 * ac3_srfft.c: ac3 FFT
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_srfft.c,v 1.3 2001/05/06 04:32:02 sam Exp $
+ * $Id: ac3_srfft.c,v 1.4 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Renaud Dartus <reno@videolan.org>
 *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -40,9 +40,9 @@
 #include "ac3_decoder.h"
 #include "ac3_srfft.h"
-void fft_8 (complex_t *x);
+static void fft_8 (complex_t *x);
-void fft_4(complex_t *x)
+static void fft_4(complex_t *x)
 {
  /* delta_p = 1 here */
  /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} 
@@ -90,7 +90,7 @@ void fft_4(complex_t *x)
 }
-void fft_8 (complex_t *x)
+static void fft_8 (complex_t *x)
 {
  /* delta_p = diag{1, sqrt(i)} here */
  /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8} 
@@ -205,7 +205,7 @@ void fft_8 (complex_t *x)
 }
-void fft_asmb(int k, complex_t *x, complex_t *wTB,
+static void fft_asmb(int k, complex_t *x, complex_t *wTB,
 	     const complex_t *d, const complex_t *d_3)
 {
  register complex_t  *x2k, *x3k, *x4k, *wB;
@@ -236,7 +236,7 @@ void fft_asmb(int k, complex_t *x, complex_t *wTB,
 }
-void fft_asmb16(complex_t *x, complex_t *wTB)
+static void fft_asmb16(complex_t *x, complex_t *wTB)
 {
  register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
  int k = 2;

--- a/src/ac3_decoder/ac3_srfft.h
+++ b/src/ac3_decoder/ac3_srfft.h
@@ -2,7 +2,7 @@
 * ac3_srfft.h: ac3 FFT
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_srfft.h,v 1.2 2001/04/30 21:10:25 reno Exp $
+ * $Id: ac3_srfft.h,v 1.3 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Renaud Dartus <reno@videolan.org>
 *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -22,19 +22,19 @@
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/
-static complex_t delta16[4] = 
+static const complex_t delta16[4] = 
 { {1.00000000000000,  0.00000000000000},
   {0.92387953251129, -0.38268343236509},
   {0.70710678118655, -0.70710678118655},
   {0.38268343236509, -0.92387953251129}};
-static complex_t delta16_3[4] = 
+static const complex_t delta16_3[4] = 
 { {1.00000000000000,  0.00000000000000},
   {0.38268343236509, -0.92387953251129},
   {-0.70710678118655, -0.70710678118655},
   {-0.92387953251129, 0.38268343236509}};
-static complex_t delta32[8] = 
+static const complex_t delta32[8] = 
 { {1.00000000000000,  0.00000000000000},
   {0.98078528040323, -0.19509032201613},
   {0.92387953251129, -0.38268343236509},
@@ -44,7 +44,7 @@ static complex_t delta32[8] =
   {0.38268343236509, -0.92387953251129},
   {0.19509032201613, -0.98078528040323}};
-static complex_t delta32_3[8] = 
+static const complex_t delta32_3[8] = 
 { {1.00000000000000,  0.00000000000000},
   {0.83146961230255, -0.55557023301960},
   {0.38268343236509, -0.92387953251129},
@@ -54,7 +54,7 @@ static complex_t delta32_3[8] =
   {-0.92387953251129, 0.38268343236509},
   {-0.55557023301960, 0.83146961230255}};
-static complex_t delta64[16] = 
+static const complex_t delta64[16] = 
 { {1.00000000000000,  0.00000000000000},
   {0.99518472667220, -0.09801714032956},
   {0.98078528040323, -0.19509032201613},
@@ -72,7 +72,7 @@ static complex_t delta64[16] =
   {0.19509032201613, -0.98078528040323},
   {0.09801714032956, -0.99518472667220}};
-static complex_t delta64_3[16] = 
+static const complex_t delta64_3[16] = 
 { {1.00000000000000,  0.00000000000000},
   {0.95694033573221, -0.29028467725446},
   {0.83146961230255, -0.55557023301960},
@@ -90,7 +90,7 @@ static complex_t delta64_3[16] =
   {-0.55557023301960, 0.83146961230255},
   {-0.29028467725446, 0.95694033573221}};
-static complex_t delta128[32] = 
+static const complex_t delta128[32] = 
 { {1.00000000000000,  0.00000000000000},
   {0.99879545620517, -0.04906767432742},
   {0.99518472667220, -0.09801714032956},
@@ -124,7 +124,7 @@ static complex_t delta128[32] =
   {0.09801714032956, -0.99518472667220},
   {0.04906767432742, -0.99879545620517}};
-static complex_t delta128_3[32] = 
+static const complex_t delta128_3[32] = 
 { {1.00000000000000,  0.00000000000000},
   {0.98917650996478, -0.14673047445536},
   {0.95694033573221, -0.29028467725446},

--- a/src/ac3_decoder/ac3_srfft_sse.c
+++ b/src/ac3_decoder/ac3_srfft_sse.c
--- a/src/interface/main.c
+++ b/src/interface/main.c
@@ -4,7 +4,7 @@
 * and spawn threads.
 *****************************************************************************
 * Copyright (C) 1998, 1999, 2000 VideoLAN
- * $Id: main.c,v 1.93 2001/05/07 03:14:09 stef Exp $
+ * $Id: main.c,v 1.94 2001/05/14 15:58:04 reno Exp $
 *
 * Authors: Vincent Seguin <seguin@via.ecp.fr>
 *          Samuel Hocevar <sam@zoy.org>
@@ -974,6 +974,7 @@ static int CPUCapabilities( void )
    if( i_edx & 0x02000000 )
    {
        i_capabilities |= CPU_CAPABILITY_MMXEXT;
+        i_capabilities |= CPU_CAPABILITY_SSE;
    }
    /* test for additional capabilities */
@@ -996,7 +997,6 @@ static int CPUCapabilities( void )
    {
        i_capabilities |= CPU_CAPABILITY_MMXEXT;
    }
 #else
    /* default behaviour */