Commit e5b4af91 authored by Renaud Dartus

* Use memalign for aligned data (instead of a greek malloc)

* Some optimizations in imdct (all data is now aligned)
* SSE downmix now works on Windows
* SSE imdct is deactivated on Windows (MINGW32 doesn't know how to align data)
parent a940bf72
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_imdct.h : AC3 IMDCT types * ac3_imdct.h : AC3 IMDCT types
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_imdct.h,v 1.5 2001/07/08 23:15:11 reno Exp $ * $Id: ac3_imdct.h,v 1.6 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Renaud Dartus <reno@videolan.org> * Renaud Dartus <reno@videolan.org>
...@@ -31,30 +31,27 @@ typedef struct complex_s { ...@@ -31,30 +31,27 @@ typedef struct complex_s {
typedef struct imdct_s typedef struct imdct_s
{ {
complex_t buf[N/4] __attribute__ ((aligned(16))); complex_t * buf;
/* Delay buffer for time domain interleaving */ /* Delay buffer for time domain interleaving */
float delay[6][256] __attribute__ ((aligned(16))); float * delay;
float delay1[6][256] __attribute__ ((aligned(16))); float * delay1;
/* Twiddle factors for IMDCT */ /* Twiddle factors for IMDCT */
float xcos1[N/4] __attribute__ ((aligned(16))); float * xcos1;
float xsin1[N/4] __attribute__ ((aligned(16))); float * xsin1;
float xcos2[N/8] __attribute__ ((aligned(16))); float * xcos2;
float xsin2[N/8] __attribute__ ((aligned(16))); float * xsin2;
float xcos_sin_sse[128 * 4] __attribute__ ((aligned(16))); float * xcos_sin_sse;
/* Twiddle factor LUT */ /* Twiddle factor LUT */
complex_t w_1[1] __attribute__ ((aligned(16))); complex_t * w_2;
float used_for_alignement1; complex_t * w_4;
float used_for_alignement2; complex_t * w_8;
complex_t w_2[2] __attribute__ ((aligned(16))); complex_t * w_16;
complex_t w_4[4] __attribute__ ((aligned(16))); complex_t * w_32;
complex_t w_8[8] __attribute__ ((aligned(16))); complex_t * w_64;
complex_t w_16[16] __attribute__ ((aligned(16))); complex_t * w_1;
complex_t w_32[32] __attribute__ ((aligned(16)));
complex_t w_64[64] __attribute__ ((aligned(16)));
complex_t *w[7] __attribute__ ((aligned(16)));
/* Module used and shortcuts */ /* Module used and shortcuts */
struct module_s * p_module; struct module_s * p_module;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* Collection of useful common types and macros definitions * Collection of useful common types and macros definitions
***************************************************************************** *****************************************************************************
* Copyright (C) 1998, 1999, 2000 VideoLAN * Copyright (C) 1998, 1999, 2000 VideoLAN
* $Id: common.h,v 1.42 2001/10/22 12:28:53 massiot Exp $ * $Id: common.h,v 1.43 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Samuel Hocevar <sam@via.ecp.fr> * Authors: Samuel Hocevar <sam@via.ecp.fr>
* Vincent Seguin <seguin@via.ecp.fr> * Vincent Seguin <seguin@via.ecp.fr>
...@@ -208,10 +208,16 @@ struct pgrm_descriptor_s; ...@@ -208,10 +208,16 @@ struct pgrm_descriptor_s;
/* That's like using a hammer to kill a fly, but eh... */ /* That's like using a hammer to kill a fly, but eh... */
# include <unistd.h> # include <unistd.h>
# define memalign(align,size) valloc(size) # define memalign(align,size) valloc(size)
# else
# if defined( __MINGW32__ )
# define memalign(align,size) (void *)(((unsigned long)(malloc(size+align-1))+align-1)&~(align-1))
# else # else
/* Assume malloc alignment is sufficient */ /* Assume malloc alignment is sufficient */
# define memalign(align,size) malloc(size) # define memalign(align,size) malloc(size)
# endif # endif
# endif
#endif #endif
/* win32, cl and icl support */ /* win32, cl and icl support */
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_imdct_sse.c: accelerated SSE ac3 DCT * ac3_imdct_sse.c: accelerated SSE ac3 DCT
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_imdct_sse.c,v 1.5 2001/07/26 20:00:33 reno Exp $ * $Id: ac3_imdct_sse.c,v 1.6 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -249,7 +249,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w ...@@ -249,7 +249,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
"movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
"movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
"movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ "movaps (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
"movaps (%%ecx), %%xmm5\n" /* d3 | d2 | d1 | d0 */ "movaps (%%ecx), %%xmm5\n" /* d3 | d2 | d1 | d0 */
"shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
...@@ -263,7 +263,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w ...@@ -263,7 +263,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
"movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
"addps %%xmm5, %%xmm0\n" "addps %%xmm5, %%xmm0\n"
"shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
"movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ "movaps 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
"movaps 16(%%ecx), %%xmm5\n" /* d7 | d6 | d5 | d4 */ "movaps 16(%%ecx), %%xmm5\n" /* d7 | d6 | d5 | d4 */
"subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */ "subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */
"addl $32, %%edx\n" "addl $32, %%edx\n"
...@@ -292,7 +292,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w ...@@ -292,7 +292,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
"movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
"movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im1 */ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im1 */
"movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ "movaps (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
"movaps (%%ecx), %%xmm5\n" /* d3 | d2 | d1 | d0 */ "movaps (%%ecx), %%xmm5\n" /* d3 | d2 | d1 | d0 */
"shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
...@@ -305,7 +305,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w ...@@ -305,7 +305,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
"movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
"mulps %%xmm4, %%xmm0\n" "mulps %%xmm4, %%xmm0\n"
"shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
"movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ "movaps 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
"addl $32, %%esi\n" "addl $32, %%esi\n"
"subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */ "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
"addps %%xmm5, %%xmm0\n" "addps %%xmm5, %%xmm0\n"
...@@ -336,7 +336,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w ...@@ -336,7 +336,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
"movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
"movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */
"movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ "movaps -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
"shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
"movss 16(%%esi), %%xmm6\n" /* re2 */ "movss 16(%%esi), %%xmm6\n" /* re2 */
"movss 24(%%esi), %%xmm7\n" /* re3 */ "movss 24(%%esi), %%xmm7\n" /* re3 */
...@@ -347,7 +347,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w ...@@ -347,7 +347,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
"movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */
"movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
"mulps %%xmm4, %%xmm0\n" "mulps %%xmm4, %%xmm0\n"
"movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ "movaps (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
"shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
"movaps %%xmm0, (%%ecx)\n" "movaps %%xmm0, (%%ecx)\n"
"addl $32, %%esi\n" "addl $32, %%esi\n"
...@@ -373,7 +373,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w ...@@ -373,7 +373,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
"movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
"movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
"movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ "movaps -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
"shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
"movss 16(%%esi), %%xmm6\n" /* im2 */ "movss 16(%%esi), %%xmm6\n" /* im2 */
"movss 24(%%esi), %%xmm7\n" /* im3 */ "movss 24(%%esi), %%xmm7\n" /* im3 */
...@@ -384,7 +384,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w ...@@ -384,7 +384,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
"movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */
"movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
"mulps %%xmm4, %%xmm1\n" "mulps %%xmm4, %%xmm1\n"
"movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ "movaps (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
"shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
"movaps %%xmm1, (%%ecx)\n" "movaps %%xmm1, (%%ecx)\n"
"addl $32, %%esi\n" "addl $32, %%esi\n"
...@@ -436,7 +436,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa ...@@ -436,7 +436,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
"movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
"movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
"movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ "movaps (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
"shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
"movss 16(%%esi), %%xmm6\n" /* im2 */ "movss 16(%%esi), %%xmm6\n" /* im2 */
...@@ -448,7 +448,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa ...@@ -448,7 +448,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
"movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */
"movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
"shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
"movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ "movaps 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
"subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */ "subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */
"addl $32, %%edx\n" "addl $32, %%edx\n"
"movaps %%xmm0, (%%eax)\n" "movaps %%xmm0, (%%eax)\n"
...@@ -474,7 +474,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa ...@@ -474,7 +474,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
"movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
"movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im1 */ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im1 */
"movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ "movaps (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
"shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
"movss 16(%%esi), %%xmm6\n" /* re2 */ "movss 16(%%esi), %%xmm6\n" /* re2 */
...@@ -486,7 +486,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa ...@@ -486,7 +486,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
"movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
"mulps %%xmm4, %%xmm0\n" "mulps %%xmm4, %%xmm0\n"
"shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
"movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ "movaps 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
"addl $32, %%esi\n" "addl $32, %%esi\n"
"subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */ "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
"mulps %%xmm4, %%xmm6\n" "mulps %%xmm4, %%xmm6\n"
...@@ -513,7 +513,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa ...@@ -513,7 +513,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
"movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
"movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */
"movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ "movaps -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
"shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
"movss 16(%%esi), %%xmm6\n" /* re2 */ "movss 16(%%esi), %%xmm6\n" /* re2 */
"movss 24(%%esi), %%xmm7\n" /* re3 */ "movss 24(%%esi), %%xmm7\n" /* re3 */
...@@ -524,7 +524,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa ...@@ -524,7 +524,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
"movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */
"movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
"mulps %%xmm4, %%xmm0\n" "mulps %%xmm4, %%xmm0\n"
"movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ "movaps (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
"shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
"movaps %%xmm0, (%%ecx)\n" "movaps %%xmm0, (%%ecx)\n"
"addl $32, %%esi\n" "addl $32, %%esi\n"
...@@ -550,7 +550,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa ...@@ -550,7 +550,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
"movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
"movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
"movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ "movaps -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
"shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
"movss 16(%%esi), %%xmm6\n" /* im2 */ "movss 16(%%esi), %%xmm6\n" /* im2 */
"movss 24(%%esi), %%xmm7\n" /* im3 */ "movss 24(%%esi), %%xmm7\n" /* im3 */
...@@ -561,7 +561,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa ...@@ -561,7 +561,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
"movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */
"movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
"mulps %%xmm4, %%xmm1\n" "mulps %%xmm4, %%xmm1\n"
"movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ "movaps (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
"shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
"movaps %%xmm1, (%%ecx)\n" "movaps %%xmm1, (%%ecx)\n"
"addl $32, %%esi\n" "addl $32, %%esi\n"
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_retables.h: ac3 DCT tables * ac3_retables.h: ac3 DCT tables
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_retables.h,v 1.1 2001/05/16 14:51:29 reno Exp $ * $Id: ac3_retables.h,v 1.2 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/ *****************************************************************************/
static float window[] = { static float window[] ATTR_ALIGN(16) = {
0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443, 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061, 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_srfft.h: ac3 FFT tables * ac3_srfft.h: ac3 FFT tables
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_srfft.h,v 1.1 2001/05/15 16:19:42 sam Exp $ * $Id: ac3_srfft.h,v 1.2 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -22,19 +22,19 @@ ...@@ -22,19 +22,19 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/ *****************************************************************************/
static const complex_t delta16[4] = static const complex_t delta16[4] ATTR_ALIGN(16) =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.92387953251129, -0.38268343236509}, {0.92387953251129, -0.38268343236509},
{0.70710678118655, -0.70710678118655}, {0.70710678118655, -0.70710678118655},
{0.38268343236509, -0.92387953251129}}; {0.38268343236509, -0.92387953251129}};
static const complex_t delta16_3[4] = static const complex_t delta16_3[4] ATTR_ALIGN(16) =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.38268343236509, -0.92387953251129}, {0.38268343236509, -0.92387953251129},
{-0.70710678118655, -0.70710678118655}, {-0.70710678118655, -0.70710678118655},
{-0.92387953251129, 0.38268343236509}}; {-0.92387953251129, 0.38268343236509}};
static const complex_t delta32[8] = static const complex_t delta32[8] ATTR_ALIGN(16) =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.98078528040323, -0.19509032201613}, {0.98078528040323, -0.19509032201613},
{0.92387953251129, -0.38268343236509}, {0.92387953251129, -0.38268343236509},
...@@ -44,7 +44,7 @@ static const complex_t delta32[8] = ...@@ -44,7 +44,7 @@ static const complex_t delta32[8] =
{0.38268343236509, -0.92387953251129}, {0.38268343236509, -0.92387953251129},
{0.19509032201613, -0.98078528040323}}; {0.19509032201613, -0.98078528040323}};
static const complex_t delta32_3[8] = static const complex_t delta32_3[8] ATTR_ALIGN(16) =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.83146961230255, -0.55557023301960}, {0.83146961230255, -0.55557023301960},
{0.38268343236509, -0.92387953251129}, {0.38268343236509, -0.92387953251129},
...@@ -54,7 +54,7 @@ static const complex_t delta32_3[8] = ...@@ -54,7 +54,7 @@ static const complex_t delta32_3[8] =
{-0.92387953251129, 0.38268343236509}, {-0.92387953251129, 0.38268343236509},
{-0.55557023301960, 0.83146961230255}}; {-0.55557023301960, 0.83146961230255}};
static const complex_t delta64[16] = static const complex_t delta64[16] ATTR_ALIGN(16) =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.99518472667220, -0.09801714032956}, {0.99518472667220, -0.09801714032956},
{0.98078528040323, -0.19509032201613}, {0.98078528040323, -0.19509032201613},
...@@ -72,7 +72,7 @@ static const complex_t delta64[16] = ...@@ -72,7 +72,7 @@ static const complex_t delta64[16] =
{0.19509032201613, -0.98078528040323}, {0.19509032201613, -0.98078528040323},
{0.09801714032956, -0.99518472667220}}; {0.09801714032956, -0.99518472667220}};
static const complex_t delta64_3[16] = static const complex_t delta64_3[16] ATTR_ALIGN(16) =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.95694033573221, -0.29028467725446}, {0.95694033573221, -0.29028467725446},
{0.83146961230255, -0.55557023301960}, {0.83146961230255, -0.55557023301960},
...@@ -90,7 +90,7 @@ static const complex_t delta64_3[16] = ...@@ -90,7 +90,7 @@ static const complex_t delta64_3[16] =
{-0.55557023301960, 0.83146961230255}, {-0.55557023301960, 0.83146961230255},
{-0.29028467725446, 0.95694033573221}}; {-0.29028467725446, 0.95694033573221}};
static const complex_t delta128[32] = static const complex_t delta128[32] ATTR_ALIGN(16) =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.99879545620517, -0.04906767432742}, {0.99879545620517, -0.04906767432742},
{0.99518472667220, -0.09801714032956}, {0.99518472667220, -0.09801714032956},
...@@ -124,7 +124,7 @@ static const complex_t delta128[32] = ...@@ -124,7 +124,7 @@ static const complex_t delta128[32] =
{0.09801714032956, -0.99518472667220}, {0.09801714032956, -0.99518472667220},
{0.04906767432742, -0.99879545620517}}; {0.04906767432742, -0.99879545620517}};
static const complex_t delta128_3[32] = static const complex_t delta128_3[32] ATTR_ALIGN(16) =
{ {1.00000000000000, 0.00000000000000}, { {1.00000000000000, 0.00000000000000},
{0.98917650996478, -0.14673047445536}, {0.98917650996478, -0.14673047445536},
{0.95694033573221, -0.29028467725446}, {0.95694033573221, -0.29028467725446},
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_srfft_sse.c: accelerated SSE ac3 fft functions * ac3_srfft_sse.c: accelerated SSE ac3 fft functions
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN * Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_srfft_sse.c,v 1.5 2001/07/26 20:00:33 reno Exp $ * $Id: ac3_srfft_sse.c,v 1.6 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -43,8 +43,6 @@ ...@@ -43,8 +43,6 @@
#include "ac3_imdct.h" #include "ac3_imdct.h"
#include "ac3_srfft.h" #include "ac3_srfft.h"
void hsqrt2_sse (void) __asm__ ("hsqrt2_sse");
void C_1_sse (void) __asm__ ("C_1_sse");
static void fft_4_sse (complex_t *x); static void fft_4_sse (complex_t *x);
static void fft_8_sse (complex_t *x); static void fft_8_sse (complex_t *x);
static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB, static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
...@@ -104,25 +102,11 @@ void _M( fft_128p ) ( complex_t *a ) ...@@ -104,25 +102,11 @@ void _M( fft_128p ) ( complex_t *a )
fft_asmb_sse(16, &a[0], &a[64], &delta128[0], &delta128_3[0]); fft_asmb_sse(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
} }
void hsqrt2_sse (void) static float hsqrt2_sse[] ATTR_ALIGN(16) =
{ { 0.707106781188, 0.707106781188, -0.707106781188, -0.707106781188 };
__asm__ __volatile__ (
".float 0f0.707106781188\n"
".float 0f0.707106781188\n"
".float 0f-0.707106781188\n"
".float 0f-0.707106781188\n"
);
}
void C_1_sse (void) static float C_1_sse[] ATTR_ALIGN(16) =
{ { -1.0, 1.0, -1.0, 1.0 };
__asm__ __volatile__ (
".float 0f-1.0\n"
".float 0f1.0\n"
".float 0f-1.0\n"
".float 0f1.0\n"
);
}
static void fft_4_sse (complex_t *x) static void fft_4_sse (complex_t *x)
{ {
...@@ -153,7 +137,6 @@ static void fft_8_sse (complex_t *x) ...@@ -153,7 +137,6 @@ static void fft_8_sse (complex_t *x)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n" ".align 16\n"
"pushl %%ebx\n"
"movlps (%%eax), %%xmm0\n" /* x[0] */ "movlps (%%eax), %%xmm0\n" /* x[0] */
"movlps 32(%%eax), %%xmm1\n" /* x[4] */ "movlps 32(%%eax), %%xmm1\n" /* x[4] */
...@@ -176,13 +159,12 @@ static void fft_8_sse (complex_t *x) ...@@ -176,13 +159,12 @@ static void fft_8_sse (complex_t *x)
"subps %%xmm5, %%xmm7\n" /* yb = i*(x6-x2)+x0-x4 | -x6-x2+x0+x4 */ "subps %%xmm5, %%xmm7\n" /* yb = i*(x6-x2)+x0-x4 | -x6-x2+x0+x4 */
"movhps 24(%%eax), %%xmm1\n" /* x[3] | x[1] */ "movhps 24(%%eax), %%xmm1\n" /* x[3] | x[1] */
"movl $hsqrt2_sse, %%ebx\n"
"movlps 40(%%eax), %%xmm2\n" /* x[5] */ "movlps 40(%%eax), %%xmm2\n" /* x[5] */
"movhps 56(%%eax), %%xmm2\n" /* x[7] | x[5] */ "movhps 56(%%eax), %%xmm2\n" /* x[7] | x[5] */
"movaps %%xmm1, %%xmm3\n" /* x[3] | x[1] */ "movaps %%xmm1, %%xmm3\n" /* x[3] | x[1] */
"addps %%xmm2, %%xmm1\n" /* x[3] + x[7] | x[1] + x[5] */ "addps %%xmm2, %%xmm1\n" /* x[3] + x[7] | x[1] + x[5] */
"subps %%xmm2, %%xmm3\n" /* x[3] - x[7] | x[1] - x[5] */ "subps %%xmm2, %%xmm3\n" /* x[3] - x[7] | x[1] - x[5] */
"movups (%%ebx), %%xmm4\n" /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */ "movaps (%%ecx), %%xmm4\n" /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */
"movaps %%xmm3, %%xmm6\n" /* x[3] - x[7] | x[1] - x[5] */ "movaps %%xmm3, %%xmm6\n" /* x[3] - x[7] | x[1] - x[5] */
"mulps %%xmm4, %%xmm3\n" /* -1/s2*(x[3] - x[7]) | 1/s2*(x[1] - x[5]) */ "mulps %%xmm4, %%xmm3\n" /* -1/s2*(x[3] - x[7]) | 1/s2*(x[1] - x[5]) */
"shufps $0xc8, %%xmm4, %%xmm4\n" /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */ "shufps $0xc8, %%xmm4, %%xmm4\n" /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */
...@@ -193,12 +175,11 @@ static void fft_8_sse (complex_t *x) ...@@ -193,12 +175,11 @@ static void fft_8_sse (complex_t *x)
"movlhps %%xmm6, %%xmm1\n" /* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */ "movlhps %%xmm6, %%xmm1\n" /* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
"shufps $0xe4, %%xmm6, %%xmm5\n" /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */ "shufps $0xe4, %%xmm6, %%xmm5\n" /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */
"movaps %%xmm1, %%xmm3\n" /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */ "movaps %%xmm1, %%xmm3\n" /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
"movl $C_1_sse, %%ebx\n"
"addps %%xmm5, %%xmm1\n" /* u */ "addps %%xmm5, %%xmm1\n" /* u */
"subps %%xmm5, %%xmm3\n" /* v */ "subps %%xmm5, %%xmm3\n" /* v */
"movaps %%xmm0, %%xmm2\n" /* yb */ "movaps %%xmm0, %%xmm2\n" /* yb */
"movaps %%xmm7, %%xmm4\n" /* yt */ "movaps %%xmm7, %%xmm4\n" /* yt */
"movups (%%ebx), %%xmm5\n" "movaps (%%edx), %%xmm5\n"
"mulps %%xmm5, %%xmm3\n" "mulps %%xmm5, %%xmm3\n"
"addps %%xmm1, %%xmm0\n" /* yt + u */ "addps %%xmm1, %%xmm0\n" /* yt + u */
"subps %%xmm1, %%xmm2\n" /* yt - u */ "subps %%xmm1, %%xmm2\n" /* yt - u */
...@@ -210,17 +191,16 @@ static void fft_8_sse (complex_t *x) ...@@ -210,17 +191,16 @@ static void fft_8_sse (complex_t *x)
"movaps %%xmm4, 16(%%eax)\n" "movaps %%xmm4, 16(%%eax)\n"
"movaps %%xmm7, 48(%%eax)\n" "movaps %%xmm7, 48(%%eax)\n"
"popl %%ebx\n"
: "=a" (x) : "=a" (x)
: "a" (x)); : "a" (x), "c" (hsqrt2_sse), "d" (C_1_sse));
} }
static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB, static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
const complex_t *d, const complex_t *d_3) const complex_t *d, const complex_t *d_3)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n" ".align 16\n"
"pushl %%esp\n"
"pushl %%ebp\n" "pushl %%ebp\n"
"movl %%esp, %%ebp\n" "movl %%esp, %%ebp\n"
...@@ -256,14 +236,13 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB, ...@@ -256,14 +236,13 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
"movhlps %%xmm5, %%xmm7\n" /* wT[1].im * d[1].im | wT[1].re * d[1].im */ "movhlps %%xmm5, %%xmm7\n" /* wT[1].im * d[1].im | wT[1].re * d[1].im */
"movlhps %%xmm6, %%xmm5\n" /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */ "movlhps %%xmm6, %%xmm5\n" /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */
"shufps $0xb1, %%xmm6, %%xmm7\n" /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */ "shufps $0xb1, %%xmm6, %%xmm7\n" /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */
"movl $C_1_sse, %%ebx\n"
"movaps (%%ebx), %%xmm4\n" "movaps (%%ebx), %%xmm4\n"
"mulps %%xmm4, %%xmm7\n" "mulps %%xmm4, %%xmm7\n"
"addps %%xmm7, %%xmm5\n" /* wB[1] * d3[1] | wT[1] * d[1] */ "addps %%xmm7, %%xmm5\n" /* wB[1] * d3[1] | wT[1] * d[1] */
"movlhps %%xmm5, %%xmm1\n" /* d[1] * wT[1] | wT[0] */ "movlhps %%xmm5, %%xmm1\n" /* d[1] * wT[1] | wT[0] */
"shufps $0xe4, %%xmm5, %%xmm2\n" /* d3[1] * wB[1] | wB[0] */ "shufps $0xe4, %%xmm5, %%xmm2\n" /* d3[1] * wB[1] | wB[0] */
"movaps %%xmm1, %%xmm3\n" /* d[1] * wT[1] | wT[0] */ "movaps %%xmm1, %%xmm3\n" /* d[1] * wT[1] | wT[0] */
"leal (%%eax, %%ecx, 2), %%ebx\n" "leal (%%eax, %%ecx, 2), %%esp\n"
"addps %%xmm2, %%xmm1\n" /* u */ "addps %%xmm2, %%xmm1\n" /* u */
"subps %%xmm2, %%xmm3\n" /* v */ "subps %%xmm2, %%xmm3\n" /* v */
"mulps %%xmm4, %%xmm3\n" "mulps %%xmm4, %%xmm3\n"
...@@ -276,9 +255,9 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB, ...@@ -276,9 +255,9 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
"addps %%xmm3, %%xmm5\n" "addps %%xmm3, %%xmm5\n"
"subps %%xmm3, %%xmm6\n" "subps %%xmm3, %%xmm6\n"
"movaps %%xmm0, (%%eax)\n" "movaps %%xmm0, (%%eax)\n"
"movaps %%xmm2, (%%ebx)\n" "movaps %%xmm2, (%%esp)\n"
"movaps %%xmm5, (%%eax, %%ecx)\n" "movaps %%xmm5, (%%eax, %%ecx)\n"
"movaps %%xmm6, (%%ebx, %%ecx)\n" "movaps %%xmm6, (%%esp, %%ecx)\n"
"addl $16, %%eax\n" "addl $16, %%eax\n"
"addl $16, %%edi\n" "addl $16, %%edi\n"
"addl $8, %%edx\n" "addl $8, %%edx\n"
...@@ -317,7 +296,6 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB, ...@@ -317,7 +296,6 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
"mulps %%xmm5, %%xmm4\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */ "mulps %%xmm5, %%xmm4\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */
"mulps %%xmm7, %%xmm6\n" /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */ "mulps %%xmm7, %%xmm6\n" /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */
"shufps $0xb1, %%xmm2, %%xmm1\n" /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */ "shufps $0xb1, %%xmm2, %%xmm1\n" /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */
"movl $C_1_sse, %%ebx\n"
"movaps (%%ebx), %%xmm3\n" /* 1.0 | -1.0 | 1.0 | -1.0 */ "movaps (%%ebx), %%xmm3\n" /* 1.0 | -1.0 | 1.0 | -1.0 */
"movhlps %%xmm4, %%xmm5\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */ "movhlps %%xmm4, %%xmm5\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */
...@@ -333,7 +311,7 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB, ...@@ -333,7 +311,7 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
"addps %%xmm4, %%xmm0\n" /* u */ "addps %%xmm4, %%xmm0\n" /* u */
"subps %%xmm4, %%xmm1\n" /* v */ "subps %%xmm4, %%xmm1\n" /* v */
"movaps (%%eax), %%xmm6\n" /* x[1] | x[0] */ "movaps (%%eax), %%xmm6\n" /* x[1] | x[0] */
"leal (%%eax, %%ecx, 2), %%ebx\n" "leal (%%eax, %%ecx, 2), %%esp\n"
"mulps %%xmm3, %%xmm1\n" "mulps %%xmm3, %%xmm1\n"
"addl $16, %%edi\n" "addl $16, %%edi\n"
"addl $16, %%esi\n" "addl $16, %%esi\n"
...@@ -344,12 +322,12 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB, ...@@ -344,12 +322,12 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
"addps %%xmm0, %%xmm6\n" "addps %%xmm0, %%xmm6\n"
"subps %%xmm0, %%xmm2\n" "subps %%xmm0, %%xmm2\n"
"movaps %%xmm6, (%%eax)\n" "movaps %%xmm6, (%%eax)\n"
"movaps %%xmm2, (%%ebx)\n" "movaps %%xmm2, (%%esp)\n"
"addps %%xmm1, %%xmm7\n" "addps %%xmm1, %%xmm7\n"
"subps %%xmm1, %%xmm4\n" "subps %%xmm1, %%xmm4\n"
"addl $16, %%edx\n" "addl $16, %%edx\n"
"movaps %%xmm7, (%%eax, %%ecx)\n" "movaps %%xmm7, (%%eax, %%ecx)\n"
"movaps %%xmm4, (%%ebx, %%ecx)\n" "movaps %%xmm4, (%%esp, %%ecx)\n"
"addl $16, %%eax\n" "addl $16, %%eax\n"
"decl -4(%%ebp)\n" "decl -4(%%ebp)\n"
...@@ -367,7 +345,7 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB, ...@@ -367,7 +345,7 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
"addl $4, %%esp\n" "addl $4, %%esp\n"
"leave\n" "leave\n"
"popl %%esp\n"
: "=c" (k), "=a" (x), "=D" (wTB) : "=c" (k), "=a" (x), "=D" (wTB)
: "c" (k), "a" (x), "D" (wTB), "d" (d), "S" (d_3)); : "c" (k), "a" (x), "D" (wTB), "d" (d), "S" (d_3), "b" (C_1_sse) );
} }
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* imdctsse.c : accelerated SSE IMDCT module * imdctsse.c : accelerated SSE IMDCT module
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: imdctsse.c,v 1.6 2001/07/11 02:01:04 sam Exp $ * $Id: imdctsse.c,v 1.7 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Gaël Hendryckx <jimmy@via.ecp.fr> * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
* *
...@@ -93,6 +93,9 @@ static void imdct_getfunctions( function_list_t * p_function_list ) ...@@ -93,6 +93,9 @@ static void imdct_getfunctions( function_list_t * p_function_list )
*****************************************************************************/ *****************************************************************************/
static int imdct_Probe( probedata_t *p_data ) static int imdct_Probe( probedata_t *p_data )
{ {
#if defined ( __MINGW32__ )
return 0;
#else
if( !TestCPU( CPU_CAPABILITY_SSE ) ) if( !TestCPU( CPU_CAPABILITY_SSE ) )
{ {
return( 0 ); return( 0 );
...@@ -106,5 +109,6 @@ static int imdct_Probe( probedata_t *p_data ) ...@@ -106,5 +109,6 @@ static int imdct_Probe( probedata_t *p_data )
/* This plugin always works */ /* This plugin always works */
return( 200 ); return( 200 );
#endif
} }
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder.c: core ac3 decoder * ac3_decoder.c: core ac3 decoder
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder.c,v 1.34 2001/05/15 16:19:42 sam Exp $ * $Id: ac3_decoder.c,v 1.35 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Michel Lespinasse <walken@zoy.org> * Michel Lespinasse <walken@zoy.org>
...@@ -55,7 +55,7 @@ static const float smixlev_lut[4] = { 0.707, 0.500, 0.0 , 0.500 }; ...@@ -55,7 +55,7 @@ static const float smixlev_lut[4] = { 0.707, 0.500, 0.0 , 0.500 };
int ac3_init (ac3dec_t * p_ac3dec) int ac3_init (ac3dec_t * p_ac3dec)
{ {
p_ac3dec->mantissa.lfsr_state = 1; /* dither_gen initialization */ p_ac3dec->mantissa.lfsr_state = 1; /* dither_gen initialization */
imdct_init(&p_ac3dec->imdct); imdct_init(p_ac3dec->imdct);
return 0; return 0;
} }
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder.h : ac3 decoder interface * ac3_decoder.h : ac3 decoder interface
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder.h,v 1.11 2001/07/08 23:15:11 reno Exp $ * $Id: ac3_decoder.h,v 1.12 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Renaud Dartus <reno@videolan.org> * Renaud Dartus <reno@videolan.org>
...@@ -354,8 +354,8 @@ typedef struct mantissa_s ...@@ -354,8 +354,8 @@ typedef struct mantissa_s
struct ac3dec_s struct ac3dec_s
{ {
float samples[6][256] __attribute__ ((aligned(16))); float * samples;
imdct_t imdct __attribute__ ((aligned(16))); imdct_t * imdct;
/* /*
* Input properties * Input properties
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder_thread.c: ac3 decoder thread * ac3_decoder_thread.c: ac3 decoder thread
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder_thread.c,v 1.37 2001/09/05 16:46:10 massiot Exp $ * $Id: ac3_decoder_thread.c,v 1.38 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Michel Lespinasse <walken@zoy.org> * Authors: Michel Lespinasse <walken@zoy.org>
* *
...@@ -78,18 +78,11 @@ static void BitstreamCallback ( bit_stream_t *p_bit_stream, ...@@ -78,18 +78,11 @@ static void BitstreamCallback ( bit_stream_t *p_bit_stream,
vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config ) vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
{ {
ac3dec_thread_t * p_ac3thread; ac3dec_thread_t * p_ac3thread;
ac3dec_thread_t * p_ac3thread_temp;
intf_DbgMsg( "ac3dec debug: creating ac3 decoder thread" ); intf_DbgMsg( "ac3dec debug: creating ac3 decoder thread" );
/* Allocate the memory needed to store the thread's structure */ /* Allocate the memory needed to store the thread's structure */
p_ac3thread_temp = (ac3dec_thread_t *)malloc(sizeof(ac3dec_thread_t) + 15); p_ac3thread = (ac3dec_thread_t *)memalign(16, sizeof(ac3dec_thread_t));
memset( p_ac3thread_temp, 0, sizeof(ac3dec_thread_t) + 15 );
/* We need to be 16 bytes aligned */
p_ac3thread = (ac3dec_thread_t *)(((unsigned long)p_ac3thread_temp + 15)
& ~0xFUL );
p_ac3thread->ac3thread = p_ac3thread_temp;
if(p_ac3thread == NULL) if(p_ac3thread == NULL)
{ {
...@@ -103,17 +96,19 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config ) ...@@ -103,17 +96,19 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
*/ */
p_ac3thread->p_config = p_config; p_ac3thread->p_config = p_config;
p_ac3thread->p_fifo = p_config->decoder_config.p_decoder_fifo; p_ac3thread->p_fifo = p_config->decoder_config.p_decoder_fifo;
p_ac3thread->ac3_decoder = memalign(16, sizeof(ac3dec_t));
/* /*
* Choose the best downmix module * Choose the best downmix module
*/ */
#define DOWNMIX p_ac3thread->ac3_decoder.downmix #define DOWNMIX p_ac3thread->ac3_decoder->downmix
DOWNMIX.p_module = module_Need( MODULE_CAPABILITY_DOWNMIX, NULL ); DOWNMIX.p_module = module_Need( MODULE_CAPABILITY_DOWNMIX, NULL );
if( DOWNMIX.p_module == NULL ) if( DOWNMIX.p_module == NULL )
{ {
intf_ErrMsg( "ac3dec error: no suitable downmix module" ); intf_ErrMsg( "ac3dec error: no suitable downmix module" );
free( p_ac3thread->ac3thread ); free( p_ac3thread->ac3_decoder );
free( p_ac3thread );
return( 0 ); return( 0 );
} }
...@@ -131,28 +126,49 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config ) ...@@ -131,28 +126,49 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
/* /*
* Choose the best IMDCT module * Choose the best IMDCT module
*/ */
#define IMDCT p_ac3thread->ac3_decoder.imdct p_ac3thread->ac3_decoder->imdct = memalign(16, sizeof(imdct_t));
IMDCT.p_module = module_Need( MODULE_CAPABILITY_IMDCT, NULL );
#define IMDCT p_ac3thread->ac3_decoder->imdct
IMDCT->p_module = module_Need( MODULE_CAPABILITY_IMDCT, NULL );
if( IMDCT.p_module == NULL ) if( IMDCT->p_module == NULL )
{ {
intf_ErrMsg( "ac3dec error: no suitable IMDCT module" ); intf_ErrMsg( "ac3dec error: no suitable IMDCT module" );
module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module ); module_Unneed( p_ac3thread->ac3_decoder->downmix.p_module );
free( p_ac3thread->ac3thread ); free( p_ac3thread->ac3_decoder->imdct );
free( p_ac3thread->ac3_decoder );
free( p_ac3thread );
return( 0 ); return( 0 );
} }
#define F IMDCT.p_module->p_functions->imdct.functions.imdct #define F IMDCT->p_module->p_functions->imdct.functions.imdct
IMDCT.pf_imdct_init = F.pf_imdct_init; IMDCT->pf_imdct_init = F.pf_imdct_init;
IMDCT.pf_imdct_256 = F.pf_imdct_256; IMDCT->pf_imdct_256 = F.pf_imdct_256;
IMDCT.pf_imdct_256_nol = F.pf_imdct_256_nol; IMDCT->pf_imdct_256_nol = F.pf_imdct_256_nol;
IMDCT.pf_imdct_512 = F.pf_imdct_512; IMDCT->pf_imdct_512 = F.pf_imdct_512;
IMDCT.pf_imdct_512_nol = F.pf_imdct_512_nol; IMDCT->pf_imdct_512_nol = F.pf_imdct_512_nol;
#undef F #undef F
#undef IMDCT #undef IMDCT
/* Initialize the ac3 decoder structures */ /* Initialize the ac3 decoder structures */
ac3_init (&p_ac3thread->ac3_decoder); p_ac3thread->ac3_decoder->samples = memalign(16, 6 * 256 * sizeof(float));
p_ac3thread->ac3_decoder->imdct->buf = memalign(16, N/4 * sizeof(complex_t));
p_ac3thread->ac3_decoder->imdct->delay = memalign(16, 6 * 256 * sizeof(float));
p_ac3thread->ac3_decoder->imdct->delay1 = memalign(16, 6 * 256 * sizeof(float));
p_ac3thread->ac3_decoder->imdct->xcos1 = memalign(16, N/4 * sizeof(float));
p_ac3thread->ac3_decoder->imdct->xsin1 = memalign(16, N/4 * sizeof(float));
p_ac3thread->ac3_decoder->imdct->xcos2 = memalign(16, N/8 * sizeof(float));
p_ac3thread->ac3_decoder->imdct->xsin2 = memalign(16, N/8 * sizeof(float));
p_ac3thread->ac3_decoder->imdct->xcos_sin_sse = memalign(16, 128 * 4 * sizeof(float));
p_ac3thread->ac3_decoder->imdct->w_2 = memalign(16, 2 * sizeof(complex_t));
p_ac3thread->ac3_decoder->imdct->w_4 = memalign(16, 4 * sizeof(complex_t));
p_ac3thread->ac3_decoder->imdct->w_8 = memalign(16, 8 * sizeof(complex_t));
p_ac3thread->ac3_decoder->imdct->w_16 = memalign(16, 16 * sizeof(complex_t));
p_ac3thread->ac3_decoder->imdct->w_32 = memalign(16, 32 * sizeof(complex_t));
p_ac3thread->ac3_decoder->imdct->w_64 = memalign(16, 64 * sizeof(complex_t));
p_ac3thread->ac3_decoder->imdct->w_1 = memalign(16, sizeof(complex_t));
ac3_init (p_ac3thread->ac3_decoder);
/* /*
* Initialize the output properties * Initialize the output properties
...@@ -164,9 +180,27 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config ) ...@@ -164,9 +180,27 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
(vlc_thread_func_t)RunThread, (void *)p_ac3thread)) (vlc_thread_func_t)RunThread, (void *)p_ac3thread))
{ {
intf_ErrMsg( "ac3dec error: can't spawn ac3 decoder thread" ); intf_ErrMsg( "ac3dec error: can't spawn ac3 decoder thread" );
module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module ); module_Unneed( p_ac3thread->ac3_decoder->downmix.p_module );
module_Unneed( p_ac3thread->ac3_decoder.imdct.p_module ); module_Unneed( p_ac3thread->ac3_decoder->imdct->p_module );
free (p_ac3thread->ac3thread); free( p_ac3thread->ac3_decoder->imdct->w_1 );
free( p_ac3thread->ac3_decoder->imdct->w_64 );
free( p_ac3thread->ac3_decoder->imdct->w_32 );
free( p_ac3thread->ac3_decoder->imdct->w_16 );
free( p_ac3thread->ac3_decoder->imdct->w_8 );
free( p_ac3thread->ac3_decoder->imdct->w_4 );
free( p_ac3thread->ac3_decoder->imdct->w_2 );
free( p_ac3thread->ac3_decoder->imdct->xcos_sin_sse );
free( p_ac3thread->ac3_decoder->imdct->xsin2 );
free( p_ac3thread->ac3_decoder->imdct->xcos2 );
free( p_ac3thread->ac3_decoder->imdct->xsin1 );
free( p_ac3thread->ac3_decoder->imdct->xcos1 );
free( p_ac3thread->ac3_decoder->imdct->delay1 );
free( p_ac3thread->ac3_decoder->imdct->delay );
free( p_ac3thread->ac3_decoder->imdct->buf );
free( p_ac3thread->ac3_decoder->samples );
free( p_ac3thread->ac3_decoder->imdct );
free( p_ac3thread->ac3_decoder );
free( p_ac3thread );
return 0; return 0;
} }
...@@ -184,7 +218,7 @@ static int InitThread (ac3dec_thread_t * p_ac3thread) ...@@ -184,7 +218,7 @@ static int InitThread (ac3dec_thread_t * p_ac3thread)
intf_DbgMsg("ac3dec debug: initializing ac3 decoder thread %p",p_ac3thread); intf_DbgMsg("ac3dec debug: initializing ac3 decoder thread %p",p_ac3thread);
p_ac3thread->p_config->decoder_config.pf_init_bit_stream( p_ac3thread->p_config->decoder_config.pf_init_bit_stream(
&p_ac3thread->ac3_decoder.bit_stream, &p_ac3thread->ac3_decoder->bit_stream,
p_ac3thread->p_config->decoder_config.p_decoder_fifo, p_ac3thread->p_config->decoder_config.p_decoder_fifo,
BitstreamCallback, (void *) p_ac3thread ); BitstreamCallback, (void *) p_ac3thread );
...@@ -228,7 +262,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread) ...@@ -228,7 +262,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread)
if (!sync) { if (!sync) {
do { do {
GetBits(&p_ac3thread->ac3_decoder.bit_stream,8); GetBits(&p_ac3thread->ac3_decoder->bit_stream,8);
} while ((!p_ac3thread->sync_ptr) && (!p_ac3thread->p_fifo->b_die) } while ((!p_ac3thread->sync_ptr) && (!p_ac3thread->p_fifo->b_die)
&& (!p_ac3thread->p_fifo->b_error)); && (!p_ac3thread->p_fifo->b_error));
...@@ -237,7 +271,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread) ...@@ -237,7 +271,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread)
while(ptr-- && (!p_ac3thread->p_fifo->b_die) while(ptr-- && (!p_ac3thread->p_fifo->b_die)
&& (!p_ac3thread->p_fifo->b_error)) && (!p_ac3thread->p_fifo->b_error))
{ {
p_ac3thread->ac3_decoder.bit_stream.p_byte++; p_ac3thread->ac3_decoder->bit_stream.p_byte++;
} }
/* we are in sync now */ /* we are in sync now */
...@@ -254,7 +288,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread) ...@@ -254,7 +288,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread)
LAST_MDATE; LAST_MDATE;
} }
if (ac3_sync_frame (&p_ac3thread->ac3_decoder, &sync_info)) if (ac3_sync_frame (p_ac3thread->ac3_decoder, &sync_info))
{ {
sync = 0; sync = 0;
goto bad_frame; goto bad_frame;
...@@ -265,7 +299,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread) ...@@ -265,7 +299,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread)
buffer = ((s16 *)p_ac3thread->p_aout_fifo->buffer) + buffer = ((s16 *)p_ac3thread->p_aout_fifo->buffer) +
(p_ac3thread->p_aout_fifo->l_end_frame * AC3DEC_FRAME_SIZE); (p_ac3thread->p_aout_fifo->l_end_frame * AC3DEC_FRAME_SIZE);
if (ac3_decode_frame (&p_ac3thread->ac3_decoder, buffer)) if (ac3_decode_frame (p_ac3thread->ac3_decoder, buffer))
{ {
sync = 0; sync = 0;
goto bad_frame; goto bad_frame;
...@@ -278,7 +312,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread) ...@@ -278,7 +312,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread)
vlc_mutex_unlock (&p_ac3thread->p_aout_fifo->data_lock); vlc_mutex_unlock (&p_ac3thread->p_aout_fifo->data_lock);
bad_frame: bad_frame:
RealignBits(&p_ac3thread->ac3_decoder.bit_stream); RealignBits(&p_ac3thread->ac3_decoder->bit_stream);
} }
/* If b_error is set, the ac3 decoder thread enters the error loop */ /* If b_error is set, the ac3 decoder thread enters the error loop */
...@@ -339,12 +373,30 @@ static void EndThread (ac3dec_thread_t * p_ac3thread) ...@@ -339,12 +373,30 @@ static void EndThread (ac3dec_thread_t * p_ac3thread)
} }
/* Unlock the modules */ /* Unlock the modules */
module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module ); module_Unneed( p_ac3thread->ac3_decoder->downmix.p_module );
module_Unneed( p_ac3thread->ac3_decoder.imdct.p_module ); module_Unneed( p_ac3thread->ac3_decoder->imdct->p_module );
/* Destroy descriptor */ /* Destroy descriptor */
free( p_ac3thread->ac3_decoder->imdct->w_1 );
free( p_ac3thread->ac3_decoder->imdct->w_64 );
free( p_ac3thread->ac3_decoder->imdct->w_32 );
free( p_ac3thread->ac3_decoder->imdct->w_16 );
free( p_ac3thread->ac3_decoder->imdct->w_8 );
free( p_ac3thread->ac3_decoder->imdct->w_4 );
free( p_ac3thread->ac3_decoder->imdct->w_2 );
free( p_ac3thread->ac3_decoder->imdct->xcos_sin_sse );
free( p_ac3thread->ac3_decoder->imdct->xsin2 );
free( p_ac3thread->ac3_decoder->imdct->xcos2 );
free( p_ac3thread->ac3_decoder->imdct->xsin1 );
free( p_ac3thread->ac3_decoder->imdct->xcos1 );
free( p_ac3thread->ac3_decoder->imdct->delay1 );
free( p_ac3thread->ac3_decoder->imdct->delay );
free( p_ac3thread->ac3_decoder->imdct->buf );
free( p_ac3thread->ac3_decoder->samples );
free( p_ac3thread->ac3_decoder->imdct );
free( p_ac3thread->ac3_decoder );
free( p_ac3thread->p_config ); free( p_ac3thread->p_config );
free( p_ac3thread->ac3thread ); free( p_ac3thread );
intf_DbgMsg ("ac3dec debug: ac3 decoder thread %p destroyed", p_ac3thread); intf_DbgMsg ("ac3dec debug: ac3 decoder thread %p destroyed", p_ac3thread);
} }
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder_thread.h : ac3 decoder thread interface * ac3_decoder_thread.h : ac3 decoder thread interface
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder_thread.h,v 1.9 2001/08/05 15:32:46 gbazin Exp $ * $Id: ac3_decoder_thread.h,v 1.10 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* *
...@@ -30,9 +30,7 @@ typedef struct ac3dec_thread_s ...@@ -30,9 +30,7 @@ typedef struct ac3dec_thread_s
/* /*
* Decoder properties * Decoder properties
*/ */
float used_for_alignement1; ac3dec_t * ac3_decoder;
float used_for_alignement2;
ac3dec_t ac3_decoder __attribute__ ((aligned(16)));
/* /*
* Thread properties * Thread properties
...@@ -51,8 +49,6 @@ typedef struct ac3dec_thread_s ...@@ -51,8 +49,6 @@ typedef struct ac3dec_thread_s
*/ */
aout_fifo_t * p_aout_fifo; /* stores the decompressed audio frames */ aout_fifo_t * p_aout_fifo; /* stores the decompressed audio frames */
struct ac3dec_thread_s * ac3thread; /* save the old pointer */
} ac3dec_thread_t; } ac3dec_thread_t;
/***************************************************************************** /*****************************************************************************
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_imdct.c: ac3 DCT * ac3_imdct.c: ac3 DCT
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_imdct.c,v 1.21 2001/05/16 14:51:29 reno Exp $ * $Id: ac3_imdct.c,v 1.22 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -76,11 +76,11 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -76,11 +76,11 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
/* test if dm in frequency is doable */ /* test if dm in frequency is doable */
if (!(doable = p_ac3dec->audblk.blksw[0])) if (!(doable = p_ac3dec->audblk.blksw[0]))
{ {
do_imdct = p_ac3dec->imdct.pf_imdct_512; do_imdct = p_ac3dec->imdct->pf_imdct_512;
} }
else else
{ {
do_imdct = p_ac3dec->imdct.pf_imdct_256; do_imdct = p_ac3dec->imdct->pf_imdct_256;
} }
/* downmix in the frequency domain if all the channels /* downmix in the frequency domain if all the channels
...@@ -100,28 +100,28 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -100,28 +100,28 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
switch(p_ac3dec->bsi.acmod) switch(p_ac3dec->bsi.acmod)
{ {
case 7: /* 3/2 */ case 7: /* 3/2 */
p_ac3dec->downmix.pf_downmix_3f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); p_ac3dec->downmix.pf_downmix_3f_2r_to_2ch (p_ac3dec->samples, &p_ac3dec->dm_par);
break; break;
case 6: /* 2/2 */ case 6: /* 2/2 */
p_ac3dec->downmix.pf_downmix_2f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); p_ac3dec->downmix.pf_downmix_2f_2r_to_2ch (p_ac3dec->samples, &p_ac3dec->dm_par);
break; break;
case 5: /* 3/1 */ case 5: /* 3/1 */
p_ac3dec->downmix.pf_downmix_3f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); p_ac3dec->downmix.pf_downmix_3f_1r_to_2ch (p_ac3dec->samples, &p_ac3dec->dm_par);
break; break;
case 4: /* 2/1 */ case 4: /* 2/1 */
p_ac3dec->downmix.pf_downmix_2f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); p_ac3dec->downmix.pf_downmix_2f_1r_to_2ch (p_ac3dec->samples, &p_ac3dec->dm_par);
break; break;
case 3: /* 3/0 */ case 3: /* 3/0 */
p_ac3dec->downmix.pf_downmix_3f_0r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); p_ac3dec->downmix.pf_downmix_3f_0r_to_2ch (p_ac3dec->samples, &p_ac3dec->dm_par);
break; break;
case 2: case 2:
break; break;
default: /* 1/0 */ default: /* 1/0 */
// if (p_ac3dec->bsi.acmod == 1) // if (p_ac3dec->bsi.acmod == 1)
center = p_ac3dec->samples[0]; center = p_ac3dec->samples;
// else if (p_ac3dec->bsi.acmod == 0) // else if (p_ac3dec->bsi.acmod == 0)
// center = samples[ac3_config.dual_mono_ch_sel]; // center = samples[ac3_config.dual_mono_ch_sel];
do_imdct(&p_ac3dec->imdct, center, p_ac3dec->imdct.delay[0]); /* no downmix*/ do_imdct(p_ac3dec->imdct, center, p_ac3dec->imdct->delay); /* no downmix*/
p_ac3dec->downmix.pf_stream_sample_1ch_to_s16 (buffer, center); p_ac3dec->downmix.pf_stream_sample_1ch_to_s16 (buffer, center);
...@@ -129,9 +129,9 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -129,9 +129,9 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
break; break;
} }
do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[0], p_ac3dec->imdct.delay[0]); do_imdct (p_ac3dec->imdct, p_ac3dec->samples, p_ac3dec->imdct->delay);
do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[1], p_ac3dec->imdct.delay[1]); do_imdct (p_ac3dec->imdct, p_ac3dec->samples+256, p_ac3dec->imdct->delay+256);
p_ac3dec->downmix.pf_stream_sample_2ch_to_s16(buffer, p_ac3dec->samples[0], p_ac3dec->samples[1]); p_ac3dec->downmix.pf_stream_sample_2ch_to_s16(buffer, p_ac3dec->samples, p_ac3dec->samples+256);
} else { } else {
/* imdct and then downmix /* imdct and then downmix
...@@ -141,27 +141,27 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -141,27 +141,27 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
{ {
if (p_ac3dec->audblk.blksw[i]) if (p_ac3dec->audblk.blksw[i])
/* There is only a C function */ /* There is only a C function */
p_ac3dec->imdct.pf_imdct_256_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]); p_ac3dec->imdct->pf_imdct_256_nol (p_ac3dec->imdct, p_ac3dec->samples+256*i, p_ac3dec->imdct->delay1+256*i);
else else
p_ac3dec->imdct.pf_imdct_512_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]); p_ac3dec->imdct->pf_imdct_512_nol (p_ac3dec->imdct, p_ac3dec->samples+256*i, p_ac3dec->imdct->delay1+256*i);
} }
/* mix the sample, overlap */ /* mix the sample, overlap */
switch(p_ac3dec->bsi.acmod) switch(p_ac3dec->bsi.acmod)
{ {
case 7: /* 3/2 */ case 7: /* 3/2 */
left = p_ac3dec->samples[0]; left = p_ac3dec->samples;
center = p_ac3dec->samples[1]; center = p_ac3dec->samples+256;
right = p_ac3dec->samples[2]; right = p_ac3dec->samples+2*256;
left_sur = p_ac3dec->samples[3]; left_sur = p_ac3dec->samples+3*256;
right_sur = p_ac3dec->samples[4]; right_sur = p_ac3dec->samples+4*256;
delay_left = p_ac3dec->imdct.delay[0]; delay_left = p_ac3dec->imdct->delay;
delay_right = p_ac3dec->imdct.delay[1]; delay_right = p_ac3dec->imdct->delay+256;
delay1_left = p_ac3dec->imdct.delay1[0]; delay1_left = p_ac3dec->imdct->delay1;
delay1_center = p_ac3dec->imdct.delay1[1]; delay1_center = p_ac3dec->imdct->delay1+256;
delay1_right = p_ac3dec->imdct.delay1[2]; delay1_right = p_ac3dec->imdct->delay1+2*256;
delay1_sl = p_ac3dec->imdct.delay1[3]; delay1_sl = p_ac3dec->imdct->delay1+3*256;
delay1_sr = p_ac3dec->imdct.delay1[4]; delay1_sr = p_ac3dec->imdct->delay1+4*256;
for (i = 0; i < 256; i++) { for (i = 0; i < 256; i++) {
left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center + p_ac3dec->dm_par.slev * *left_sur++; left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center + p_ac3dec->dm_par.slev * *left_sur++;
...@@ -173,16 +173,16 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -173,16 +173,16 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
} }
break; break;
case 6: /* 2/2 */ case 6: /* 2/2 */
left = p_ac3dec->samples[0]; left = p_ac3dec->samples;
right = p_ac3dec->samples[1]; right = p_ac3dec->samples+256;
left_sur = p_ac3dec->samples[2]; left_sur = p_ac3dec->samples+2*256;
right_sur = p_ac3dec->samples[3]; right_sur = p_ac3dec->samples+3*256;
delay_left = p_ac3dec->imdct.delay[0]; delay_left = p_ac3dec->imdct->delay;
delay_right = p_ac3dec->imdct.delay[1]; delay_right = p_ac3dec->imdct->delay+256;
delay1_left = p_ac3dec->imdct.delay1[0]; delay1_left = p_ac3dec->imdct->delay1;
delay1_right = p_ac3dec->imdct.delay1[1]; delay1_right = p_ac3dec->imdct->delay1+256;
delay1_sl = p_ac3dec->imdct.delay1[2]; delay1_sl = p_ac3dec->imdct->delay1+2*256;
delay1_sr = p_ac3dec->imdct.delay1[3]; delay1_sr = p_ac3dec->imdct->delay1+3*256;
for (i = 0; i < 256; i++) { for (i = 0; i < 256; i++) {
left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.slev * *left_sur++; left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.slev * *left_sur++;
...@@ -194,16 +194,16 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -194,16 +194,16 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
} }
break; break;
case 5: /* 3/1 */ case 5: /* 3/1 */
left = p_ac3dec->samples[0]; left = p_ac3dec->samples;
center = p_ac3dec->samples[1]; center = p_ac3dec->samples+256;
right = p_ac3dec->samples[2]; right = p_ac3dec->samples+2*256;
right_sur = p_ac3dec->samples[3]; right_sur = p_ac3dec->samples+3*256;
delay_left = p_ac3dec->imdct.delay[0]; delay_left = p_ac3dec->imdct->delay;
delay_right = p_ac3dec->imdct.delay[1]; delay_right = p_ac3dec->imdct->delay+256;
delay1_left = p_ac3dec->imdct.delay1[0]; delay1_left = p_ac3dec->imdct->delay1;
delay1_center = p_ac3dec->imdct.delay1[1]; delay1_center = p_ac3dec->imdct->delay1+256;
delay1_right = p_ac3dec->imdct.delay1[2]; delay1_right = p_ac3dec->imdct->delay1+2*256;
delay1_sl = p_ac3dec->imdct.delay1[3]; delay1_sl = p_ac3dec->imdct->delay1+3*256;
for (i = 0; i < 256; i++) { for (i = 0; i < 256; i++) {
left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center - p_ac3dec->dm_par.slev * *right_sur; left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center - p_ac3dec->dm_par.slev * *right_sur;
...@@ -215,14 +215,14 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -215,14 +215,14 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
} }
break; break;
case 4: /* 2/1 */ case 4: /* 2/1 */
left = p_ac3dec->samples[0]; left = p_ac3dec->samples;
right = p_ac3dec->samples[1]; right = p_ac3dec->samples+256;
right_sur = p_ac3dec->samples[2]; right_sur = p_ac3dec->samples+2*256;
delay_left = p_ac3dec->imdct.delay[0]; delay_left = p_ac3dec->imdct->delay;
delay_right = p_ac3dec->imdct.delay[1]; delay_right = p_ac3dec->imdct->delay+256;
delay1_left = p_ac3dec->imdct.delay1[0]; delay1_left = p_ac3dec->imdct->delay1;
delay1_right = p_ac3dec->imdct.delay1[1]; delay1_right = p_ac3dec->imdct->delay1+256;
delay1_sl = p_ac3dec->imdct.delay1[2]; delay1_sl = p_ac3dec->imdct->delay1+2*256;
for (i = 0; i < 256; i++) { for (i = 0; i < 256; i++) {
left_tmp = p_ac3dec->dm_par.unit * *left++ - p_ac3dec->dm_par.slev * *right_sur; left_tmp = p_ac3dec->dm_par.unit * *left++ - p_ac3dec->dm_par.slev * *right_sur;
...@@ -234,14 +234,14 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -234,14 +234,14 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
} }
break; break;
case 3: /* 3/0 */ case 3: /* 3/0 */
left = p_ac3dec->samples[0]; left = p_ac3dec->samples;
center = p_ac3dec->samples[1]; center = p_ac3dec->samples+256;
right = p_ac3dec->samples[2]; right = p_ac3dec->samples+2*256;
delay_left = p_ac3dec->imdct.delay[0]; delay_left = p_ac3dec->imdct->delay;
delay_right = p_ac3dec->imdct.delay[1]; delay_right = p_ac3dec->imdct->delay+256;
delay1_left = p_ac3dec->imdct.delay1[0]; delay1_left = p_ac3dec->imdct->delay1;
delay1_center = p_ac3dec->imdct.delay1[1]; delay1_center = p_ac3dec->imdct->delay1+256;
delay1_right = p_ac3dec->imdct.delay1[2]; delay1_right = p_ac3dec->imdct->delay1+2*256;
for (i = 0; i < 256; i++) { for (i = 0; i < 256; i++) {
left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center; left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center;
...@@ -254,8 +254,8 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer) ...@@ -254,8 +254,8 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
break; break;
case 2: /* copy to output */ case 2: /* copy to output */
for (i = 0; i < 256; i++) { for (i = 0; i < 256; i++) {
*buffer++ = (s16)p_ac3dec->samples[0][i]; *buffer++ = (s16) *(p_ac3dec->samples+i);
*buffer++ = (s16)p_ac3dec->samples[1][i]; *buffer++ = (s16) *(p_ac3dec->samples+256+i);
} }
break; break;
} }
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_mantissa.c: ac3 mantissa computation * ac3_mantissa.c: ac3 mantissa computation
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN * Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_mantissa.c,v 1.29 2001/05/15 16:19:42 sam Exp $ * $Id: ac3_mantissa.c,v 1.30 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -59,7 +59,7 @@ void mantissa_unpack (ac3dec_t * p_ac3dec) ...@@ -59,7 +59,7 @@ void mantissa_unpack (ac3dec_t * p_ac3dec)
for (i=0; i< p_ac3dec->bsi.nfchans; i++) { for (i=0; i< p_ac3dec->bsi.nfchans; i++) {
for (j=0; j < p_ac3dec->audblk.endmant[i]; j++) for (j=0; j < p_ac3dec->audblk.endmant[i]; j++)
p_ac3dec->samples[i][j] = coeff_get_float(p_ac3dec, p_ac3dec->audblk.fbw_bap[i][j], *(p_ac3dec->samples+i*256+j) = coeff_get_float(p_ac3dec, p_ac3dec->audblk.fbw_bap[i][j],
p_ac3dec->audblk.dithflag[i], p_ac3dec->audblk.fbw_exp[i][j]); p_ac3dec->audblk.dithflag[i], p_ac3dec->audblk.fbw_exp[i][j]);
if (p_ac3dec->audblk.cplinu && p_ac3dec->audblk.chincpl[i] && !(done_cpl)) { if (p_ac3dec->audblk.cplinu && p_ac3dec->audblk.chincpl[i] && !(done_cpl)) {
...@@ -85,7 +85,7 @@ void mantissa_unpack (ac3dec_t * p_ac3dec) ...@@ -85,7 +85,7 @@ void mantissa_unpack (ac3dec_t * p_ac3dec)
if (p_ac3dec->bsi.lfeon) { if (p_ac3dec->bsi.lfeon) {
/* There are always 7 mantissas for lfe, no dither for lfe */ /* There are always 7 mantissas for lfe, no dither for lfe */
for (j=0; j < 7 ; j++) for (j=0; j < 7 ; j++)
p_ac3dec->samples[5][j] = coeff_get_float(p_ac3dec, p_ac3dec->audblk.lfe_bap[j], *(p_ac3dec->samples+5*256+j) = coeff_get_float(p_ac3dec, p_ac3dec->audblk.lfe_bap[j],
0, p_ac3dec->audblk.lfe_exp[j]); 0, p_ac3dec->audblk.lfe_exp[j]);
} }
} }
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_mantissa.h: ac3 mantissa computation * ac3_mantissa.h: ac3 mantissa computation
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN * Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_mantissa.h,v 1.4 2001/05/15 16:19:42 sam Exp $ * $Id: ac3_mantissa.h,v 1.5 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -392,10 +392,10 @@ static __inline__ void uncouple_channel (ac3dec_t * p_ac3dec, u32 ch) ...@@ -392,10 +392,10 @@ static __inline__ void uncouple_channel (ac3dec_t * p_ac3dec, u32 ch)
* so the channels are uncorrelated */ * so the channels are uncorrelated */
if (p_ac3dec->audblk.dithflag[ch] && !p_ac3dec->audblk.cpl_bap[i]) if (p_ac3dec->audblk.dithflag[ch] && !p_ac3dec->audblk.cpl_bap[i])
{ {
p_ac3dec->samples[ch][i] = cpl_coord * dither_gen(&p_ac3dec->mantissa) * *(p_ac3dec->samples+ch*256+i) = cpl_coord * dither_gen(&p_ac3dec->mantissa) *
scale_factor[p_ac3dec->audblk.cpl_exp[i]]; scale_factor[p_ac3dec->audblk.cpl_exp[i]];
} else { } else {
p_ac3dec->samples[ch][i] = cpl_coord * p_ac3dec->audblk.cpl_flt[i]; *(p_ac3dec->samples+ch*256+i) = cpl_coord * p_ac3dec->audblk.cpl_flt[i];
} }
i++; i++;
} }
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_rematrix.c: ac3 audio rematrixing * ac3_rematrix.c: ac3 audio rematrixing
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_rematrix.c,v 1.18 2001/05/15 16:19:42 sam Exp $ * $Id: ac3_rematrix.c,v 1.19 2001/10/30 19:34:53 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca> * Aaron Holtzman <aholtzma@engr.uvic.ca>
...@@ -78,10 +78,10 @@ void rematrix (ac3dec_t * p_ac3dec) ...@@ -78,10 +78,10 @@ void rematrix (ac3dec_t * p_ac3dec)
end = min_value(rematrix_band[i].end ,12 * p_ac3dec->audblk.cplbegf + 36); end = min_value(rematrix_band[i].end ,12 * p_ac3dec->audblk.cplbegf + 36);
for (j=start;j < end; j++) { for (j=start;j < end; j++) {
left = 0.5f * (p_ac3dec->samples[0][j] + p_ac3dec->samples[1][j]); left = 0.5f * ( *(p_ac3dec->samples+j) + *(p_ac3dec->samples+256+j) );
right = 0.5f * (p_ac3dec->samples[0][j] - p_ac3dec->samples[1][j]); right = 0.5f * ( *(p_ac3dec->samples+j) - *(p_ac3dec->samples+256+j) );
p_ac3dec->samples[0][j] = left; *(p_ac3dec->samples+j) = left;
p_ac3dec->samples[1][j] = right; *(p_ac3dec->samples+256+j) = right;
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment