Commit dee3179d authored by Renaud Dartus

* Alignment in asm functions

* 16-byte alignment for data (needed for SSE)
* Optimization in SSE
parent 5b49dba8
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_imdct.h : AC3 IMDCT types * ac3_imdct.h : AC3 IMDCT types
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_imdct.h,v 1.4 2001/06/12 00:30:41 reno Exp $ * $Id: ac3_imdct.h,v 1.5 2001/07/08 23:15:11 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Renaud Dartus <reno@videolan.org> * Renaud Dartus <reno@videolan.org>
...@@ -42,18 +42,19 @@ typedef struct imdct_s ...@@ -42,18 +42,19 @@ typedef struct imdct_s
float xsin1[N/4] __attribute__ ((aligned(16))); float xsin1[N/4] __attribute__ ((aligned(16)));
float xcos2[N/8] __attribute__ ((aligned(16))); float xcos2[N/8] __attribute__ ((aligned(16)));
float xsin2[N/8] __attribute__ ((aligned(16))); float xsin2[N/8] __attribute__ ((aligned(16)));
float xcos_sin_sse[128 * 4] __attribute__ ((aligned(16)));
/* Twiddle factor LUT */ /* Twiddle factor LUT */
complex_t *w[7] __attribute__ ((aligned(16)));
complex_t w_1[1] __attribute__ ((aligned(16))); complex_t w_1[1] __attribute__ ((aligned(16)));
float used_for_alignement1;
float used_for_alignement2;
complex_t w_2[2] __attribute__ ((aligned(16))); complex_t w_2[2] __attribute__ ((aligned(16)));
complex_t w_4[4] __attribute__ ((aligned(16))); complex_t w_4[4] __attribute__ ((aligned(16)));
complex_t w_8[8] __attribute__ ((aligned(16))); complex_t w_8[8] __attribute__ ((aligned(16)));
complex_t w_16[16] __attribute__ ((aligned(16))); complex_t w_16[16] __attribute__ ((aligned(16)));
complex_t w_32[32] __attribute__ ((aligned(16))); complex_t w_32[32] __attribute__ ((aligned(16)));
complex_t w_64[64] __attribute__ ((aligned(16))); complex_t w_64[64] __attribute__ ((aligned(16)));
complex_t *w[7] __attribute__ ((aligned(16)));
float xcos_sin_sse[128 * 4] __attribute__ ((aligned(16)));
/* Module used and shortcuts */ /* Module used and shortcuts */
struct module_s * p_module; struct module_s * p_module;
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_downmix_3dn.c: accelerated 3D Now! ac3 downmix functions * ac3_downmix_3dn.c: accelerated 3D Now! ac3 downmix functions
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN * Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_downmix_3dn.c,v 1.3 2001/07/01 08:49:09 gbazin Exp $ * $Id: ac3_downmix_3dn.c,v 1.4 2001/07/08 23:15:11 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* *
...@@ -46,6 +46,7 @@ void sqrt2_3dn (void) ...@@ -46,6 +46,7 @@ void sqrt2_3dn (void)
void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par) void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"pushl %%ebx\n" "pushl %%ebx\n"
"movl $128, %%ebx\n" /* loop counter */ "movl $128, %%ebx\n" /* loop counter */
...@@ -58,6 +59,7 @@ void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par) ...@@ -58,6 +59,7 @@ void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par)
"movd 8(%%ecx), %%mm7\n" /* slev */ "movd 8(%%ecx), %%mm7\n" /* slev */
"punpckldq %%mm7, %%mm7\n" /* slev | slev */ "punpckldq %%mm7, %%mm7\n" /* slev | slev */
".align 16\n"
".loop:\n" ".loop:\n"
"movq (%%eax), %%mm0\n" /* left */ "movq (%%eax), %%mm0\n" /* left */
"movq 2048(%%eax), %%mm1\n" /* right */ "movq 2048(%%eax), %%mm1\n" /* right */
...@@ -90,6 +92,7 @@ void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par) ...@@ -90,6 +92,7 @@ void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par)
void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par) void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"pushl %%ebx\n" "pushl %%ebx\n"
"movl $128, %%ebx\n" /* loop counter */ "movl $128, %%ebx\n" /* loop counter */
...@@ -99,6 +102,7 @@ void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par) ...@@ -99,6 +102,7 @@ void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par)
"movd 8(%%ecx), %%mm7\n" /* slev */ "movd 8(%%ecx), %%mm7\n" /* slev */
"punpckldq %%mm7, %%mm7\n" /* slev | slev */ "punpckldq %%mm7, %%mm7\n" /* slev | slev */
".align 16\n"
".loop3:\n" ".loop3:\n"
"movq (%%eax), %%mm0\n" /* left */ "movq (%%eax), %%mm0\n" /* left */
"movq 1024(%%eax), %%mm1\n" /* right */ "movq 1024(%%eax), %%mm1\n" /* right */
...@@ -127,7 +131,7 @@ void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par) ...@@ -127,7 +131,7 @@ void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par)
void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par) void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"pushl %%ebx\n" "pushl %%ebx\n"
"movl $128, %%ebx\n" /* loop counter */ "movl $128, %%ebx\n" /* loop counter */
...@@ -140,6 +144,7 @@ void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par) ...@@ -140,6 +144,7 @@ void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
"movd 8(%%ecx), %%mm7\n" /* slev */ "movd 8(%%ecx), %%mm7\n" /* slev */
"punpckldq %%mm7, %%mm7\n" /* slev | slev */ "punpckldq %%mm7, %%mm7\n" /* slev | slev */
".align 16\n"
".loop4:\n" ".loop4:\n"
"movq (%%eax), %%mm0\n" /* left */ "movq (%%eax), %%mm0\n" /* left */
"movq 2048(%%eax), %%mm1\n" /* right */ "movq 2048(%%eax), %%mm1\n" /* right */
...@@ -170,6 +175,7 @@ void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par) ...@@ -170,6 +175,7 @@ void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par) void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"pushl %%ebx\n" "pushl %%ebx\n"
"movl $128, %%ebx\n" /* loop counter */ "movl $128, %%ebx\n" /* loop counter */
...@@ -179,6 +185,7 @@ void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par) ...@@ -179,6 +185,7 @@ void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
"movd 8(%%ecx), %%mm7\n" /* slev */ "movd 8(%%ecx), %%mm7\n" /* slev */
"punpckldq %%mm7, %%mm7\n" /* slev | slev */ "punpckldq %%mm7, %%mm7\n" /* slev | slev */
".align 16\n"
".loop5:\n" ".loop5:\n"
"movq (%%eax), %%mm0\n" /* left */ "movq (%%eax), %%mm0\n" /* left */
"movq 1024(%%eax), %%mm1\n" /* right */ "movq 1024(%%eax), %%mm1\n" /* right */
...@@ -205,6 +212,7 @@ void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par) ...@@ -205,6 +212,7 @@ void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par) void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"pushl %%ebx\n" "pushl %%ebx\n"
"movl $128, %%ebx\n" /* loop counter */ "movl $128, %%ebx\n" /* loop counter */
...@@ -214,6 +222,7 @@ void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par) ...@@ -214,6 +222,7 @@ void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par)
"movd 4(%%ecx), %%mm6\n" /* clev */ "movd 4(%%ecx), %%mm6\n" /* clev */
"punpckldq %%mm6, %%mm6\n" /* clev | clev */ "punpckldq %%mm6, %%mm6\n" /* clev | clev */
".align 16\n"
".loop6:\n" ".loop6:\n"
"movq (%%eax), %%mm0\n" /*left */ "movq (%%eax), %%mm0\n" /*left */
"movq 2048(%%eax), %%mm1\n" /* right */ "movq 2048(%%eax), %%mm1\n" /* right */
...@@ -240,6 +249,7 @@ void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par) ...@@ -240,6 +249,7 @@ void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par)
void _M( stream_sample_1ch_to_s16 ) (s16 *s16_samples, float *left) void _M( stream_sample_1ch_to_s16 ) (s16 *s16_samples, float *left)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"pushl %%ebx\n" "pushl %%ebx\n"
"pushl %%edx\n" "pushl %%edx\n"
...@@ -248,6 +258,7 @@ void _M( stream_sample_1ch_to_s16 ) (s16 *s16_samples, float *left) ...@@ -248,6 +258,7 @@ void _M( stream_sample_1ch_to_s16 ) (s16 *s16_samples, float *left)
"punpckldq %%mm7, %%mm7\n" /* sqrt2 | sqrt2 */ "punpckldq %%mm7, %%mm7\n" /* sqrt2 | sqrt2 */
"movl $128, %%ebx\n" "movl $128, %%ebx\n"
".align 16\n"
".loop2:\n" ".loop2:\n"
"movq (%%ecx), %%mm0\n" /* c1 | c0 */ "movq (%%ecx), %%mm0\n" /* c1 | c0 */
"pfmul %%mm7, %%mm0\n" "pfmul %%mm7, %%mm0\n"
...@@ -274,9 +285,11 @@ void _M( stream_sample_2ch_to_s16 ) (s16 *s16_samples, float *left, float *right ...@@ -274,9 +285,11 @@ void _M( stream_sample_2ch_to_s16 ) (s16 *s16_samples, float *left, float *right
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"pushl %%ebx\n" "pushl %%ebx\n"
"movl $128, %%ebx\n" "movl $128, %%ebx\n"
".align 16\n"
".loop1:\n" ".loop1:\n"
"movq (%%ecx), %%mm0\n" /* l1 | l0 */ "movq (%%ecx), %%mm0\n" /* l1 | l0 */
"movq (%%edx), %%mm1\n" /* r1 | r0 */ "movq (%%edx), %%mm1\n" /* r1 | r0 */
......
This diff is collapsed.
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_imdct_3dn.c: accelerated 3D Now! ac3 DCT * ac3_imdct_3dn.c: accelerated 3D Now! ac3 DCT
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_imdct_3dn.c,v 1.4 2001/06/03 12:47:21 sam Exp $ * $Id: ac3_imdct_3dn.c,v 1.5 2001/07/08 23:15:11 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* *
...@@ -89,6 +89,7 @@ void _M( imdct_do_512_nol ) (imdct_t * p_imdct, float data[], float delay[]) ...@@ -89,6 +89,7 @@ void _M( imdct_do_512_nol ) (imdct_t * p_imdct, float data[], float delay[])
static void imdct512_pre_ifft_twiddle_3dn (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse) static void imdct512_pre_ifft_twiddle_3dn (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"pushl %%ebp\n" "pushl %%ebp\n"
"movl %%esp, %%ebp\n" "movl %%esp, %%ebp\n"
"addl $-4, %%esp\n" /* local variable, loop counter */ "addl $-4, %%esp\n" /* local variable, loop counter */
...@@ -106,6 +107,7 @@ static void imdct512_pre_ifft_twiddle_3dn (const int *pmt, complex_t *buf, float ...@@ -106,6 +107,7 @@ static void imdct512_pre_ifft_twiddle_3dn (const int *pmt, complex_t *buf, float
"movl 20(%%ebp), %%edx\n" /* xcos_sin_sse */ "movl 20(%%ebp), %%edx\n" /* xcos_sin_sse */
"movl $128, -4(%%ebp)\n" "movl $128, -4(%%ebp)\n"
".align 16\n"
".loop:\n" ".loop:\n"
"movl (%%eax), %%esi\n" "movl (%%eax), %%esi\n"
"movd (%%ecx, %%esi, 8), %%mm1\n" /* 2j */ "movd (%%ecx, %%esi, 8), %%mm1\n" /* 2j */
...@@ -147,9 +149,11 @@ static void imdct512_pre_ifft_twiddle_3dn (const int *pmt, complex_t *buf, float ...@@ -147,9 +149,11 @@ static void imdct512_pre_ifft_twiddle_3dn (const int *pmt, complex_t *buf, float
static void imdct512_post_ifft_twiddle_3dn (complex_t *buf, float *xcos_sin_sse) static void imdct512_post_ifft_twiddle_3dn (complex_t *buf, float *xcos_sin_sse)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"pushl %%ebx\n" "pushl %%ebx\n"
"movl $64, %%ebx\n" /* loop counter */ "movl $64, %%ebx\n" /* loop counter */
".align 16\n"
".loop1:\n" ".loop1:\n"
"movq (%%eax), %%mm0\n" /* im0 | re0 */ "movq (%%eax), %%mm0\n" /* im0 | re0 */
"movq %%mm0, %%mm1\n" /* im0 | re0 */ "movq %%mm0, %%mm1\n" /* im0 | re0 */
...@@ -200,6 +204,7 @@ static void imdct512_post_ifft_twiddle_3dn (complex_t *buf, float *xcos_sin_sse) ...@@ -200,6 +204,7 @@ static void imdct512_post_ifft_twiddle_3dn (complex_t *buf, float *xcos_sin_sse)
static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt) static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"pushl %%ebp\n" "pushl %%ebp\n"
"movl %%esp, %%ebp\n" "movl %%esp, %%ebp\n"
...@@ -219,6 +224,7 @@ static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *w ...@@ -219,6 +224,7 @@ static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *w
"leal 504(%%eax), %%edi\n" /* buf[63].re */ "leal 504(%%eax), %%edi\n" /* buf[63].re */
"movl 12(%%ebp), %%eax\n" /* data */ "movl 12(%%ebp), %%eax\n" /* data */
".align 16\n"
".first_128_samples:\n" ".first_128_samples:\n"
"movd (%%esi), %%mm0\n" /* im0 */ "movd (%%esi), %%mm0\n" /* im0 */
"movd 8(%%esi), %%mm2\n" /* im1 */ "movd 8(%%esi), %%mm2\n" /* im1 */
...@@ -258,6 +264,7 @@ static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *w ...@@ -258,6 +264,7 @@ static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *w
"leal 1020(%%esi), %%edi\n" /* buf[127].im */ "leal 1020(%%esi), %%edi\n" /* buf[127].im */
"movl $32, %%ecx\n" /* loop count */ "movl $32, %%ecx\n" /* loop count */
".align 16\n"
".second_128_samples:\n" ".second_128_samples:\n"
"movd (%%esi), %%mm0\n" /* buf[i].re */ "movd (%%esi), %%mm0\n" /* buf[i].re */
"movd 8(%%esi), %%mm2\n" /* re1 */ "movd 8(%%esi), %%mm2\n" /* re1 */
...@@ -302,6 +309,7 @@ static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *w ...@@ -302,6 +309,7 @@ static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *w
"movl $32, %%ecx\n" /* loop count */ "movl $32, %%ecx\n" /* loop count */
"movl 20(%%ebp), %%eax\n" /* delay */ "movl 20(%%ebp), %%eax\n" /* delay */
".align 16\n"
".first_128_delay:\n" ".first_128_delay:\n"
"movd (%%esi), %%mm0\n" /* re0 */ "movd (%%esi), %%mm0\n" /* re0 */
"movd 8(%%esi), %%mm2\n" /* re1 */ "movd 8(%%esi), %%mm2\n" /* re1 */
...@@ -339,6 +347,7 @@ static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *w ...@@ -339,6 +347,7 @@ static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *w
"leal 1016(%%ebx), %%edi\n" /* buf[127].re */ "leal 1016(%%ebx), %%edi\n" /* buf[127].re */
"movl $32, %%ecx\n" /* loop count */ "movl $32, %%ecx\n" /* loop count */
".align 16\n"
".second_128_delay:\n" ".second_128_delay:\n"
"movd (%%esi), %%mm0\n" /* im0 */ "movd (%%esi), %%mm0\n" /* im0 */
"movd 8(%%esi), %%mm2\n" /* im1 */ "movd 8(%%esi), %%mm2\n" /* im1 */
...@@ -386,6 +395,7 @@ static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *w ...@@ -386,6 +395,7 @@ static void imdct512_window_delay_3dn (complex_t *buf, float *data_ptr, float *w
static void imdct512_window_delay_nol_3dn (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt) static void imdct512_window_delay_nol_3dn (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"pushl %%ebp\n" "pushl %%ebp\n"
"movl %%esp, %%ebp\n" "movl %%esp, %%ebp\n"
...@@ -405,6 +415,7 @@ static void imdct512_window_delay_nol_3dn (complex_t *buf, float *data_ptr, floa ...@@ -405,6 +415,7 @@ static void imdct512_window_delay_nol_3dn (complex_t *buf, float *data_ptr, floa
"leal 504(%%eax), %%edi\n" /* buf[63].re */ "leal 504(%%eax), %%edi\n" /* buf[63].re */
"movl 12(%%ebp), %%eax\n" /* data */ "movl 12(%%ebp), %%eax\n" /* data */
".align 16\n"
".first_128_samples2:\n" ".first_128_samples2:\n"
"movd (%%esi), %%mm0\n" /* im0 */ "movd (%%esi), %%mm0\n" /* im0 */
"movd 8(%%esi), %%mm2\n" /* im1 */ "movd 8(%%esi), %%mm2\n" /* im1 */
...@@ -439,6 +450,7 @@ static void imdct512_window_delay_nol_3dn (complex_t *buf, float *data_ptr, floa ...@@ -439,6 +450,7 @@ static void imdct512_window_delay_nol_3dn (complex_t *buf, float *data_ptr, floa
"leal 1020(%%esi), %%edi\n" /* buf[127].im */ "leal 1020(%%esi), %%edi\n" /* buf[127].im */
"movl $32, %%ecx\n" /* loop count */ "movl $32, %%ecx\n" /* loop count */
".align 16\n"
".second_128_samples2:\n" ".second_128_samples2:\n"
"movd (%%esi), %%mm0\n" /* buf[i].re */ "movd (%%esi), %%mm0\n" /* buf[i].re */
"movd 8(%%esi), %%mm2\n" /* re1 */ "movd 8(%%esi), %%mm2\n" /* re1 */
...@@ -478,6 +490,7 @@ static void imdct512_window_delay_nol_3dn (complex_t *buf, float *data_ptr, floa ...@@ -478,6 +490,7 @@ static void imdct512_window_delay_nol_3dn (complex_t *buf, float *data_ptr, floa
"movl $32, %%ecx\n" /* loop count */ "movl $32, %%ecx\n" /* loop count */
"movl 20(%%ebp), %%eax\n" /* delay */ "movl 20(%%ebp), %%eax\n" /* delay */
".align 16\n"
".first_128_delays:\n" ".first_128_delays:\n"
"movd (%%esi), %%mm0\n" /* re0 */ "movd (%%esi), %%mm0\n" /* re0 */
"movd 8(%%esi), %%mm2\n" /* re1 */ "movd 8(%%esi), %%mm2\n" /* re1 */
...@@ -515,6 +528,7 @@ static void imdct512_window_delay_nol_3dn (complex_t *buf, float *data_ptr, floa ...@@ -515,6 +528,7 @@ static void imdct512_window_delay_nol_3dn (complex_t *buf, float *data_ptr, floa
"leal 1016(%%ebx), %%edi\n" /* buf[127].re */ "leal 1016(%%ebx), %%edi\n" /* buf[127].re */
"movl $32, %%ecx\n" /* loop count */ "movl $32, %%ecx\n" /* loop count */
".align 16\n"
".second_128_delays:\n" ".second_128_delays:\n"
"movd (%%esi), %%mm0\n" /* im0 */ "movd (%%esi), %%mm0\n" /* im0 */
"movd 8(%%esi), %%mm2\n" /* im1 */ "movd 8(%%esi), %%mm2\n" /* im1 */
......
This diff is collapsed.
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_srfft_3dn.c: accelerated 3D Now! ac3 fft functions * ac3_srfft_3dn.c: accelerated 3D Now! ac3 fft functions
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN * Copyright (C) 1999, 2000, 2001 VideoLAN
* $Id: ac3_srfft_3dn.c,v 1.1 2001/05/16 14:51:29 reno Exp $ * $Id: ac3_srfft_3dn.c,v 1.2 2001/07/08 23:15:11 reno Exp $
* *
* Authors: Renaud Dartus <reno@videolan.org> * Authors: Renaud Dartus <reno@videolan.org>
* *
...@@ -126,6 +126,7 @@ void C_1_3dn (void) ...@@ -126,6 +126,7 @@ void C_1_3dn (void)
static void fft_4_3dn (complex_t *x) static void fft_4_3dn (complex_t *x)
{ {
__asm__ __volatile__ ( __asm__ __volatile__ (
".align 16\n"
"movq (%%eax), %%mm0\n" /* x[0] */ "movq (%%eax), %%mm0\n" /* x[0] */
"movq 8(%%eax), %%mm1\n" /* x[1] */ "movq 8(%%eax), %%mm1\n" /* x[1] */
"movq 16(%%eax), %%mm2\n" /* x[2] */ "movq 16(%%eax), %%mm2\n" /* x[2] */
......
This diff is collapsed.
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder.h : ac3 decoder interface * ac3_decoder.h : ac3 decoder interface
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder.h,v 1.10 2001/06/12 00:30:41 reno Exp $ * $Id: ac3_decoder.h,v 1.11 2001/07/08 23:15:11 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* Renaud Dartus <reno@videolan.org> * Renaud Dartus <reno@videolan.org>
...@@ -354,6 +354,9 @@ typedef struct mantissa_s ...@@ -354,6 +354,9 @@ typedef struct mantissa_s
struct ac3dec_s struct ac3dec_s
{ {
float samples[6][256] __attribute__ ((aligned(16)));
imdct_t imdct __attribute__ ((aligned(16)));
/* /*
* Input properties * Input properties
*/ */
...@@ -370,12 +373,10 @@ struct ac3dec_s ...@@ -370,12 +373,10 @@ struct ac3dec_s
bsi_t bsi; bsi_t bsi;
audblk_t audblk; audblk_t audblk;
float samples[6][256] __attribute__ ((aligned(16)));
dm_par_t dm_par; dm_par_t dm_par;
bit_allocate_t bit_allocate; bit_allocate_t bit_allocate;
mantissa_t mantissa; mantissa_t mantissa;
imdct_t imdct;
downmix_t downmix; downmix_t downmix;
}; };
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder_thread.c: ac3 decoder thread * ac3_decoder_thread.c: ac3 decoder thread
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder_thread.c,v 1.34 2001/05/31 01:37:08 sam Exp $ * $Id: ac3_decoder_thread.c,v 1.35 2001/07/08 23:15:11 reno Exp $
* *
* Authors: Michel Lespinasse <walken@zoy.org> * Authors: Michel Lespinasse <walken@zoy.org>
* *
...@@ -82,7 +82,13 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config ) ...@@ -82,7 +82,13 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
intf_DbgMsg( "ac3dec debug: creating ac3 decoder thread" ); intf_DbgMsg( "ac3dec debug: creating ac3 decoder thread" );
/* Allocate the memory needed to store the thread's structure */ /* Allocate the memory needed to store the thread's structure */
if((p_ac3thread = (ac3dec_thread_t *)malloc(sizeof(ac3dec_thread_t)))==NULL) p_ac3thread = (ac3dec_thread_t *)malloc(sizeof(ac3dec_thread_t));
/* We need to be 16 bytes aligned */
p_ac3thread->ac3thread = (int)p_ac3thread & (-15);
p_ac3thread = (ac3dec_thread_t *)p_ac3thread->ac3thread;
if(p_ac3thread == NULL)
{ {
intf_ErrMsg ( "ac3dec error: not enough memory " intf_ErrMsg ( "ac3dec error: not enough memory "
"for ac3dec_CreateThread() to create the new thread"); "for ac3dec_CreateThread() to create the new thread");
...@@ -335,6 +341,7 @@ static void EndThread (ac3dec_thread_t * p_ac3thread) ...@@ -335,6 +341,7 @@ static void EndThread (ac3dec_thread_t * p_ac3thread)
/* Destroy descriptor */ /* Destroy descriptor */
free( p_ac3thread->p_config ); free( p_ac3thread->p_config );
p_ac3thread = (ac3dec_thread_t *)p_ac3thread->ac3thread;
free( p_ac3thread ); free( p_ac3thread );
intf_DbgMsg ("ac3dec debug: ac3 decoder thread %p destroyed", p_ac3thread); intf_DbgMsg ("ac3dec debug: ac3 decoder thread %p destroyed", p_ac3thread);
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* ac3_decoder_thread.h : ac3 decoder thread interface * ac3_decoder_thread.h : ac3 decoder thread interface
***************************************************************************** *****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN * Copyright (C) 1999, 2000 VideoLAN
* $Id: ac3_decoder_thread.h,v 1.7 2001/05/14 15:58:03 reno Exp $ * $Id: ac3_decoder_thread.h,v 1.8 2001/07/08 23:15:11 reno Exp $
* *
* Authors: Michel Kaempf <maxx@via.ecp.fr> * Authors: Michel Kaempf <maxx@via.ecp.fr>
* *
...@@ -24,8 +24,16 @@ ...@@ -24,8 +24,16 @@
/***************************************************************************** /*****************************************************************************
* ac3dec_thread_t : ac3 decoder thread descriptor * ac3dec_thread_t : ac3 decoder thread descriptor
*****************************************************************************/ *****************************************************************************/
typedef struct ac3dec_thread_s typedef struct ac3dec_thread_s
{ {
/*
* Decoder properties
*/
float used_for_alignement1;
float used_for_alignement2;
ac3dec_t ac3_decoder __attribute__ ((aligned(16)));
/* /*
* Thread properties * Thread properties
*/ */
...@@ -38,16 +46,12 @@ typedef struct ac3dec_thread_s ...@@ -38,16 +46,12 @@ typedef struct ac3dec_thread_s
int sync_ptr; /* sync ptr from ac3 magic header */ int sync_ptr; /* sync ptr from ac3 magic header */
adec_config_t * p_config; adec_config_t * p_config;
/*
* Decoder properties
*/
ac3dec_t ac3_decoder;
/* /*
* Output properties * Output properties
*/ */
aout_fifo_t * p_aout_fifo; /* stores the decompressed audio frames */ aout_fifo_t * p_aout_fifo; /* stores the decompressed audio frames */
int ac3thread; /* save the old pointer */
} ac3dec_thread_t; } ac3dec_thread_t;
/***************************************************************************** /*****************************************************************************
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment