Commit b55353bb authored by mru

Drop unused args from vector_fmul_add_add, simplify code, and rename

The src3 and step arguments to vector_fmul_add_add() are always zero
and one, respectively.  This removes these arguments from the function,
simplifies the code accordingly, and renames the function to better
match the new operation.
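For reference, a minimal scalar sketch of the before/after semantics (the
*_ref names are ours for illustration; the real functions appear in the
diff below):

    /* before: generic form with a constant bias (src3) and output stride (step) */
    static void vector_fmul_add_add_ref(float *dst, const float *src0,
                                        const float *src1, const float *src2,
                                        int src3, int len, int step)
    {
        int i;
        for (i = 0; i < len; i++)
            dst[i*step] = src0[i] * src1[i] + src2[i] + src3;
    }

    /* after: every caller passes src3 == 0 and step == 1, so it collapses to */
    static void vector_fmul_add_ref(float *dst, const float *src0,
                                    const float *src1, const float *src2,
                                    int len)
    {
        int i;
        for (i = 0; i < len; i++)
            dst[i] = src0[i] * src1[i] + src2[i];
    }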

git-svn-id: file:///var/local/repositories/ffmpeg/trunk@20061 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 880da8f4
@@ -4068,10 +4068,10 @@ static void vector_fmul_reverse_c(float *dst, const float *src0, const float *sr
         dst[i] = src0[i] * src1[-i];
 }
 
-void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
+static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
     int i;
     for(i=0; i<len; i++)
-        dst[i*step] = src0[i] * src1[i] + src2[i] + src3;
+        dst[i] = src0[i] * src1[i] + src2[i];
 }
 
 void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
@@ -4787,7 +4787,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 #endif
     c->vector_fmul = vector_fmul_c;
     c->vector_fmul_reverse = vector_fmul_reverse_c;
-    c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
+    c->vector_fmul_add = vector_fmul_add_c;
     c->vector_fmul_window = ff_vector_fmul_window_c;
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
     c->vector_clipf = vector_clipf_c;
......
@@ -66,8 +66,6 @@ void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *bl
 void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
 void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
-                              const float *src2, int src3, int blocksize, int step);
 void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
                              const float *win, float add_bias, int len);
 void ff_float_to_int16_c(int16_t *dst, const float *src, long len);
@@ -391,7 +389,7 @@ typedef struct DSPContext {
     void (*vector_fmul)(float *dst, const float *src, int len);
     void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
     /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
-    void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step);
+    void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
     /* assume len is a multiple of 4, and arrays are 16-byte aligned */
     void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
     /* assume len is a multiple of 8, and arrays are 16-byte aligned */
......
@@ -66,71 +66,15 @@ static void vector_fmul_reverse_altivec(float *dst, const float *src0,
     }
 }
 
-static void vector_fmul_add_add_altivec(float *dst, const float *src0,
+static void vector_fmul_add_altivec(float *dst, const float *src0,
                                     const float *src1, const float *src2,
-                                    int src3, int len, int step)
+                                    int len)
 {
     int i;
     vector float d, s0, s1, s2, t0, t1, edges;
     vector unsigned char align = vec_lvsr(0,dst),
                          mask = vec_lvsl(0, dst);
 
-#if 0 //FIXME: there is still something wrong
-    if (step == 2) {
-        int y;
-        vector float d0, d1, s3, t2;
-        vector unsigned int sel =
-            vec_mergeh(vec_splat_u32(-1), vec_splat_u32(0));
-        t1 = vec_ld(16, dst);
-        for (i=0,y=0; i<len-3; i+=4,y+=8) {
-            s0 = vec_ld(0,src0+i);
-            s1 = vec_ld(0,src1+i);
-            s2 = vec_ld(0,src2+i);
-//          t0 = vec_ld(0, dst+y);  //[x x x|a]
-//          t1 = vec_ld(16, dst+y); //[b c d|e]
-            t2 = vec_ld(31, dst+y); //[f g h|x]
-            d = vec_madd(s0,s1,s2); // [A B C D]
-            // [A A B B]
-            // [C C D D]
-            d0 = vec_perm(t0, t1, mask); // [a b c d]
-            d0 = vec_sel(vec_mergeh(d, d), d0, sel); // [A b B d]
-            edges = vec_perm(t1, t0, mask);
-            t0 = vec_perm(edges, d0, align); // [x x x|A]
-            t1 = vec_perm(d0, edges, align); // [b B d|e]
-            vec_stl(t0, 0, dst+y);
-            d1 = vec_perm(t1, t2, mask); // [e f g h]
-            d1 = vec_sel(vec_mergel(d, d), d1, sel); // [C f D h]
-            edges = vec_perm(t2, t1, mask);
-            t1 = vec_perm(edges, d1, align); // [b B d|C]
-            t2 = vec_perm(d1, edges, align); // [f D h|x]
-            vec_stl(t1, 16, dst+y);
-            t0 = t1;
-            vec_stl(t2, 31, dst+y);
-            t1 = t2;
-        }
-    } else
-#endif
-    if (step == 1 && src3 == 0)
     for (i=0; i<len-3; i+=4) {
         t0 = vec_ld(0, dst+i);
         t1 = vec_ld(15, dst+i);
@@ -144,8 +88,6 @@ static void vector_fmul_add_add_altivec(float *dst, const float *src0,
         vec_st(t1, 15, dst+i);
         vec_st(t0, 0, dst+i);
     }
-    else
-        ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
 }
 
 static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
@@ -299,7 +241,7 @@ void float_init_altivec(DSPContext* c, AVCodecContext *avctx)
 {
     c->vector_fmul = vector_fmul_altivec;
     c->vector_fmul_reverse = vector_fmul_reverse_altivec;
-    c->vector_fmul_add_add = vector_fmul_add_add_altivec;
+    c->vector_fmul_add = vector_fmul_add_altivec;
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_altivec;
     if(!(avctx->flags & CODEC_FLAG_BITEXACT)) {
         c->vector_fmul_window = vector_fmul_window_altivec;
......
@@ -301,16 +301,16 @@ static void wma_window(WMACodecContext *s, float *out)
         block_len = s->block_len;
         bsize = s->frame_len_bits - s->block_len_bits;
 
-        s->dsp.vector_fmul_add_add(out, in, s->windows[bsize],
-                                   out, 0, block_len, 1);
+        s->dsp.vector_fmul_add(out, in, s->windows[bsize],
+                               out, block_len);
 
     } else {
         block_len = 1 << s->prev_block_len_bits;
         n = (s->block_len - block_len) / 2;
         bsize = s->frame_len_bits - s->prev_block_len_bits;
 
-        s->dsp.vector_fmul_add_add(out+n, in+n, s->windows[bsize],
-                                   out+n, 0, block_len, 1);
+        s->dsp.vector_fmul_add(out+n, in+n, s->windows[bsize],
+                               out+n, block_len);
 
         memcpy(out+n+block_len, in+n+block_len, n*sizeof(float));
     }
......
@@ -2125,34 +2125,9 @@ static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *
     );
 }
 
-static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float *src1,
-                                      const float *src2, int src3, int len, int step){
+static void vector_fmul_add_3dnow(float *dst, const float *src0, const float *src1,
+                                  const float *src2, int len){
     x86_reg i = (len-4)*4;
-    if(step == 2 && src3 == 0){
-        dst += (len-4)*2;
-        __asm__ volatile(
-            "1: \n\t"
-            "movq (%2,%0), %%mm0 \n\t"
-            "movq 8(%2,%0), %%mm1 \n\t"
-            "pfmul (%3,%0), %%mm0 \n\t"
-            "pfmul 8(%3,%0), %%mm1 \n\t"
-            "pfadd (%4,%0), %%mm0 \n\t"
-            "pfadd 8(%4,%0), %%mm1 \n\t"
-            "movd %%mm0, (%1) \n\t"
-            "movd %%mm1, 16(%1) \n\t"
-            "psrlq $32, %%mm0 \n\t"
-            "psrlq $32, %%mm1 \n\t"
-            "movd %%mm0, 8(%1) \n\t"
-            "movd %%mm1, 24(%1) \n\t"
-            "sub $32, %1 \n\t"
-            "sub $16, %0 \n\t"
-            "jge 1b \n\t"
-            :"+r"(i), "+r"(dst)
-            :"r"(src0), "r"(src1), "r"(src2)
-            :"memory"
-        );
-    }
-    else if(step == 1 && src3 == 0){
         __asm__ volatile(
             "1: \n\t"
             "movq (%2,%0), %%mm0 \n\t"
@@ -2169,47 +2144,11 @@ static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float *
             :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
             :"memory"
         );
-    }
-    else
-        ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
     __asm__ volatile("femms");
 }
 
-static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1,
-                                    const float *src2, int src3, int len, int step){
+static void vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
+                                const float *src2, int len){
     x86_reg i = (len-8)*4;
-    if(step == 2 && src3 == 0){
-        dst += (len-8)*2;
-        __asm__ volatile(
-            "1: \n\t"
-            "movaps (%2,%0), %%xmm0 \n\t"
-            "movaps 16(%2,%0), %%xmm1 \n\t"
-            "mulps (%3,%0), %%xmm0 \n\t"
-            "mulps 16(%3,%0), %%xmm1 \n\t"
-            "addps (%4,%0), %%xmm0 \n\t"
-            "addps 16(%4,%0), %%xmm1 \n\t"
-            "movss %%xmm0, (%1) \n\t"
-            "movss %%xmm1, 32(%1) \n\t"
-            "movhlps %%xmm0, %%xmm2 \n\t"
-            "movhlps %%xmm1, %%xmm3 \n\t"
-            "movss %%xmm2, 16(%1) \n\t"
-            "movss %%xmm3, 48(%1) \n\t"
-            "shufps $0xb1, %%xmm0, %%xmm0 \n\t"
-            "shufps $0xb1, %%xmm1, %%xmm1 \n\t"
-            "movss %%xmm0, 8(%1) \n\t"
-            "movss %%xmm1, 40(%1) \n\t"
-            "movhlps %%xmm0, %%xmm2 \n\t"
-            "movhlps %%xmm1, %%xmm3 \n\t"
-            "movss %%xmm2, 24(%1) \n\t"
-            "movss %%xmm3, 56(%1) \n\t"
-            "sub $64, %1 \n\t"
-            "sub $32, %0 \n\t"
-            "jge 1b \n\t"
-            :"+r"(i), "+r"(dst)
-            :"r"(src0), "r"(src1), "r"(src2)
-            :"memory"
-        );
-    }
-    else if(step == 1 && src3 == 0){
         __asm__ volatile(
             "1: \n\t"
            "movaps (%2,%0), %%xmm0 \n\t"
@@ -2226,9 +2165,6 @@ static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *
             :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
             :"memory"
         );
-    }
-    else
-        ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
 }
 
 static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
@@ -3077,7 +3013,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->ac3_downmix = ac3_downmix_sse;
         c->vector_fmul = vector_fmul_sse;
         c->vector_fmul_reverse = vector_fmul_reverse_sse;
-        c->vector_fmul_add_add = vector_fmul_add_add_sse;
+        c->vector_fmul_add = vector_fmul_add_sse;
         c->vector_fmul_window = vector_fmul_window_sse;
         c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
         c->vector_clipf = vector_clipf_sse;
@@ -3085,7 +3021,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->float_to_int16_interleave = float_to_int16_interleave_sse;
         }
         if(mm_flags & FF_MM_3DNOW)
-            c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse
+            c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse
         if(mm_flags & FF_MM_SSE2){
             c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
             c->float_to_int16 = float_to_int16_sse2;
......