Commit 1665af73 authored by diego

cosmetics: Reformat PPC code in libavcodec according to style guidelines.

This includes indentation changes, comment reformatting, consistent brace
placement and some prettyprinting.


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@14316 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 18d45891
@@ -96,10 +96,8 @@ void powerpc_display_perf_report(void)
 {
     int i, j;
     av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
-    for(i = 0 ; i < powerpc_perf_total ; i++)
-    {
-        for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
-        {
+    for(i = 0 ; i < powerpc_perf_total ; i++) {
+        for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
             if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
                 av_log(NULL, AV_LOG_INFO,
                        " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
@@ -117,28 +115,23 @@ void powerpc_display_perf_report(void)
 /* ***** WARNING ***** WARNING ***** WARNING ***** */
 /*
-  clear_blocks_dcbz32_ppc will not work properly
-  on PowerPC processors with a cache line size
-  not equal to 32 bytes.
-  Fortunately all processor used by Apple up to
-  at least the 7450 (aka second generation G4)
-  use 32 bytes cache line.
-  This is due to the use of the 'dcbz' instruction.
-  It simply clear to zero a single cache line,
-  so you need to know the cache line size to use it !
-  It's absurd, but it's fast...
+  clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a
+  cache line size not equal to 32 bytes.
+  Fortunately all processor used by Apple up to at least the 7450 (aka second
+  generation G4) use 32 bytes cache line.
+  This is due to the use of the 'dcbz' instruction. It simply clear to zero a
+  single cache line, so you need to know the cache line size to use it !
+  It's absurd, but it's fast...

-  update 24/06/2003 : Apple released yesterday the G5,
-  with a PPC970. cache line size : 128 bytes. Oups.
-  The semantic of dcbz was changed, it always clear
-  32 bytes. so the function below will work, but will
-  be slow. So I fixed check_dcbz_effect to use dcbzl,
-  which is defined to clear a cache line (as dcbz before).
-  So we still can distinguish, and use dcbz (32 bytes)
-  or dcbzl (one cache line) as required.
+  update 24/06/2003 : Apple released yesterday the G5, with a PPC970. cache line
+  size: 128 bytes. Oups.
+  The semantic of dcbz was changed, it always clear 32 bytes. so the function
+  below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
+  which is defined to clear a cache line (as dcbz before). So we still can
+  distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.

   see <http://developer.apple.com/technotes/tn/tn2087.html>
   and <http://developer.apple.com/technotes/tn/tn2086.html>
 */

 void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
 {
@@ -216,8 +209,7 @@ long check_dcbzl_effect(void)
     register long i = 0;
     long count = 0;

-    if (!fakedata)
-    {
+    if (!fakedata) {
         return 0L;
     }

@@ -229,8 +221,7 @@ long check_dcbzl_effect(void)
        in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
     asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));

-    for (i = 0; i < 1024 ; i ++)
-    {
+    for (i = 0; i < 1024 ; i ++) {
         if (fakedata[i] == (char)0)
             count++;
     }
@@ -286,17 +277,14 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)

 #ifdef CONFIG_ENCODERS
         if (avctx->dct_algo == FF_DCT_AUTO ||
-            avctx->dct_algo == FF_DCT_ALTIVEC)
-        {
+            avctx->dct_algo == FF_DCT_ALTIVEC) {
             c->fdct = fdct_altivec;
         }
 #endif //CONFIG_ENCODERS

-        if (avctx->lowres==0)
-        {
+        if (avctx->lowres==0) {
             if ((avctx->idct_algo == FF_IDCT_AUTO) ||
-                (avctx->idct_algo == FF_IDCT_ALTIVEC))
-            {
+                (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
                 c->idct_put = idct_put_altivec;
                 c->idct_add = idct_add_altivec;
                 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
@@ -306,10 +294,8 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
 #ifdef CONFIG_POWERPC_PERF
     {
         int i, j;
-        for (i = 0 ; i < powerpc_perf_total ; i++)
-        {
-            for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
-            {
+        for (i = 0 ; i < powerpc_perf_total ; i++) {
+            for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
                 perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
                 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
                 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
......
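For context, the runtime detection that the warning comment above describes (and that check_dcbzl_effect performs) boils down to: fill a buffer with non-zero bytes, issue one dcbz/dcbzl on the middle of it, and count how many bytes come back zero. A minimal illustrative sketch, not the file's actual code, assuming a PowerPC target with GNU inline asm (the helper name probe_dcbzl_clear_size is made up):

    #include <string.h>

    static long probe_dcbzl_clear_size(void)
    {
        char buf[1024];
        register char *mid  = buf + 512;   /* somewhere inside the buffer */
        register long  zero = 0, i, count = 0;

        memset(buf, 0xFF, sizeof(buf));
        /* dcbzl zeroes the whole cache line containing (mid + zero) */
        asm volatile("dcbzl %0, %1" : : "b" (mid), "r" (zero) : "memory");

        for (i = 0; i < 1024; i++)
            if (buf[i] == 0)
                count++;
        return count;   /* 32 on a 32-byte-line G4, 128 on a PPC970 (G5) */
    }

With that value in hand, the init code can install a clear_blocks implementation that matches the actual cache line size, as the comment explains.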
@@ -125,14 +125,11 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
         POWERPC_GET_PMC4(pmc_stop[3]);                                  \
         POWERPC_GET_PMC5(pmc_stop[4]);                                  \
         POWERPC_GET_PMC6(pmc_stop[5]);                                  \
-        if (cond)                                                       \
-        {                                                               \
+        if (cond) {                                                     \
            for(pmc_loop_index = 0;                                      \
                pmc_loop_index < POWERPC_NUM_PMC_ENABLED;                \
-               pmc_loop_index++)                                        \
-           {                                                            \
-               if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) \
-               {                                                        \
+               pmc_loop_index++) {                                      \
+               if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) { \
                    POWERP_PMC_DATATYPE diff =                           \
                        pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index]; \
                    if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
......
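The macro above only does bookkeeping: for every profiled function and every enabled PMC it tracks the minimum, maximum, sum and sample count of the counter deltas, which powerpc_display_perf_report (first hunk) then prints as min/max/avg. A minimal sketch of that accumulation, with illustrative names rather than the header's actual layout:

    typedef unsigned long long pmc_t;

    struct perf_stats { pmc_t min, max, sum, num; };

    static void perf_update(struct perf_stats *st, pmc_t diff)
    {
        if (diff < st->min) st->min = diff;   /* min is initialized to ~0ULL        */
        if (diff > st->max) st->max = diff;   /* max is initialized to 0            */
        st->sum += diff;                      /* avg is printed as sum / (double)num */
        st->num++;
    }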
@@ -85,12 +85,9 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);

     c1 = vcii(p,p,n,n);

-    if (s->inverse)
-    {
+    if (s->inverse) {
         c2 = vcii(p,p,n,p);
-    }
-    else
-    {
+    } else {
         c2 = vcii(p,p,p,n);
     }

......
@@ -74,19 +74,17 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
         src_1 = vec_ld(16, src);
         srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));

-        if (src_really_odd != 0x0000000F)
-        { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
+        if (src_really_odd != 0x0000000F) {
+            // if src & 0xF == 0xF, then (src+1) is properly aligned
+            // on the second vector.
             srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
-        }
-        else
-        {
+        } else {
             srcvB = src_1;
         }
         srcvA = vec_mergeh(vczero, srcvA);
         srcvB = vec_mergeh(vczero, srcvB);

-        for(i=0; i<h; i++)
-        {
+        for(i=0; i<h; i++) {
             dst_odd = (unsigned long)dst & 0x0000000F;
             src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;

@@ -100,12 +98,11 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
             src_1 = vec_ld(stride + 16, src);
             srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));

-            if (src_really_odd != 0x0000000F)
-            { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
+            if (src_really_odd != 0x0000000F) {
+                // if src & 0xF == 0xF, then (src+1) is properly aligned
+                // on the second vector.
                 srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
-            }
-            else
-            {
+            } else {
                 srcvD = src_1;
             }

@@ -128,12 +125,9 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);

             dstv2 = vec_pack(tempD, (vector unsigned short)vczero);

-            if (dst_odd)
-            {
+            if (dst_odd) {
                 dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
-            }
-            else
-            {
+            } else {
                 dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
             }

......
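The gmc1 routine above leans on the classic AltiVec pattern for loading 16 bytes from a possibly unaligned address: two aligned vec_ld loads bracketing the data, combined with a vec_perm whose control vector comes from vec_lvsl. A minimal sketch of the idiom (assuming <altivec.h>; the helper name is illustrative):

    #include <altivec.h>

    static vector unsigned char load_unaligned(const unsigned char *p)
    {
        vector unsigned char lo   = vec_ld(0,  p);   /* aligned block containing p  */
        vector unsigned char hi   = vec_ld(16, p);   /* next aligned 16-byte block  */
        vector unsigned char mask = vec_lvsl(0, p);  /* permute built from p & 15   */
        return vec_perm(lo, hi, mask);               /* the 16 bytes starting at p  */
    }

The src_really_odd != 0x0000000F test in the hunk is a small optimization of this: when src & 0xF == 0xF, src + 1 is already 16-byte aligned, so the second permute can be skipped and the next aligned load used directly.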
@@ -392,8 +392,8 @@ static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,

 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
 */

 H264_MC(put_, 16, altivec)
 H264_MC(avg_, 16, altivec)

 /****************************************************************************
......
@@ -344,7 +344,7 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
         src += srcStride;
         dst += dstStride;
     }
     POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
 }

 /* this code assume stride % 16 == 0 */
@@ -365,23 +365,23 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
     const vec_u8_t srcM2a = vec_ld(0, srcbis);
     const vec_u8_t srcM2b = vec_ld(16, srcbis);
     const vec_u8_t srcM2 = vec_perm(srcM2a, srcM2b, perm);
-    // srcbis += srcStride;
+    //srcbis += srcStride;
     const vec_u8_t srcM1a = vec_ld(0, srcbis += srcStride);
     const vec_u8_t srcM1b = vec_ld(16, srcbis);
     const vec_u8_t srcM1 = vec_perm(srcM1a, srcM1b, perm);
-    // srcbis += srcStride;
+    //srcbis += srcStride;
     const vec_u8_t srcP0a = vec_ld(0, srcbis += srcStride);
     const vec_u8_t srcP0b = vec_ld(16, srcbis);
     const vec_u8_t srcP0 = vec_perm(srcP0a, srcP0b, perm);
-    // srcbis += srcStride;
+    //srcbis += srcStride;
     const vec_u8_t srcP1a = vec_ld(0, srcbis += srcStride);
     const vec_u8_t srcP1b = vec_ld(16, srcbis);
     const vec_u8_t srcP1 = vec_perm(srcP1a, srcP1b, perm);
-    // srcbis += srcStride;
+    //srcbis += srcStride;
     const vec_u8_t srcP2a = vec_ld(0, srcbis += srcStride);
     const vec_u8_t srcP2b = vec_ld(16, srcbis);
     const vec_u8_t srcP2 = vec_perm(srcP2a, srcP2b, perm);
-    // srcbis += srcStride;
+    //srcbis += srcStride;

     vec_s16_t srcM2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
     vec_s16_t srcM2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
@@ -409,7 +409,7 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
         srcP3 = vec_perm(srcP3a, srcP3b, perm);
         srcP3ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
         srcP3ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
-        // srcbis += srcStride;
+        //srcbis += srcStride;

         sum1A = vec_adds(srcP0ssA, srcP1ssA);
         sum1B = vec_adds(srcP0ssB, srcP1ssB);
......
@@ -22,7 +22,6 @@
  * NOTE: This code is based on GPL code from the libmpeg2 project. The
  * author, Michel Lespinasses, has given explicit permission to release
  * under LGPL as part of ffmpeg.
- *
  */

 /*
......
@@ -46,8 +46,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
     vector signed short zeros, sumhv, sumlv;

     s = src;
-    for(i=0;i<4;i++)
-    {
+    for(i=0;i<4;i++) {
         /*
            The vec_madds later on does an implicit >>15 on the result.
            Since FILTER_BITS is 8, and we have 15 bits of magnitude in
@@ -86,13 +85,11 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
     /* Do our altivec resampling on 16 pixels at once. */
     while(dst_width>=16) {
-        /*
-           Read 16 (potentially unaligned) bytes from each of
+        /* Read 16 (potentially unaligned) bytes from each of
            4 lines into 4 vectors, and split them into shorts.
            Interleave the multipy/accumulate for the resample
            filter with the loads to hide the 3 cycle latency
-           the vec_madds have.
-        */
+           the vec_madds have. */

         tv = (vector unsigned char *) &s[0 * wrap];
         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
         srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
@@ -121,10 +118,8 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
         sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
         sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

-        /*
-           Pack the results into our destination vector,
-           and do an aligned write of that back to memory.
-        */
+        /* Pack the results into our destination vector,
+           and do an aligned write of that back to memory. */

         dstv = vec_packsu(sumhv, sumlv) ;
         vec_st(dstv, 0, (vector unsigned char *) dst);
@@ -133,10 +128,8 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
         dst_width-=16;
     }

-    /*
-       If there are any leftover pixels, resample them
-       with the slow scalar method.
-    */
+    /* If there are any leftover pixels, resample them
+       with the slow scalar method. */
     while(dst_width>0) {
         sum = s[0 * wrap] * filter[0] +
               s[1 * wrap] * filter[1] +
......
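As the rewrapped comments note, vec_madds does an implicit >>15 on its result while FILTER_BITS is 8, so the setup loop has to account for the difference. Per output pixel the vector loop then computes the same thing as the scalar leftover path at the end of the hunk: a 4-tap vertical filter followed by a shift and clamp. A hedged scalar reference of that per-pixel computation (the helper name is illustrative):

    static unsigned char resample_one(const unsigned char *s, int wrap,
                                      const short *filter)
    {
        int sum = s[0 * wrap] * filter[0] +
                  s[1 * wrap] * filter[1] +
                  s[2 * wrap] * filter[2] +
                  s[3 * wrap] * filter[3];

        sum >>= 8;                     /* FILTER_BITS */
        if (sum < 0)   sum = 0;        /* clamp to the 8-bit pixel range */
        if (sum > 255) sum = 255;
        return (unsigned char)sum;
    }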
@@ -25,11 +25,11 @@

 #if defined(ARCH_POWERPC_405)
 /* signed 16x16 -> 32 multiply add accumulate */
-# define MAC16(rt, ra, rb) \
+#define MAC16(rt, ra, rb) \
     asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));

 /* signed 16x16 -> 32 multiply */
-# define MUL16(ra, rb) \
+#define MUL16(ra, rb) \
     ({ int __rt; \
      asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
      __rt; })
......
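The two PowerPC 405 macros above wrap the maclhw (multiply-accumulate low halfword) and mullhw (multiply low halfword) instructions behind the generic MAC16/MUL16 names. A hedged usage sketch, assuming the macros above are in scope on an ARCH_POWERPC_405 build (the dot16 helper itself is made up, not part of the file):

    /* 16-bit dot product accumulated in 32 bits using the macros above */
    static int dot16(const short *a, const short *b, int n)
    {
        int i, acc = MUL16(a[0], b[0]);
        for (i = 1; i < n; i++)
            MAC16(acc, a[i], b[i]);
        return acc;
    }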
@@ -137,10 +137,8 @@ int dct_quantize_altivec(MpegEncContext* s,

     int whichPass, whichHalf;

-    for(whichPass = 1; whichPass<=2; whichPass++)
-    {
-        for(whichHalf = 1; whichHalf<=2; whichHalf++)
-        {
+    for(whichPass = 1; whichPass<=2; whichPass++) {
+        for(whichHalf = 1; whichHalf<=2; whichHalf++) {
             vector float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
             vector float tmp10, tmp11, tmp12, tmp13;
             vector float z1, z2, z3, z4, z5;
@@ -235,8 +233,7 @@ int dct_quantize_altivec(MpegEncContext* s,
             SWAP(row7, alt7);
         }

-        if (whichPass == 1)
-        {
+        if (whichPass == 1) {
             // transpose the data for the second pass

             // First, block transpose the upper right with lower left.
@@ -261,8 +258,7 @@ int dct_quantize_altivec(MpegEncContext* s,
         const vector signed int* qmat;
         vector float bias, negBias;

-        if (s->mb_intra)
-        {
+        if (s->mb_intra) {
             vector signed int baseVector;

             // We must cache element 0 in the intra case
@@ -272,9 +268,7 @@ int dct_quantize_altivec(MpegEncContext* s,
             qmat = (vector signed int*)s->q_intra_matrix[qscale];
             biasAddr = &(s->intra_quant_bias);
-        }
-        else
-        {
+        } else {
             qmat = (vector signed int*)s->q_inter_matrix[qscale];
             biasAddr = &(s->inter_quant_bias);
         }

@@ -439,8 +433,7 @@ int dct_quantize_altivec(MpegEncContext* s,

     // and handle it using the vector unit if we can. This is the permute used
     // by the altivec idct, so it is common when using the altivec dct.
-    if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM))
-    {
+    if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) {
         TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7);
     }

@@ -456,10 +449,8 @@ int dct_quantize_altivec(MpegEncContext* s,
     }

     // special handling of block[0]
-    if (s->mb_intra)
-    {
-        if (!s->h263_aic)
-        {
+    if (s->mb_intra) {
+        if (!s->h263_aic) {
             if (n < 4)
                 oldBaseValue /= s->y_dc_scale;
             else
@@ -474,8 +465,7 @@ int dct_quantize_altivec(MpegEncContext* s,
     // need to permute the "no" permutation case.
     if ((lastNonZero > 0) &&
         (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) &&
-        (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM))
-    {
+        (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) {
         ff_block_permute(data, s->dsp.idct_permutation,
                          s->intra_scantable.scantable, lastNonZero);
     }
@@ -483,10 +473,8 @@ int dct_quantize_altivec(MpegEncContext* s,
     return lastNonZero;
 }

-/*
-  AltiVec version of dct_unquantize_h263
-  this code assumes `block' is 16 bytes-aligned
-*/
+/* AltiVec version of dct_unquantize_h263
+   this code assumes `block' is 16 bytes-aligned */
 void dct_unquantize_h263_altivec(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale)
 {
@@ -559,8 +547,7 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);

         // vectorize all the 16 bytes-aligned blocks
         // of 8 elements
-        for(; (j + 7) <= nCoeffs ; j+=8)
-        {
+        for(; (j + 7) <= nCoeffs ; j+=8) {
             blockv = vec_ld(j << 1, block);
             blockv_neg = vec_cmplt(blockv, vczero);
             blockv_null = vec_cmpeq(blockv, vczero);
@@ -589,8 +576,8 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
             }
         }

-        if (i == 1)
-        { // cheat. this avoid special-casing the first iteration
+        if (i == 1) {
+            // cheat. this avoid special-casing the first iteration
             block[0] = backup_0;
         }
     }
@@ -605,11 +592,9 @@ void MPV_common_init_altivec(MpegEncContext *s)
 {
     if ((mm_flags & MM_ALTIVEC) == 0) return;

-    if (s->avctx->lowres==0)
-    {
+    if (s->avctx->lowres==0) {
         if ((s->avctx->idct_algo == FF_IDCT_AUTO) ||
-            (s->avctx->idct_algo == FF_IDCT_ALTIVEC))
-        {
+            (s->avctx->idct_algo == FF_IDCT_ALTIVEC)) {
             s->dsp.idct_put = idct_put_altivec;
             s->dsp.idct_add = idct_add_altivec;
             s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
@@ -618,15 +603,13 @@ void MPV_common_init_altivec(MpegEncContext *s)

         // Test to make sure that the dct required alignments are met.
         if ((((long)(s->q_intra_matrix) & 0x0f) != 0) ||
-            (((long)(s->q_inter_matrix) & 0x0f) != 0))
-        {
+            (((long)(s->q_inter_matrix) & 0x0f) != 0)) {
             av_log(s->avctx, AV_LOG_INFO, "Internal Error: q-matrix blocks must be 16-byte aligned "
                    "to use AltiVec DCT. Reverting to non-AltiVec version.\n");
             return;
         }

-        if (((long)(s->intra_scantable.inverse) & 0x0f) != 0)
-        {
+        if (((long)(s->intra_scantable.inverse) & 0x0f) != 0) {
             av_log(s->avctx, AV_LOG_INFO, "Internal Error: scan table blocks must be 16-byte aligned "
                    "to use AltiVec DCT. Reverting to non-AltiVec version.\n");
             return;
@@ -634,8 +617,7 @@ void MPV_common_init_altivec(MpegEncContext *s)

         if ((s->avctx->dct_algo == FF_DCT_AUTO) ||
-            (s->avctx->dct_algo == FF_DCT_ALTIVEC))
-        {
+            (s->avctx->dct_algo == FF_DCT_ALTIVEC)) {
 #if 0 /* seems to cause trouble under some circumstances */
             s->dct_quantize = dct_quantize_altivec;
 #endif

......
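MPV_common_init_altivec above refuses to install the AltiVec DCT when the quantization matrices or the inverse scan table are not 16-byte aligned, because vec_ld/vec_st only operate on 16-byte-aligned addresses (the low four address bits are ignored). The test it uses is simply those low four bits; a minimal equivalent helper (illustrative name, not part of the file):

    static int is_16byte_aligned(const void *p)
    {
        return (((long)p) & 0x0f) == 0;   /* same check as in the hunks above */
    }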
@@ -379,8 +379,7 @@ void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,

     v4=(vector signed int *)b4;
     v5=(vector signed int *)b5;

-    for (i=0; i< w4;i++)
-    {
+    for (i=0; i< w4;i++) {
 #if 0
         b4[i] -= (3*(b3[i] + b5[i])+4)>>3;
......