Commit 1faa8064 authored by ramiro

Do not misuse long as the size of a register in x86.

typedef x86_reg as the appropriate size and use it instead.

git-svn-id: file:///var/local/repositories/ffmpeg/trunk@13081 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent bec1a1e3
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "dsputil.h" #include "dsputil.h"
#include "dsputil_mmx.h" #include "dsputil_mmx.h"
#include "common.h" #include "common.h"
#include "x86_cpu.h"
/***************************************************************************** /*****************************************************************************
* *
...@@ -301,7 +302,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) ...@@ -301,7 +302,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
\ \
: "+a"(src), "+c"(dst)\ : "+a"(src), "+c"(dst)\
: "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
: "memory"\ : "memory"\
);\ );\
if(h==16){\ if(h==16){\
...@@ -316,7 +317,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) ...@@ -316,7 +317,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
\ \
: "+a"(src), "+c"(dst)\ : "+a"(src), "+c"(dst)\
: "S"((long)srcStride), "D"((long)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
: "memory"\ : "memory"\
);\ );\
}\ }\
...@@ -367,7 +368,7 @@ static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstSt ...@@ -367,7 +368,7 @@ static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstSt
"decl %2 \n\t"\ "decl %2 \n\t"\
" jnz 1b \n\t"\ " jnz 1b \n\t"\
: "+a"(src), "+c"(dst), "+m"(h)\ : "+a"(src), "+c"(dst), "+m"(h)\
: "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
: "memory"\ : "memory"\
);\ );\
}\ }\
......
...@@ -42,7 +42,7 @@ int mm_support(void) ...@@ -42,7 +42,7 @@ int mm_support(void)
int rval = 0; int rval = 0;
int eax, ebx, ecx, edx; int eax, ebx, ecx, edx;
int max_std_level, max_ext_level, std_caps=0, ext_caps=0; int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
long a, c; x86_reg a, c;
asm volatile ( asm volatile (
/* See if CPUID instruction is supported ... */ /* See if CPUID instruction is supported ... */
......
...@@ -249,7 +249,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1* ...@@ -249,7 +249,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*
"sub $2, %2 \n\t" "sub $2, %2 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
: "+r"(dst), "+r"(src), "+r"(h) : "+r"(dst), "+r"(src), "+r"(h)
: "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y) : "r"((x86_reg)stride), "m"(ff_pw_32), "m"(x), "m"(y)
); );
} }
...@@ -300,7 +300,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1* ...@@ -300,7 +300,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*
"sub $1, %2\n\t" "sub $1, %2\n\t"
"jnz 1b\n\t" "jnz 1b\n\t"
: "+r" (dst), "+r"(src), "+r"(h) : "+r" (dst), "+r"(src), "+r"(h)
: "m" (ff_pw_32), "r"((long)stride) : "m" (ff_pw_32), "r"((x86_reg)stride)
: "%esi"); : "%esi");
} }
......
...@@ -72,7 +72,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -72,7 +72,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
"lea (%0,%3,2), %0 \n\t" "lea (%0,%3,2), %0 \n\t"
"jg 1b \n\t" "jg 1b \n\t"
:"+r"(dst), "+r"(src), "+r"(h) :"+r"(dst), "+r"(src), "+r"(h)
:"r"((long)stride) :"r"((x86_reg)stride)
); );
} else { } else {
asm volatile( asm volatile(
...@@ -100,7 +100,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -100,7 +100,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
"lea (%0,%3,2), %0 \n\t" "lea (%0,%3,2), %0 \n\t"
"jg 1b \n\t" "jg 1b \n\t"
:"+r"(dst), "+r"(src), "+r"(h) :"+r"(dst), "+r"(src), "+r"(h)
:"r"((long)stride) :"r"((x86_reg)stride)
); );
} }
return; return;
...@@ -154,7 +154,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ...@@ -154,7 +154,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
"lea (%0,%3,2), %0 \n\t" "lea (%0,%3,2), %0 \n\t"
"jg 1b \n\t" "jg 1b \n\t"
:"+r"(dst), "+r"(src), "+r"(h) :"+r"(dst), "+r"(src), "+r"(h)
:"r"((long)stride) :"r"((x86_reg)stride)
); );
} }
...@@ -202,7 +202,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1* ...@@ -202,7 +202,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*
"lea (%0,%3,2), %0 \n\t" "lea (%0,%3,2), %0 \n\t"
"jg 1b \n\t" "jg 1b \n\t"
:"+r"(dst), "+r"(src), "+r"(h) :"+r"(dst), "+r"(src), "+r"(h)
:"r"((long)stride) :"r"((x86_reg)stride)
); );
} }
This diff is collapsed.
...@@ -55,7 +55,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ ...@@ -55,7 +55,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r" ((long)line_size) :"r" ((x86_reg)line_size)
:"%"REG_a, "memory"); :"%"REG_a, "memory");
} }
...@@ -105,7 +105,7 @@ static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -105,7 +105,7 @@ static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
#else #else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif #endif
:"S"((long)src1Stride), "D"((long)dstStride) :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
:"memory"); :"memory");
} }
...@@ -152,7 +152,7 @@ static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -152,7 +152,7 @@ static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
#else #else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif #endif
:"S"((long)src1Stride), "D"((long)dstStride) :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
:"memory"); :"memory");
//the following should be used, though better not with gcc ... //the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) /* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
...@@ -222,7 +222,7 @@ static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src ...@@ -222,7 +222,7 @@ static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src
#else #else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif #endif
:"S"((long)src1Stride), "D"((long)dstStride) :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
:"memory"); :"memory");
//the following should be used, though better not with gcc ... //the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) /* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
...@@ -277,7 +277,7 @@ static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -277,7 +277,7 @@ static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
#else #else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif #endif
:"S"((long)src1Stride), "D"((long)dstStride) :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
:"memory"); :"memory");
} }
...@@ -329,7 +329,7 @@ static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -329,7 +329,7 @@ static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
#else #else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif #endif
:"S"((long)src1Stride), "D"((long)dstStride) :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
:"memory"); :"memory");
//the following should be used, though better not with gcc ... //the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) /* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
...@@ -373,7 +373,7 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -373,7 +373,7 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r" ((long)line_size) :"r" ((x86_reg)line_size)
:"%"REG_a, "memory"); :"%"REG_a, "memory");
} }
...@@ -417,7 +417,7 @@ static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -417,7 +417,7 @@ static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
#else #else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif #endif
:"S"((long)src1Stride), "D"((long)dstStride) :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
:"memory"); :"memory");
//the following should be used, though better not with gcc ... //the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) /* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
...@@ -471,7 +471,7 @@ static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int ...@@ -471,7 +471,7 @@ static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
#else #else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif #endif
:"S"((long)src1Stride), "D"((long)dstStride) :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
:"memory"); :"memory");
//the following should be used, though better not with gcc ... //the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) /* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
...@@ -544,7 +544,7 @@ static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *sr ...@@ -544,7 +544,7 @@ static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *sr
#else #else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif #endif
:"S"((long)src1Stride), "D"((long)dstStride) :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
:"memory"); :"memory");
//the following should be used, though better not with gcc ... //the following should be used, though better not with gcc ...
/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) /* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
...@@ -586,7 +586,7 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in ...@@ -586,7 +586,7 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r" ((long)line_size) :"r" ((x86_reg)line_size)
:"%"REG_a, "memory"); :"%"REG_a, "memory");
} }
...@@ -616,7 +616,7 @@ static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_ ...@@ -616,7 +616,7 @@ static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D" (block) :"+g"(h), "+S"(pixels), "+D" (block)
:"r" ((long)line_size) :"r" ((x86_reg)line_size)
:"%"REG_a, "memory"); :"%"REG_a, "memory");
} }
...@@ -650,7 +650,7 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in ...@@ -650,7 +650,7 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D" (block) :"+g"(h), "+S"(pixels), "+D" (block)
:"r" ((long)line_size) :"r" ((x86_reg)line_size)
:"%"REG_a, "memory"); :"%"REG_a, "memory");
} }
...@@ -678,7 +678,7 @@ static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_siz ...@@ -678,7 +678,7 @@ static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_siz
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r" ((long)line_size) :"r" ((x86_reg)line_size)
:"%"REG_a, "memory"); :"%"REG_a, "memory");
} }
...@@ -710,7 +710,7 @@ static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ ...@@ -710,7 +710,7 @@ static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r" ((long)line_size) :"r" ((x86_reg)line_size)
:"%"REG_a, "memory"); :"%"REG_a, "memory");
} }
...@@ -748,7 +748,7 @@ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_ ...@@ -748,7 +748,7 @@ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r" ((long)line_size) :"r" ((x86_reg)line_size)
:"%"REG_a, "memory"); :"%"REG_a, "memory");
} }
...@@ -791,7 +791,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -791,7 +791,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r" ((long)line_size) :"r" ((x86_reg)line_size)
:"%"REG_a, "memory"); :"%"REG_a, "memory");
} }
...@@ -812,7 +812,7 @@ static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_siz ...@@ -812,7 +812,7 @@ static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_siz
"movd %%mm2, (%1, %2, 2) \n\t" "movd %%mm2, (%1, %2, 2) \n\t"
"movd %%mm3, (%1, %3) \n\t" "movd %%mm3, (%1, %3) \n\t"
::"S"(pixels), "D"(block), ::"S"(pixels), "D"(block),
"r" ((long)line_size), "r"(3L*line_size) "r" ((x86_reg)line_size), "r"((x86_reg)3L*line_size)
:"memory"); :"memory");
block += 4*line_size; block += 4*line_size;
pixels += 4*line_size; pixels += 4*line_size;
...@@ -868,8 +868,8 @@ static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride ...@@ -868,8 +868,8 @@ static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride
"decl %0 \n\t"\ "decl %0 \n\t"\
"jnz 1b \n\t"\ "jnz 1b \n\t"\
:"+g"(h), "+r"(src)\ :"+g"(h), "+r"(src)\
:"r"((long)off1), "r"((long)off2),\ :"r"((x86_reg)off1), "r"((x86_reg)off2),\
"r"((long)(dst-src)), "r"((long)stride)\ "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
:"memory"\ :"memory"\
);\ );\
}\ }\
...@@ -885,8 +885,8 @@ static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, ...@@ -885,8 +885,8 @@ static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride,
"decl %0 \n\t"\ "decl %0 \n\t"\
"jnz 1b \n\t"\ "jnz 1b \n\t"\
:"+g"(h), "+r"(src)\ :"+g"(h), "+r"(src)\
:"r"((long)off1), "r"((long)off2),\ :"r"((x86_reg)off1), "r"((x86_reg)off2),\
"r"((long)(dst-src)), "r"((long)stride)\ "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
:"memory"\ :"memory"\
);\ );\
} }
......
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale) static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
{ {
long i=0; x86_reg i=0;
assert(FFABS(scale) < MAX_ABS); assert(FFABS(scale) < MAX_ABS);
scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT; scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
...@@ -72,7 +72,7 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[ ...@@ -72,7 +72,7 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[
static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale) static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
{ {
long i=0; x86_reg i=0;
if(FFABS(scale) < MAX_ABS){ if(FFABS(scale) < MAX_ABS){
scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT; scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
......
...@@ -57,7 +57,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -57,7 +57,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size) :"r"((x86_reg)line_size)
:REG_a, "memory"); :REG_a, "memory");
} }
...@@ -107,7 +107,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -107,7 +107,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t
#else #else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif #endif
:"S"((long)src1Stride), "D"((long)dstStride) :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
:"memory"); :"memory");
} }
...@@ -153,7 +153,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -153,7 +153,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size) :"r"((x86_reg)line_size)
:REG_a, "memory"); :REG_a, "memory");
} }
...@@ -202,7 +202,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -202,7 +202,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t
#else #else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif #endif
:"S"((long)src1Stride), "D"((long)dstStride) :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
:"memory"); :"memory");
} }
...@@ -231,7 +231,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -231,7 +231,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size) :"r"((x86_reg)line_size)
:REG_a, "memory"); :REG_a, "memory");
} }
...@@ -297,7 +297,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -297,7 +297,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
"subl $2, %0 \n\t" "subl $2, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels) :"+g"(h), "+S"(pixels)
:"D"(block), "r"((long)line_size) :"D"(block), "r"((x86_reg)line_size)
:REG_a, "memory"); :REG_a, "memory");
} }
...@@ -493,7 +493,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line ...@@ -493,7 +493,7 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r"((long)line_size) :"r"((x86_reg)line_size)
:REG_a, "memory"); :REG_a, "memory");
} }
...@@ -568,7 +568,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin ...@@ -568,7 +568,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
"subl $2, %0 \n\t" "subl $2, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels) :"+g"(h), "+S"(pixels)
:"D"(block), "r"((long)line_size) :"D"(block), "r"((x86_reg)line_size)
:REG_a, "memory"); :REG_a, "memory");
} }
......
...@@ -51,7 +51,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) ...@@ -51,7 +51,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
"add $32, %%"REG_a" \n\t" "add $32, %%"REG_a" \n\t"
"js 1b \n\t" "js 1b \n\t"
: "+r" (pixels) : "+r" (pixels)
: "r" (block+64), "r" ((long)line_size), "r" ((long)line_size*2) : "r" (block+64), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*2)
: "%"REG_a : "%"REG_a
); );
} }
...@@ -80,7 +80,7 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint ...@@ -80,7 +80,7 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint
"add $16, %%"REG_a" \n\t" "add $16, %%"REG_a" \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
: "+r" (s1), "+r" (s2) : "+r" (s1), "+r" (s2)
: "r" (block+64), "r" ((long)stride) : "r" (block+64), "r" ((x86_reg)stride)
: "%"REG_a : "%"REG_a
); );
} }
...@@ -88,7 +88,7 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint ...@@ -88,7 +88,7 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint
static int pix_sum16_mmx(uint8_t * pix, int line_size){ static int pix_sum16_mmx(uint8_t * pix, int line_size){
const int h=16; const int h=16;
int sum; int sum;
long index= -line_size*h; x86_reg index= -line_size*h;
asm volatile( asm volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
...@@ -117,7 +117,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){ ...@@ -117,7 +117,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){
"movd %%mm6, %0 \n\t" "movd %%mm6, %0 \n\t"
"andl $0xFFFF, %0 \n\t" "andl $0xFFFF, %0 \n\t"
: "=&r" (sum), "+r" (index) : "=&r" (sum), "+r" (index)
: "r" (pix - index), "r" ((long)line_size) : "r" (pix - index), "r" ((x86_reg)line_size)
); );
return sum; return sum;
...@@ -162,7 +162,7 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) { ...@@ -162,7 +162,7 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) {
"psrlq $32, %%mm7\n" /* shift hi dword to lo */ "psrlq $32, %%mm7\n" /* shift hi dword to lo */
"paddd %%mm7,%%mm1\n" "paddd %%mm7,%%mm1\n"
"movd %%mm1,%1\n" "movd %%mm1,%1\n"
: "+r" (pix), "=r"(tmp) : "r" ((long)line_size) : "%ecx" ); : "+r" (pix), "=r"(tmp) : "r" ((x86_reg)line_size) : "%ecx" );
return tmp; return tmp;
} }
...@@ -222,7 +222,7 @@ static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int ...@@ -222,7 +222,7 @@ static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
"paddd %%mm7,%%mm1\n" "paddd %%mm7,%%mm1\n"
"movd %%mm1,%2\n" "movd %%mm1,%2\n"
: "+r" (pix1), "+r" (pix2), "=r"(tmp) : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h) : "r" ((x86_reg)line_size) , "m" (h)
: "%ecx"); : "%ecx");
return tmp; return tmp;
} }
...@@ -282,7 +282,7 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int ...@@ -282,7 +282,7 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
"paddd %%mm7,%%mm1\n" "paddd %%mm7,%%mm1\n"
"movd %%mm1,%2\n" "movd %%mm1,%2\n"
: "+r" (pix1), "+r" (pix2), "=r"(tmp) : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h) : "r" ((x86_reg)line_size) , "m" (h)
: "%ecx"); : "%ecx");
return tmp; return tmp;
} }
...@@ -345,7 +345,7 @@ static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in ...@@ -345,7 +345,7 @@ static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
"paddd %%xmm1,%%xmm7\n" "paddd %%xmm1,%%xmm7\n"
"movd %%xmm7,%3\n" "movd %%xmm7,%3\n"
: "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp) : "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp)
: "r" ((long)line_size)); : "r" ((x86_reg)line_size));
return tmp; return tmp;
} }
...@@ -469,7 +469,7 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) { ...@@ -469,7 +469,7 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
"paddd %%mm6,%%mm0\n" "paddd %%mm6,%%mm0\n"
"movd %%mm0,%1\n" "movd %%mm0,%1\n"
: "+r" (pix1), "=r"(tmp) : "+r" (pix1), "=r"(tmp)
: "r" ((long)line_size) , "g" (h-2) : "r" ((x86_reg)line_size) , "g" (h-2)
: "%ecx"); : "%ecx");
return tmp; return tmp;
} }
...@@ -583,7 +583,7 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) { ...@@ -583,7 +583,7 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
"paddd %%mm6,%%mm0\n" "paddd %%mm6,%%mm0\n"
"movd %%mm0,%1\n" "movd %%mm0,%1\n"
: "+r" (pix1), "=r"(tmp) : "+r" (pix1), "=r"(tmp)
: "r" ((long)line_size) , "g" (h-2) : "r" ((x86_reg)line_size) , "g" (h-2)
: "%ecx"); : "%ecx");
return tmp + hf_noise8_mmx(pix+8, line_size, h); return tmp + hf_noise8_mmx(pix+8, line_size, h);
} }
...@@ -665,7 +665,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si ...@@ -665,7 +665,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
"paddw %%mm6,%%mm0\n" "paddw %%mm6,%%mm0\n"
"movd %%mm0,%1\n" "movd %%mm0,%1\n"
: "+r" (pix), "=r"(tmp) : "+r" (pix), "=r"(tmp)
: "r" ((long)line_size) , "m" (h) : "r" ((x86_reg)line_size) , "m" (h)
: "%ecx"); : "%ecx");
return tmp & 0xFFFF; return tmp & 0xFFFF;
} }
...@@ -706,7 +706,7 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s ...@@ -706,7 +706,7 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s
"movd %%mm6,%1\n" "movd %%mm6,%1\n"
: "+r" (pix), "=r"(tmp) : "+r" (pix), "=r"(tmp)
: "r" ((long)line_size) , "m" (h) : "r" ((x86_reg)line_size) , "m" (h)
: "%ecx"); : "%ecx");
return tmp; return tmp;
} }
...@@ -785,7 +785,7 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in ...@@ -785,7 +785,7 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
"paddw %%mm6,%%mm0\n" "paddw %%mm6,%%mm0\n"
"movd %%mm0,%2\n" "movd %%mm0,%2\n"
: "+r" (pix1), "+r" (pix2), "=r"(tmp) : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h) : "r" ((x86_reg)line_size) , "m" (h)
: "%ecx"); : "%ecx");
return tmp & 0x7FFF; return tmp & 0x7FFF;
} }
...@@ -843,14 +843,14 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i ...@@ -843,14 +843,14 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i
"movd %%mm6,%2\n" "movd %%mm6,%2\n"
: "+r" (pix1), "+r" (pix2), "=r"(tmp) : "+r" (pix1), "+r" (pix2), "=r"(tmp)
: "r" ((long)line_size) , "m" (h) : "r" ((x86_reg)line_size) , "m" (h)
: "%ecx"); : "%ecx");
return tmp; return tmp;
} }
#undef SUM #undef SUM
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
long i=0; x86_reg i=0;
asm volatile( asm volatile(
"1: \n\t" "1: \n\t"
"movq (%2, %0), %%mm0 \n\t" "movq (%2, %0), %%mm0 \n\t"
...@@ -865,14 +865,14 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ ...@@ -865,14 +865,14 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
"cmp %4, %0 \n\t" "cmp %4, %0 \n\t"
" jb 1b \n\t" " jb 1b \n\t"
: "+r" (i) : "+r" (i)
: "r"(src1), "r"(src2), "r"(dst), "r"((long)w-15) : "r"(src1), "r"(src2), "r"(dst), "r"((x86_reg)w-15)
); );
for(; i<w; i++) for(; i<w; i++)
dst[i+0] = src1[i+0]-src2[i+0]; dst[i+0] = src1[i+0]-src2[i+0];
} }
static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
long i=0; x86_reg i=0;
uint8_t l, lt; uint8_t l, lt;
asm volatile( asm volatile(
...@@ -895,7 +895,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -895,7 +895,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
"cmp %4, %0 \n\t" "cmp %4, %0 \n\t"
" jb 1b \n\t" " jb 1b \n\t"
: "+r" (i) : "+r" (i)
: "r"(src1), "r"(src2), "r"(dst), "r"((long)w) : "r"(src1), "r"(src2), "r"(dst), "r"((x86_reg)w)
); );
l= *left; l= *left;
...@@ -930,7 +930,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t ...@@ -930,7 +930,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
DIFF_PIXELS_1(m0, mm##7, mm##0, (%1,%3,4), (%2,%3,4))\ DIFF_PIXELS_1(m0, mm##7, mm##0, (%1,%3,4), (%2,%3,4))\
"mov"#m1" %0, "#mm"0 \n\t"\ "mov"#m1" %0, "#mm"0 \n\t"\
: "+m"(temp), "+r"(p1b), "+r"(p2b)\ : "+m"(temp), "+r"(p1b), "+r"(p2b)\
: "r"((long)stride), "r"((long)stride*3)\ : "r"((x86_reg)stride), "r"((x86_reg)stride*3)\
);\ );\
} }
//the "+m"(temp) is needed as gcc 2.95 sometimes fails to compile "=m"(temp) //the "+m"(temp) is needed as gcc 2.95 sometimes fails to compile "=m"(temp)
...@@ -1237,7 +1237,7 @@ DCT_SAD_FUNC(ssse3) ...@@ -1237,7 +1237,7 @@ DCT_SAD_FUNC(ssse3)
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int size){ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int size){
int sum; int sum;
long i=size; x86_reg i=size;
asm volatile( asm volatile(
"pxor %%mm4, %%mm4 \n" "pxor %%mm4, %%mm4 \n"
"1: \n" "1: \n"
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include "dsputil.h" #include "dsputil.h"
#include "x86_cpu.h"
static const int p1m1[2] __attribute__((aligned(8))) = static const int p1m1[2] __attribute__((aligned(8))) =
{ 0, 1 << 31 }; { 0, 1 << 31 };
...@@ -30,7 +31,8 @@ static const int m1p1[2] __attribute__((aligned(8))) = ...@@ -30,7 +31,8 @@ static const int m1p1[2] __attribute__((aligned(8))) =
void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z) void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z)
{ {
int ln = s->nbits; int ln = s->nbits;
long i, j; long j;
x86_reg i;
long nblocks, nloops; long nblocks, nloops;
FFTComplex *p, *cptr; FFTComplex *p, *cptr;
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include "dsputil.h" #include "dsputil.h"
#include "x86_cpu.h"
static const int p1m1[2] __attribute__((aligned(8))) = static const int p1m1[2] __attribute__((aligned(8))) =
{ 0, 1 << 31 }; { 0, 1 << 31 };
...@@ -30,7 +31,8 @@ static const int m1p1[2] __attribute__((aligned(8))) = ...@@ -30,7 +31,8 @@ static const int m1p1[2] __attribute__((aligned(8))) =
void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z) void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z)
{ {
int ln = s->nbits; int ln = s->nbits;
long i, j; long j;
x86_reg i;
long nblocks, nloops; long nblocks, nloops;
FFTComplex *p, *cptr; FFTComplex *p, *cptr;
...@@ -124,7 +126,8 @@ void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z) ...@@ -124,7 +126,8 @@ void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z)
void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output,
const FFTSample *input, FFTSample *tmp) const FFTSample *input, FFTSample *tmp)
{ {
long k, n8, n4, n2, n; long n8, n4, n2, n;
x86_reg k;
const uint16_t *revtab = s->fft.revtab; const uint16_t *revtab = s->fft.revtab;
const FFTSample *tcos = s->tcos; const FFTSample *tcos = s->tcos;
const FFTSample *tsin = s->tsin; const FFTSample *tsin = s->tsin;
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include "dsputil.h" #include "dsputil.h"
#include "x86_cpu.h"
static const int p1p1p1m1[4] __attribute__((aligned(16))) = static const int p1p1p1m1[4] __attribute__((aligned(16))) =
{ 0, 0, 0, 1 << 31 }; { 0, 0, 0, 1 << 31 };
...@@ -48,7 +49,8 @@ static void print_v4sf(const char *str, __m128 a) ...@@ -48,7 +49,8 @@ static void print_v4sf(const char *str, __m128 a)
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
{ {
int ln = s->nbits; int ln = s->nbits;
long i, j; x86_reg i;
long j;
long nblocks, nloops; long nblocks, nloops;
FFTComplex *p, *cptr; FFTComplex *p, *cptr;
...@@ -142,7 +144,8 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) ...@@ -142,7 +144,8 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output,
const FFTSample *input, FFTSample *tmp) const FFTSample *input, FFTSample *tmp)
{ {
long k, n8, n4, n2, n; x86_reg k;
long n8, n4, n2, n;
const uint16_t *revtab = s->fft.revtab; const uint16_t *revtab = s->fft.revtab;
const FFTSample *tcos = s->tcos; const FFTSample *tcos = s->tcos;
const FFTSample *tsin = s->tsin; const FFTSample *tsin = s->tsin;
......
...@@ -20,13 +20,14 @@ ...@@ -20,13 +20,14 @@
*/ */
#include "dsputil_mmx.h" #include "dsputil_mmx.h"
#include "x86_cpu.h"
static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data) static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data)
{ {
double c = 2.0 / (len-1.0); double c = 2.0 / (len-1.0);
int n2 = len>>1; int n2 = len>>1;
long i = -n2*sizeof(int32_t); x86_reg i = -n2*sizeof(int32_t);
long j = n2*sizeof(int32_t); x86_reg j = n2*sizeof(int32_t);
asm volatile( asm volatile(
"movsd %0, %%xmm7 \n\t" "movsd %0, %%xmm7 \n\t"
"movapd %1, %%xmm6 \n\t" "movapd %1, %%xmm6 \n\t"
...@@ -71,7 +72,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, ...@@ -71,7 +72,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
double *data1 = tmp + lag; double *data1 = tmp + lag;
int j; int j;
if((long)data1 & 15) if((x86_reg)data1 & 15)
data1++; data1++;
apply_welch_window_sse2(data, len, data1); apply_welch_window_sse2(data, len, data1);
...@@ -81,7 +82,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, ...@@ -81,7 +82,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
data1[len] = 0.0; data1[len] = 0.0;
for(j=0; j<lag; j+=2){ for(j=0; j<lag; j+=2){
long i = -len*sizeof(double); x86_reg i = -len*sizeof(double);
if(j == lag-2) { if(j == lag-2) {
asm volatile( asm volatile(
"movsd %6, %%xmm0 \n\t" "movsd %6, %%xmm0 \n\t"
......
...@@ -96,7 +96,7 @@ static int decode_significance_8x8_x86(CABACContext *c, ...@@ -96,7 +96,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
int *index, const uint8_t *sig_off){ int *index, const uint8_t *sig_off){
int minusindex= 4-(int)index; int minusindex= 4-(int)index;
int coeff_count; int coeff_count;
long last=0; x86_reg last=0;
asm volatile( asm volatile(
"movl "RANGE "(%3), %%esi \n\t" "movl "RANGE "(%3), %%esi \n\t"
"movl "LOW "(%3), %%ebx \n\t" "movl "LOW "(%3), %%ebx \n\t"
......
This diff is collapsed.
...@@ -34,7 +34,7 @@ DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL; ...@@ -34,7 +34,7 @@ DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL;
static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ {
long len= -(stride*h); x86_reg len= -(stride*h);
asm volatile( asm volatile(
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
...@@ -64,7 +64,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -64,7 +64,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
"add %3, %%"REG_a" \n\t" "add %3, %%"REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
: "+a" (len) : "+a" (len)
: "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) : "r" (blk1 - len), "r" (blk2 - len), "r" ((x86_reg)stride)
); );
} }
...@@ -84,7 +84,7 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -84,7 +84,7 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
"sub $2, %0 \n\t" "sub $2, %0 \n\t"
" jg 1b \n\t" " jg 1b \n\t"
: "+r" (h), "+r" (blk1), "+r" (blk2) : "+r" (h), "+r" (blk1), "+r" (blk2)
: "r" ((long)stride) : "r" ((x86_reg)stride)
); );
} }
...@@ -106,7 +106,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) ...@@ -106,7 +106,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
"sub $2, %0 \n\t" "sub $2, %0 \n\t"
" jg 1b \n\t" " jg 1b \n\t"
: "+r" (h), "+r" (blk1), "+r" (blk2) : "+r" (h), "+r" (blk1), "+r" (blk2)
: "r" ((long)stride) : "r" ((x86_reg)stride)
); );
asm volatile( asm volatile(
"movhlps %%xmm6, %%xmm0 \n\t" "movhlps %%xmm6, %%xmm0 \n\t"
...@@ -135,7 +135,7 @@ static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h ...@@ -135,7 +135,7 @@ static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h
"sub $2, %0 \n\t" "sub $2, %0 \n\t"
" jg 1b \n\t" " jg 1b \n\t"
: "+r" (h), "+r" (blk1), "+r" (blk2) : "+r" (h), "+r" (blk1), "+r" (blk2)
: "r" ((long)stride) : "r" ((x86_reg)stride)
); );
} }
...@@ -160,7 +160,7 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h ...@@ -160,7 +160,7 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h
"sub $2, %0 \n\t" "sub $2, %0 \n\t"
" jg 1b \n\t" " jg 1b \n\t"
: "+r" (h), "+r" (blk1), "+r" (blk2) : "+r" (h), "+r" (blk1), "+r" (blk2)
: "r" ((long)stride) : "r" ((x86_reg)stride)
); );
} }
...@@ -190,13 +190,13 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -190,13 +190,13 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
"sub $2, %0 \n\t" "sub $2, %0 \n\t"
" jg 1b \n\t" " jg 1b \n\t"
: "+r" (h), "+r" (blk1), "+r" (blk2) : "+r" (h), "+r" (blk1), "+r" (blk2)
: "r" ((long)stride) : "r" ((x86_reg)stride)
); );
} }
static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
{ {
long len= -(stride*h); x86_reg len= -(stride*h);
asm volatile( asm volatile(
ASMALIGN(4) ASMALIGN(4)
"1: \n\t" "1: \n\t"
...@@ -228,13 +228,13 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int ...@@ -228,13 +228,13 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
"add %4, %%"REG_a" \n\t" "add %4, %%"REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
: "+a" (len) : "+a" (len)
: "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((x86_reg)stride)
); );
} }
static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{ {
long len= -(stride*h); x86_reg len= -(stride*h);
asm volatile( asm volatile(
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t" "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
...@@ -281,7 +281,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) ...@@ -281,7 +281,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
"add %4, %%"REG_a" \n\t" "add %4, %%"REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
: "+a" (len) : "+a" (len)
: "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride) : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((x86_reg)stride)
); );
} }
......
...@@ -34,7 +34,7 @@ extern uint16_t inv_zigzag_direct16[64]; ...@@ -34,7 +34,7 @@ extern uint16_t inv_zigzag_direct16[64];
static void dct_unquantize_h263_intra_mmx(MpegEncContext *s, static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale) DCTELEM *block, int n, int qscale)
{ {
long level, qmul, qadd, nCoeffs; x86_reg level, qmul, qadd, nCoeffs;
qmul = qscale << 1; qmul = qscale << 1;
...@@ -109,7 +109,7 @@ asm volatile( ...@@ -109,7 +109,7 @@ asm volatile(
static void dct_unquantize_h263_inter_mmx(MpegEncContext *s, static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale) DCTELEM *block, int n, int qscale)
{ {
long qmul, qadd, nCoeffs; x86_reg qmul, qadd, nCoeffs;
qmul = qscale << 1; qmul = qscale << 1;
qadd = (qscale - 1) | 1; qadd = (qscale - 1) | 1;
...@@ -200,7 +200,7 @@ asm volatile( ...@@ -200,7 +200,7 @@ asm volatile(
static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s, static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale) DCTELEM *block, int n, int qscale)
{ {
long nCoeffs; x86_reg nCoeffs;
const uint16_t *quant_matrix; const uint16_t *quant_matrix;
int block0; int block0;
...@@ -269,7 +269,7 @@ asm volatile( ...@@ -269,7 +269,7 @@ asm volatile(
static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale) DCTELEM *block, int n, int qscale)
{ {
long nCoeffs; x86_reg nCoeffs;
const uint16_t *quant_matrix; const uint16_t *quant_matrix;
assert(s->block_last_index[n]>=0); assert(s->block_last_index[n]>=0);
...@@ -335,7 +335,7 @@ asm volatile( ...@@ -335,7 +335,7 @@ asm volatile(
static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale) DCTELEM *block, int n, int qscale)
{ {
long nCoeffs; x86_reg nCoeffs;
const uint16_t *quant_matrix; const uint16_t *quant_matrix;
int block0; int block0;
...@@ -401,7 +401,7 @@ asm volatile( ...@@ -401,7 +401,7 @@ asm volatile(
static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
DCTELEM *block, int n, int qscale) DCTELEM *block, int n, int qscale)
{ {
long nCoeffs; x86_reg nCoeffs;
const uint16_t *quant_matrix; const uint16_t *quant_matrix;
assert(s->block_last_index[n]>=0); assert(s->block_last_index[n]>=0);
......
...@@ -95,7 +95,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -95,7 +95,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
DCTELEM *block, int n, DCTELEM *block, int n,
int qscale, int *overflow) int qscale, int *overflow)
{ {
long last_non_zero_p1; x86_reg last_non_zero_p1;
int level=0, q; //=0 is because gcc says uninitialized ... int level=0, q; //=0 is because gcc says uninitialized ...
const uint16_t *qmat, *bias; const uint16_t *qmat, *bias;
DECLARE_ALIGNED_16(int16_t, temp_block[64]); DECLARE_ALIGNED_16(int16_t, temp_block[64]);
......
...@@ -73,7 +73,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ ...@@ -73,7 +73,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
IDWTELEM * const dst = b+w2; IDWTELEM * const dst = b+w2;
i = 0; i = 0;
for(; (((long)&dst[i]) & 0x1F) && i<w_r; i++){ for(; (((x86_reg)&dst[i]) & 0x1F) && i<w_r; i++){
dst[i] = dst[i] - (b[i] + b[i + 1]); dst[i] = dst[i] - (b[i] + b[i + 1]);
} }
for(; i<w_r-15; i+=16){ for(; i<w_r-15; i+=16){
...@@ -146,7 +146,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ ...@@ -146,7 +146,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
IDWTELEM * const src = b+w2; IDWTELEM * const src = b+w2;
i = 0; i = 0;
for(; (((long)&temp[i]) & 0x1F) && i<w_r; i++){ for(; (((x86_reg)&temp[i]) & 0x1F) && i<w_r; i++){
temp[i] = src[i] - ((-W_AM*(b[i] + b[i+1]))>>W_AS); temp[i] = src[i] - ((-W_AM*(b[i] + b[i+1]))>>W_AS);
} }
for(; i<w_r-7; i+=8){ for(; i<w_r-7; i+=8){
...@@ -436,7 +436,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ ...@@ -436,7 +436,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
"movdqa %%"s3", %%"t3" \n\t" "movdqa %%"s3", %%"t3" \n\t"
void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
long i = width; x86_reg i = width;
while(i & 0x1F) while(i & 0x1F)
{ {
...@@ -534,7 +534,7 @@ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, ...@@ -534,7 +534,7 @@ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
long i = width; x86_reg i = width;
while(i & 15) while(i & 15)
{ {
i--; i--;
...@@ -605,7 +605,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I ...@@ -605,7 +605,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I
#define snow_inner_add_yblock_sse2_header \ #define snow_inner_add_yblock_sse2_header \
IDWTELEM * * dst_array = sb->line + src_y;\ IDWTELEM * * dst_array = sb->line + src_y;\
long tmp;\ x86_reg tmp;\
asm volatile(\ asm volatile(\
"mov %7, %%"REG_c" \n\t"\ "mov %7, %%"REG_c" \n\t"\
"mov %6, %2 \n\t"\ "mov %6, %2 \n\t"\
...@@ -667,7 +667,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I ...@@ -667,7 +667,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I
"jnz 1b \n\t"\ "jnz 1b \n\t"\
:"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\ :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
:\ :\
"rm"((long)(src_x<<1)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\ "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"((x86_reg)b_h),"m"((x86_reg)src_stride):\
"%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
#define snow_inner_add_yblock_sse2_end_8\ #define snow_inner_add_yblock_sse2_end_8\
...@@ -684,8 +684,8 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I ...@@ -684,8 +684,8 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I
"dec %2 \n\t"\ "dec %2 \n\t"\
snow_inner_add_yblock_sse2_end_common2 snow_inner_add_yblock_sse2_end_common2
static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
snow_inner_add_yblock_sse2_header snow_inner_add_yblock_sse2_header
snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0") snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0")
snow_inner_add_yblock_sse2_accum_8("2", "8") snow_inner_add_yblock_sse2_accum_8("2", "8")
...@@ -732,8 +732,8 @@ snow_inner_add_yblock_sse2_accum_8("0", "136") ...@@ -732,8 +732,8 @@ snow_inner_add_yblock_sse2_accum_8("0", "136")
snow_inner_add_yblock_sse2_end_8 snow_inner_add_yblock_sse2_end_8
} }
static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
snow_inner_add_yblock_sse2_header snow_inner_add_yblock_sse2_header
snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0") snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0")
snow_inner_add_yblock_sse2_accum_16("2", "16") snow_inner_add_yblock_sse2_accum_16("2", "16")
...@@ -758,7 +758,7 @@ snow_inner_add_yblock_sse2_end_16 ...@@ -758,7 +758,7 @@ snow_inner_add_yblock_sse2_end_16
#define snow_inner_add_yblock_mmx_header \ #define snow_inner_add_yblock_mmx_header \
IDWTELEM * * dst_array = sb->line + src_y;\ IDWTELEM * * dst_array = sb->line + src_y;\
long tmp;\ x86_reg tmp;\
asm volatile(\ asm volatile(\
"mov %7, %%"REG_c" \n\t"\ "mov %7, %%"REG_c" \n\t"\
"mov %6, %2 \n\t"\ "mov %6, %2 \n\t"\
...@@ -815,11 +815,11 @@ snow_inner_add_yblock_sse2_end_16 ...@@ -815,11 +815,11 @@ snow_inner_add_yblock_sse2_end_16
"jnz 1b \n\t"\ "jnz 1b \n\t"\
:"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\ :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
:\ :\
"rm"((long)(src_x<<1)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\ "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"((x86_reg)b_h),"m"((x86_reg)src_stride):\
"%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
snow_inner_add_yblock_mmx_header snow_inner_add_yblock_mmx_header
snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
snow_inner_add_yblock_mmx_accum("2", "8", "0") snow_inner_add_yblock_mmx_accum("2", "8", "0")
...@@ -829,8 +829,8 @@ snow_inner_add_yblock_mmx_mix("0", "0") ...@@ -829,8 +829,8 @@ snow_inner_add_yblock_mmx_mix("0", "0")
snow_inner_add_yblock_mmx_end("16") snow_inner_add_yblock_mmx_end("16")
} }
static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
snow_inner_add_yblock_mmx_header snow_inner_add_yblock_mmx_header
snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
snow_inner_add_yblock_mmx_accum("2", "16", "0") snow_inner_add_yblock_mmx_accum("2", "16", "0")
......
...@@ -71,7 +71,7 @@ DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL; ...@@ -71,7 +71,7 @@ DECLARE_ALIGNED_16(const uint64_t, ff_pw_9) = 0x0009000900090009ULL;
/** Sacrifying mm6 allows to pipeline loads from src */ /** Sacrifying mm6 allows to pipeline loads from src */
static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
const uint8_t *src, long int stride, const uint8_t *src, x86_reg stride,
int rnd, int64_t shift) int rnd, int64_t shift)
{ {
asm volatile( asm volatile(
...@@ -107,7 +107,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, ...@@ -107,7 +107,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
* Data is already unpacked, so some operations can directly be made from * Data is already unpacked, so some operations can directly be made from
* memory. * memory.
*/ */
static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, long int stride, static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,
const int16_t *src, int rnd) const int16_t *src, int rnd)
{ {
int h = 8; int h = 8;
...@@ -152,7 +152,7 @@ static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, long int stride, ...@@ -152,7 +152,7 @@ static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, long int stride,
* Sacrify mm6 for *9 factor. * Sacrify mm6 for *9 factor.
*/ */
static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src, static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src,
long int stride, int rnd, long int offset) x86_reg stride, int rnd, x86_reg offset)
{ {
rnd = 8-rnd; rnd = 8-rnd;
asm volatile( asm volatile(
...@@ -259,7 +259,7 @@ DECLARE_ALIGNED_16(const uint64_t, ff_pw_18) = 0x0012001200120012ULL; ...@@ -259,7 +259,7 @@ DECLARE_ALIGNED_16(const uint64_t, ff_pw_18) = 0x0012001200120012ULL;
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4) \ #define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4) \
static void \ static void \
vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \
long int src_stride, \ x86_reg src_stride, \
int rnd, int64_t shift) \ int rnd, int64_t shift) \
{ \ { \
int h = 8; \ int h = 8; \
...@@ -314,7 +314,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \ ...@@ -314,7 +314,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \
*/ */
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4) \ #define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4) \
static void \ static void \
vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, long int stride, \ vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \
const int16_t *src, int rnd) \ const int16_t *src, int rnd) \
{ \ { \
int h = 8; \ int h = 8; \
...@@ -353,7 +353,7 @@ vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, long int stride, \ ...@@ -353,7 +353,7 @@ vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, long int stride, \
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4) \ #define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4) \
static void \ static void \
vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \ vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \
long int stride, int rnd, long int offset) \ x86_reg stride, int rnd, x86_reg offset) \
{ \ { \
int h = 8; \ int h = 8; \
src -= offset; \ src -= offset; \
...@@ -387,9 +387,9 @@ MSPEL_FILTER13_8B (shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,% ...@@ -387,9 +387,9 @@ MSPEL_FILTER13_8B (shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%
MSPEL_FILTER13_VER_16B(shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%4 )") MSPEL_FILTER13_VER_16B(shift3, "0(%1 )", "0(%1,%3 )", "0(%1,%3,2)", "0(%1,%4 )")
MSPEL_FILTER13_HOR_16B(shift3, "2*0(%1)", "2*1(%1)", "2*2(%1)", "2*3(%1)") MSPEL_FILTER13_HOR_16B(shift3, "2*0(%1)", "2*1(%1)", "2*2(%1)", "2*3(%1)")
typedef void (*vc1_mspel_mc_filter_ver_16bits)(int16_t *dst, const uint8_t *src, long int src_stride, int rnd, int64_t shift); typedef void (*vc1_mspel_mc_filter_ver_16bits)(int16_t *dst, const uint8_t *src, x86_reg src_stride, int rnd, int64_t shift);
typedef void (*vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst, long int dst_stride, const int16_t *src, int rnd); typedef void (*vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst, x86_reg dst_stride, const int16_t *src, int rnd);
typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, long int stride, int rnd, long int offset); typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, x86_reg stride, int rnd, x86_reg offset);
/** /**
* Interpolates fractional pel values by applying proper vertical then * Interpolates fractional pel values by applying proper vertical then
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
# define REG_D "rdi" # define REG_D "rdi"
# define REG_S "rsi" # define REG_S "rsi"
# define PTR_SIZE "8" # define PTR_SIZE "8"
typedef int64_t x86_reg;
# define REG_SP "rsp" # define REG_SP "rsp"
# define REG_BP "rbp" # define REG_BP "rbp"
...@@ -50,6 +51,7 @@ ...@@ -50,6 +51,7 @@
# define REG_D "edi" # define REG_D "edi"
# define REG_S "esi" # define REG_S "esi"
# define PTR_SIZE "4" # define PTR_SIZE "4"
typedef int32_t x86_reg;
# define REG_SP "esp" # define REG_SP "esp"
# define REG_BP "ebp" # define REG_BP "ebp"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment