Commit 8622c96c authored by michael

Fix accurate rounding mode on x86_64.

Fixes issue222.


git-svn-id: file:///var/local/repositories/mplayer/trunk/libswscale@27545 b3059339-0415-0410-9bf9-f77b7e298cf2
parent f9cb402d
...@@ -1293,7 +1293,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF ...@@ -1293,7 +1293,7 @@ static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outF
filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1)); filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
assert(filterSize > 0); assert(filterSize > 0);
filter= av_malloc(filterSize*dstW*sizeof(double)); filter= av_malloc(filterSize*dstW*sizeof(double));
if (filterSize >= MAX_FILTER_SIZE || !filter) if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
goto error; goto error;
*outFilterSize= filterSize; *outFilterSize= filterSize;
......
...@@ -29,6 +29,8 @@ ...@@ -29,6 +29,8 @@
#include "libavutil/avutil.h" #include "libavutil/avutil.h"
#define STR(s) AV_TOSTRING(s) // short stringification helper for the inline-asm templates; AV_STRINGIFY is too long
#define MAX_FILTER_SIZE 256 #define MAX_FILTER_SIZE 256
#define VOFW 2048 #define VOFW 2048
...@@ -40,6 +42,16 @@ ...@@ -40,6 +42,16 @@
#define ALT32_CORR 1 #define ALT32_CORR 1
#endif #endif
/*
 * Byte layout of one packed entry in the MMX vertical filter tables used by
 * the SWS_ACCURATE_RND code path:
 *
 *   offset 0          first source-line pointer
 *   offset APCK_PTR2  second source-line pointer
 *   offset APCK_COEF  8 bytes of filter coefficients (loaded with movq)
 *   APCK_SIZE         size of the whole entry, i.e. the step to the next one
 *
 * The same constants are used in two places that must stay in sync:
 *   - the inline-asm macros, where they are pasted in via STR() as
 *     displacements off REG_d and as the "add" step, so they must remain
 *     plain integer literals;
 *   - the C code in swScale that fills lumMmxFilter/chrMmxFilter, which
 *     indexes with APCK_PTR2/4, APCK_COEF/4 and APCK_SIZE/8
 *     (assumes the tables have 4-byte elements -- TODO confirm against the
 *     SwsContext declaration).
 *
 * The pointers are stored through void**, so the x86_64 variant reserves
 * 8 bytes per pointer where the other variant reserves 4.
 */
#ifdef ARCH_X86_64
# define APCK_PTR2 8
# define APCK_COEF 16
# define APCK_SIZE 24
#else
# define APCK_PTR2 4
# define APCK_COEF 8
# define APCK_SIZE 16
#endif
typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY, typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]); int srcSliceH, uint8_t* dst[], int dstStride[]);
......
...@@ -119,19 +119,19 @@ ...@@ -119,19 +119,19 @@
"1: \n\t"\ "1: \n\t"\
"movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\ "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\
"movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\ "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
"mov 4(%%"REG_d"), %%"REG_S" \n\t"\ "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
"movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\ "movq " x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\
"movq %%mm0, %%mm3 \n\t"\ "movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm1, %%mm0 \n\t"\ "punpcklwd %%mm1, %%mm0 \n\t"\
"punpckhwd %%mm1, %%mm3 \n\t"\ "punpckhwd %%mm1, %%mm3 \n\t"\
"movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
"pmaddwd %%mm1, %%mm0 \n\t"\ "pmaddwd %%mm1, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm3 \n\t"\ "pmaddwd %%mm1, %%mm3 \n\t"\
"paddd %%mm0, %%mm4 \n\t"\ "paddd %%mm0, %%mm4 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\ "paddd %%mm3, %%mm5 \n\t"\
"movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\ "movq 8+" x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\
"mov 16(%%"REG_d"), %%"REG_S" \n\t"\ "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
"add $16, %%"REG_d" \n\t"\ "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\ "test %%"REG_S", %%"REG_S" \n\t"\
"movq %%mm2, %%mm0 \n\t"\ "movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\ "punpcklwd %%mm3, %%mm2 \n\t"\
...@@ -271,19 +271,19 @@ ...@@ -271,19 +271,19 @@
"2: \n\t"\ "2: \n\t"\
"movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
"movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
"mov 4(%%"REG_d"), %%"REG_S" \n\t"\ "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
"movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
"movq %%mm0, %%mm3 \n\t"\ "movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm1, %%mm0 \n\t"\ "punpcklwd %%mm1, %%mm0 \n\t"\
"punpckhwd %%mm1, %%mm3 \n\t"\ "punpckhwd %%mm1, %%mm3 \n\t"\
"movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\ "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\
"pmaddwd %%mm1, %%mm0 \n\t"\ "pmaddwd %%mm1, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm3 \n\t"\ "pmaddwd %%mm1, %%mm3 \n\t"\
"paddd %%mm0, %%mm4 \n\t"\ "paddd %%mm0, %%mm4 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\ "paddd %%mm3, %%mm5 \n\t"\
"movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
"mov 16(%%"REG_d"), %%"REG_S" \n\t"\ "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
"add $16, %%"REG_d" \n\t"\ "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\ "test %%"REG_S", %%"REG_S" \n\t"\
"movq %%mm2, %%mm0 \n\t"\ "movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\ "punpcklwd %%mm3, %%mm2 \n\t"\
...@@ -315,19 +315,19 @@ ...@@ -315,19 +315,19 @@
"2: \n\t"\ "2: \n\t"\
"movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
"mov 4(%%"REG_d"), %%"REG_S" \n\t"\ "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
"movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
"movq %%mm0, %%mm3 \n\t"\ "movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm4, %%mm0 \n\t"\ "punpcklwd %%mm4, %%mm0 \n\t"\
"punpckhwd %%mm4, %%mm3 \n\t"\ "punpckhwd %%mm4, %%mm3 \n\t"\
"movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
"pmaddwd %%mm4, %%mm0 \n\t"\ "pmaddwd %%mm4, %%mm0 \n\t"\
"pmaddwd %%mm4, %%mm3 \n\t"\ "pmaddwd %%mm4, %%mm3 \n\t"\
"paddd %%mm0, %%mm1 \n\t"\ "paddd %%mm0, %%mm1 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\ "paddd %%mm3, %%mm5 \n\t"\
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
"mov 16(%%"REG_d"), %%"REG_S" \n\t"\ "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
"add $16, %%"REG_d" \n\t"\ "add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\ "test %%"REG_S", %%"REG_S" \n\t"\
"movq %%mm2, %%mm0 \n\t"\ "movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\ "punpcklwd %%mm3, %%mm2 \n\t"\
...@@ -3180,18 +3180,19 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -3180,18 +3180,19 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
#ifdef HAVE_MMX #ifdef HAVE_MMX
int i; int i;
if (flags & SWS_ACCURATE_RND){ if (flags & SWS_ACCURATE_RND){
int s= APCK_SIZE / 8;
for (i=0; i<vLumFilterSize; i+=2){ for (i=0; i<vLumFilterSize; i+=2){
lumMmxFilter[2*i+0]= (int32_t)lumSrcPtr[i ]; *(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
lumMmxFilter[2*i+1]= (int32_t)lumSrcPtr[i+(vLumFilterSize>1)]; *(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
lumMmxFilter[2*i+2]= lumMmxFilter[s*i+APCK_COEF/4 ]=
lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ] lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
} }
for (i=0; i<vChrFilterSize; i+=2){ for (i=0; i<vChrFilterSize; i+=2){
chrMmxFilter[2*i+0]= (int32_t)chrSrcPtr[i ]; *(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
chrMmxFilter[2*i+1]= (int32_t)chrSrcPtr[i+(vChrFilterSize>1)]; *(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
chrMmxFilter[2*i+2]= chrMmxFilter[s*i+APCK_COEF/4 ]=
chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ] chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
} }
}else{ }else{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment