Commit a18c8560 authored by michael

Vertical scaler with accurate rounding; some people on doom9 can see ±1 errors.

The ±1 issue is limited to vertical filters with more than 2 taps, so bilinear upscaling was unaffected.
The new code is sometimes faster and sometimes slower, but the difference is significant (~20%), so it is optional and enabled with arnd=1.


git-svn-id: file:///var/local/repositories/mplayer/trunk/libswscale@19177 b3059339-0415-0410-9bf9-f77b7e298cf2
parent 9a6a6693
......@@ -848,7 +848,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist
dist-1.0);
}
static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
int srcW, int dstW, int filterAlign, int one, int flags,
SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
......@@ -1127,10 +1127,18 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
filterAlign = 1;
}
if (flags & SWS_CPU_CAPS_MMX) {
// special case for unscaled vertical filtering
if(minFilterSize == 1 && filterAlign == 2)
filterAlign= 1;
}
ASSERT(minFilterSize > 0)
filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
ASSERT(filterSize > 0)
filter= av_malloc(filterSize*dstW*sizeof(double));
if(filterSize >= MAX_FILTER_SIZE)
return -1;
*outFilterSize= filterSize;
if(flags&SWS_PRINT_INFO)
......@@ -1216,6 +1224,7 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
}
av_free(filter);
return 0;
}
#if defined(ARCH_X86) || defined(ARCH_X86_64)
......@@ -2115,6 +2124,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
/* precalculate vertical scaler filter coefficients */
{
const int filterAlign=
(flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
(flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
1;
......
......@@ -64,6 +64,7 @@ extern "C" {
//input subsampling info
#define SWS_FULL_CHR_H_INP 0x4000
#define SWS_DIRECT_BGR 0x8000
#define SWS_ACCURATE_RND 0x40000
#define SWS_CPU_CAPS_MMX 0x80000000
#define SWS_CPU_CAPS_MMX2 0x20000000
......
......@@ -126,7 +126,9 @@ typedef struct SwsContext{
#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, its hardcoded in the asm
#define ESP_OFFSET "11*8+4*4*256*2+8"
#define VROUNDER_OFFSET "11*8+4*4*256*2+16"
#define U_TEMP "11*8+4*4*256*2+24"
#define V_TEMP "11*8+4*4*256*2+32"
uint64_t redDither __attribute__((aligned(8)));
uint64_t greenDither __attribute__((aligned(8)));
uint64_t blueDither __attribute__((aligned(8)));
......@@ -144,6 +146,8 @@ typedef struct SwsContext{
int dstW;
uint64_t esp __attribute__((aligned(8)));
uint64_t vRounder __attribute__((aligned(8)));
uint64_t u_temp __attribute__((aligned(8)));
uint64_t v_temp __attribute__((aligned(8)));
#ifdef HAVE_ALTIVEC
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment