Commit 21e54c72 authored by michael

YVU9 support (other planar YUV formats with other chroma subsamplings should be trivial to add, if they had an IMGFMT)
disabled broken (= new) converters (they cause random SIGSEGV / signal 11 crashes)
various bugfixes


git-svn-id: file:///var/local/repositories/mplayer/trunk/postproc@6533 b3059339-0415-0410-9bf9-f77b7e298cf2
parent 49b05166
...@@ -17,8 +17,8 @@ ...@@ -17,8 +17,8 @@
*/ */
/* /*
supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8, Y800 supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8, Y800, YVU9
supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32, Y8, Y800 supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32, Y8, Y800, YVU9
BGR15/16 support dithering BGR15/16 support dithering
unscaled special converters unscaled special converters
...@@ -106,10 +106,10 @@ untested special converters ...@@ -106,10 +106,10 @@ untested special converters
#define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 \ #define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 \
|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\ || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
|| (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\ || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
|| (x)==IMGFMT_Y800) || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 \ #define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 \
|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\ || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
|| (x)==IMGFMT_Y800) || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
#define isRGB(x) (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB) #define isRGB(x) (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
#define isBGR(x) (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR) #define isBGR(x) (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
#define isPacked(x) ((x)==IMGFMT_YUY2 || isRGB(x) || isBGR(x)) #define isPacked(x) ((x)==IMGFMT_YUY2 || isRGB(x) || isBGR(x))
...@@ -265,6 +265,7 @@ void in_asm_used_var_warning_killer() ...@@ -265,6 +265,7 @@ void in_asm_used_var_warning_killer()
#endif #endif
static int testFormat[]={ static int testFormat[]={
IMGFMT_YVU9,
IMGFMT_YV12, IMGFMT_YV12,
//IMGFMT_IYUV, //IMGFMT_IYUV,
IMGFMT_I420, IMGFMT_I420,
...@@ -335,7 +336,7 @@ static void doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcForma ...@@ -335,7 +336,7 @@ static void doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcForma
ssdU/= w*h/4; ssdU/= w*h/4;
ssdV/= w*h/4; ssdV/= w*h/4;
if(ssdY>10 || ssdU>10 || ssdV>10){ if(ssdY>100 || ssdU>50 || ssdV>50){
printf(" %s %dx%d -> %s %4dx%4d flags=%2d SSD=%5lld,%5lld,%5lld\n", printf(" %s %dx%d -> %s %4dx%4d flags=%2d SSD=%5lld,%5lld,%5lld\n",
vo_format_name(srcFormat), srcW, srcH, vo_format_name(srcFormat), srcW, srcH,
vo_format_name(dstFormat), dstW, dstH, vo_format_name(dstFormat), dstW, dstH,
...@@ -382,13 +383,13 @@ static void selfTest(uint8_t *src[3], int stride[3], int w, int h){ ...@@ -382,13 +383,13 @@ static void selfTest(uint8_t *src[3], int stride[3], int w, int h){
} }
} }
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, static inline void yuv2yuvXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW) uint8_t *dest, uint8_t *uDest, uint8_t *vDest)
{ {
//FIXME Optimize (just quickly writen not opti..) //FIXME Optimize (just quickly writen not opti..)
int i; int i;
for(i=0; i<dstW; i++) for(i=0; i<c->dstW; i++)
{ {
int val=0; int val=0;
int j; int j;
...@@ -399,7 +400,7 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt ...@@ -399,7 +400,7 @@ static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
} }
if(uDest != NULL) if(uDest != NULL)
for(i=0; i<(dstW>>1); i++) for(i=0; i<c->chrDstW; i++)
{ {
int u=0; int u=0;
int v=0; int v=0;
...@@ -1657,7 +1658,7 @@ static void bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], i ...@@ -1657,7 +1658,7 @@ static void bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], i
* bring pointers in YUV order instead of YVU * bring pointers in YUV order instead of YVU
*/ */
static inline void orderYUV(int format, uint8_t * sortedP[], int sortedStride[], uint8_t * p[], int stride[]){ static inline void orderYUV(int format, uint8_t * sortedP[], int sortedStride[], uint8_t * p[], int stride[]){
if(format == IMGFMT_YV12){ if(format == IMGFMT_YV12 || format == IMGFMT_YVU9){
sortedP[0]= p[0]; sortedP[0]= p[0];
sortedP[1]= p[1]; sortedP[1]= p[1];
sortedP[2]= p[2]; sortedP[2]= p[2];
...@@ -1726,14 +1727,14 @@ static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], ...@@ -1726,14 +1727,14 @@ static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[],
int plane; int plane;
for(plane=0; plane<3; plane++) for(plane=0; plane<3; plane++)
{ {
int length= plane==0 ? c->srcW : ((c->srcW+1)>>1); int length= plane==0 ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample);
int y= plane==0 ? srcSliceY: ((srcSliceY+1)>>1); int y= plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
int height= plane==0 ? srcSliceH: ((srcSliceH+1)>>1); int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
if((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0) if((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
{ {
if(!isGray(c->dstFormat)) if(!isGray(c->dstFormat))
memset(dst[plane], 0, dstStride[plane]*height); memset(dst[plane], 128, dstStride[plane]*height);
} }
else else
{ {
...@@ -1774,6 +1775,7 @@ static void getSubSampleFactors(int *h, int *v, int format){ ...@@ -1774,6 +1775,7 @@ static void getSubSampleFactors(int *h, int *v, int format){
break; break;
case IMGFMT_YV12: case IMGFMT_YV12:
case IMGFMT_I420: case IMGFMT_I420:
case IMGFMT_Y800: //FIXME remove after different subsamplings are fully implemented
*h=1; *h=1;
*v=1; *v=1;
break; break;
...@@ -1801,7 +1803,8 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -1801,7 +1803,8 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
asm volatile("emms\n\t"::: "memory"); asm volatile("emms\n\t"::: "memory");
#endif #endif
if(swScale==NULL) globalInit(); if(swScale==NULL) globalInit();
//srcFormat= IMGFMT_Y800;
//srcFormat= IMGFMT_YVU9;
/* avoid dupplicate Formats, so we dont need to check to much */ /* avoid dupplicate Formats, so we dont need to check to much */
srcFormat = remove_dup_fourcc(srcFormat); srcFormat = remove_dup_fourcc(srcFormat);
dstFormat = remove_dup_fourcc(dstFormat); dstFormat = remove_dup_fourcc(dstFormat);
...@@ -1853,6 +1856,38 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -1853,6 +1856,38 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesFilter=1; if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesFilter=1;
if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesFilter=1; if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesFilter=1;
getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
// reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation
if((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
// drop eery 2. pixel for chroma calculation unless user wants full chroma
if((isBGR(srcFormat) || isRGB(srcFormat) || srcFormat==IMGFMT_YUY2) && !(flags&SWS_FULL_CHR_V))
c->chrSrcVSubSample=1;
// drop eery 2. pixel for chroma calculation unless user wants full chroma
if((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP))
c->chrSrcHSubSample=1;
c->chrIntHSubSample= c->chrDstHSubSample;
c->chrIntVSubSample= c->chrSrcVSubSample;
// note the -((-x)>>y) is so that we allways round toward +inf
c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
/* printf("%d %d %d %d / %d %d %d %d //\n",
c->chrSrcW,
c->chrSrcH,
c->chrDstW,
c->chrDstH,
srcW,
srcH,
dstW,
dstH);*/
/* unscaled special Cases */ /* unscaled special Cases */
if(unscaled && !usesFilter) if(unscaled && !usesFilter)
{ {
...@@ -1877,7 +1912,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -1877,7 +1912,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
return c; return c;
} }
/* yuv2bgr */ /* yuv2bgr */
if(isPlanarYUV(srcFormat) && isBGR(dstFormat)) if((srcFormat==IMGFMT_YV12 || srcFormat==IMGFMT_I420) && isBGR(dstFormat))
{ {
// FIXME multiple yuv2rgb converters wont work that way cuz that thing is full of globals&statics // FIXME multiple yuv2rgb converters wont work that way cuz that thing is full of globals&statics
#ifdef WORDS_BIGENDIAN #ifdef WORDS_BIGENDIAN
...@@ -1895,10 +1930,14 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -1895,10 +1930,14 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
vo_format_name(srcFormat), vo_format_name(dstFormat)); vo_format_name(srcFormat), vo_format_name(dstFormat));
return c; return c;
} }
#if 1
/* simple copy */ /* simple copy */
if(srcFormat == dstFormat if( srcFormat == dstFormat
|| ((isPlanarYUV(srcFormat)||isGray(srcFormat)) && (isPlanarYUV(dstFormat)||isGray(dstFormat)))) || (srcFormat==IMGFMT_YV12 && dstFormat==IMGFMT_I420)
|| (srcFormat==IMGFMT_I420 && dstFormat==IMGFMT_YV12)
|| (isPlanarYUV(srcFormat) && isGray(dstFormat))
|| (isPlanarYUV(dstFormat) && isGray(srcFormat))
)
{ {
c->swScale= simpleCopy; c->swScale= simpleCopy;
...@@ -1907,7 +1946,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -1907,7 +1946,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
vo_format_name(srcFormat), vo_format_name(dstFormat)); vo_format_name(srcFormat), vo_format_name(dstFormat));
return c; return c;
} }
#endif
/* bgr32to24 & rgb32to24*/ /* bgr32to24 & rgb32to24*/
if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR24) if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR24)
||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB24)) ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB24))
...@@ -2003,6 +2042,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -2003,6 +2042,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
return c; return c;
} }
#if 0 //segfaults
/* bgr15to32 */ /* bgr15to32 */
if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR32) if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR32)
||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB32)) ||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB32))
...@@ -2014,7 +2054,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -2014,7 +2054,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
vo_format_name(srcFormat), vo_format_name(dstFormat)); vo_format_name(srcFormat), vo_format_name(dstFormat));
return c; return c;
} }
#endif
/* bgr16to24 */ /* bgr16to24 */
if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR24) if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR24)
||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB24)) ||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB24))
...@@ -2027,6 +2067,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -2027,6 +2067,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
return c; return c;
} }
#if 0 //segfaults
/* bgr16to32 */ /* bgr16to32 */
if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR32) if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR32)
||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB32)) ||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB32))
...@@ -2038,7 +2079,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -2038,7 +2079,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
vo_format_name(srcFormat), vo_format_name(dstFormat)); vo_format_name(srcFormat), vo_format_name(dstFormat));
return c; return c;
} }
#endif
/* bgr24toYV12 */ /* bgr24toYV12 */
if(srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_YV12) if(srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_YV12)
{ {
...@@ -2063,37 +2104,6 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, ...@@ -2063,37 +2104,6 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
else else
c->canMMX2BeUsed=0; c->canMMX2BeUsed=0;
getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
// reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation
if((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
// drop eery 2. pixel for chroma calculation unless user wants full chroma
if((isBGR(srcFormat) || isRGB(srcFormat) || srcFormat==IMGFMT_YUY2) && !(flags&SWS_FULL_CHR_V))
c->chrSrcVSubSample=1;
// drop eery 2. pixel for chroma calculation unless user wants full chroma
if((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP))
c->chrSrcHSubSample=1;
c->chrIntHSubSample= c->chrDstHSubSample;
c->chrIntVSubSample= c->chrSrcVSubSample;
// note the -((-x)>>y) is so that we allways round toward +inf
c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
/* printf("%d %d %d %d / %d %d %d %d //\n",
c->chrSrcW,
c->chrSrcH,
c->chrDstW,
c->chrDstH,
srcW,
srcH,
dstW,
dstH);*/
c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW; c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH; c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
...@@ -2126,7 +2136,7 @@ dstH);*/ ...@@ -2126,7 +2136,7 @@ dstH);*/
srcW , dstW, filterAlign, 1<<14, flags, srcW , dstW, filterAlign, 1<<14, flags,
srcFilter->lumH, dstFilter->lumH); srcFilter->lumH, dstFilter->lumH);
initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
(srcW+1)>>1, c->chrDstW, filterAlign, 1<<14, flags, c->chrSrcW, c->chrDstW, filterAlign, 1<<14, flags,
srcFilter->chrH, dstFilter->chrH); srcFilter->chrH, dstFilter->chrH);
#ifdef ARCH_X86 #ifdef ARCH_X86
...@@ -2151,7 +2161,7 @@ dstH);*/ ...@@ -2151,7 +2161,7 @@ dstH);*/
srcH , dstH, 1, (1<<12)-4, flags, srcH , dstH, 1, (1<<12)-4, flags,
srcFilter->lumV, dstFilter->lumV); srcFilter->lumV, dstFilter->lumV);
initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
(srcH+1)>>1, c->chrDstH, 1, (1<<12)-4, flags, c->chrSrcH, c->chrDstH, 1, (1<<12)-4, flags,
srcFilter->chrV, dstFilter->chrV); srcFilter->chrV, dstFilter->chrV);
// Calculate Buffer Sizes so that they wont run out while handling these damn slices // Calculate Buffer Sizes so that they wont run out while handling these damn slices
...@@ -2161,12 +2171,12 @@ dstH);*/ ...@@ -2161,12 +2171,12 @@ dstH);*/
{ {
int chrI= i*c->chrDstH / dstH; int chrI= i*c->chrDstH / dstH;
int nextSlice= MAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1, int nextSlice= MAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1,
((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<1)); ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
nextSlice&= ~1; // Slices start at even boundaries nextSlice&= ~3; // Slices start at boundaries which are divisable through 4
if(c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice) if(c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice)
c->vLumBufSize= nextSlice - c->vLumFilterPos[i ]; c->vLumBufSize= nextSlice - c->vLumFilterPos[i ];
if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>1)) if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
c->vChrBufSize= (nextSlice>>1) - c->vChrFilterPos[chrI]; c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
} }
// allocate pixbufs (we use dynamic allocation because otherwise we would need to // allocate pixbufs (we use dynamic allocation because otherwise we would need to
......
...@@ -655,7 +655,7 @@ ...@@ -655,7 +655,7 @@
static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW,
int16_t * lumMmxFilter, int16_t * chrMmxFilter) int16_t * lumMmxFilter, int16_t * chrMmxFilter)
{ {
#ifdef HAVE_MMX #ifdef HAVE_MMX
...@@ -664,14 +664,14 @@ static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lu ...@@ -664,14 +664,14 @@ static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lu
asm volatile( asm volatile(
YSCALEYUV2YV12X(0) YSCALEYUV2YV12X(0)
:: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), :: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize),
"r" (chrMmxFilter+chrFilterSize*4), "r" (uDest), "m" (dstW>>1) "r" (chrMmxFilter+chrFilterSize*4), "r" (uDest), "m" (chrDstW)
: "%eax", "%edx", "%esi" : "%eax", "%edx", "%esi"
); );
asm volatile( asm volatile(
YSCALEYUV2YV12X(4096) YSCALEYUV2YV12X(4096)
:: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), :: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize),
"r" (chrMmxFilter+chrFilterSize*4), "r" (vDest), "m" (dstW>>1) "r" (chrMmxFilter+chrFilterSize*4), "r" (vDest), "m" (chrDstW)
: "%eax", "%edx", "%esi" : "%eax", "%edx", "%esi"
); );
} }
...@@ -683,29 +683,29 @@ static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lu ...@@ -683,29 +683,29 @@ static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lu
: "%eax", "%edx", "%esi" : "%eax", "%edx", "%esi"
); );
#else #else
yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, yuv2yuvXinC(c, lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize, chrFilter, chrSrc, chrFilterSize,
dest, uDest, vDest, dstW); dest, uDest, vDest);
#endif #endif
} }
static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW) uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{ {
#ifdef HAVE_MMX #ifdef HAVE_MMX
if(uDest != NULL) if(uDest != NULL)
{ {
asm volatile( asm volatile(
YSCALEYUV2YV121 YSCALEYUV2YV121
:: "r" (chrSrc + (dstW>>1)), "r" (uDest + (dstW>>1)), :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
"g" (-(dstW>>1)) "g" (-chrDstW)
: "%eax" : "%eax"
); );
asm volatile( asm volatile(
YSCALEYUV2YV121 YSCALEYUV2YV121
:: "r" (chrSrc + 2048 + (dstW>>1)), "r" (vDest + (dstW>>1)), :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
"g" (-(dstW>>1)) "g" (-chrDstW)
: "%eax" : "%eax"
); );
} }
...@@ -731,7 +731,7 @@ static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, ...@@ -731,7 +731,7 @@ static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
} }
if(uDest != NULL) if(uDest != NULL)
for(i=0; i<(dstW>>1); i++) for(i=0; i<chrDstW; i++)
{ {
int u=chrSrc[i]>>7; int u=chrSrc[i]>>7;
int v=chrSrc[i + 2048]>>7; int v=chrSrc[i + 2048]>>7;
...@@ -2582,6 +2582,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar ...@@ -2582,6 +2582,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
const int dstW= c->dstW; const int dstW= c->dstW;
const int dstH= c->dstH; const int dstH= c->dstH;
const int chrDstW= c->chrDstW; const int chrDstW= c->chrDstW;
const int chrSrcW= c->chrSrcW;
const int lumXInc= c->lumXInc; const int lumXInc= c->lumXInc;
const int chrXInc= c->chrXInc; const int chrXInc= c->chrXInc;
const int dstFormat= c->dstFormat; const int dstFormat= c->dstFormat;
...@@ -2609,6 +2610,8 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar ...@@ -2609,6 +2610,8 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
uint8_t *funnyYCode= c->funnyYCode; uint8_t *funnyYCode= c->funnyYCode;
uint8_t *funnyUVCode= c->funnyUVCode; uint8_t *funnyUVCode= c->funnyUVCode;
uint8_t *formatConvBuffer= c->formatConvBuffer; uint8_t *formatConvBuffer= c->formatConvBuffer;
const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
/* vars whch will change and which we need to storw back in the context */ /* vars whch will change and which we need to storw back in the context */
int dstY= c->dstY; int dstY= c->dstY;
...@@ -2629,7 +2632,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar ...@@ -2629,7 +2632,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
srcStride[1]= srcStrideParam[2]; srcStride[1]= srcStrideParam[2];
srcStride[2]= srcStrideParam[1]; srcStride[2]= srcStrideParam[1];
} }
else if(c->srcFormat==IMGFMT_YV12){ else if(c->srcFormat==IMGFMT_YV12 || c->srcFormat==IMGFMT_YVU9){
src[0]= srcParam[0]; src[0]= srcParam[0];
src[1]= srcParam[1]; src[1]= srcParam[1];
src[2]= srcParam[2]; src[2]= srcParam[2];
...@@ -2726,7 +2729,7 @@ i--; ...@@ -2726,7 +2729,7 @@ i--;
ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1) ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1)
// Do we have enough lines in this slice to output the dstY line // Do we have enough lines in this slice to output the dstY line
if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < ((srcSliceY + srcSliceH + 1)>>1)) if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample))
{ {
//Do horizontal scaling //Do horizontal scaling
while(lastInLumBuf < lastLumSrcY) while(lastInLumBuf < lastLumSrcY)
...@@ -2746,16 +2749,16 @@ i--; ...@@ -2746,16 +2749,16 @@ i--;
} }
while(lastInChrBuf < lastChrSrcY) while(lastInChrBuf < lastChrSrcY)
{ {
uint8_t *src1= src[1]+(lastInChrBuf + 1 - (srcSliceY>>1))*srcStride[1]; uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
uint8_t *src2= src[2]+(lastInChrBuf + 1 - (srcSliceY>>1))*srcStride[2]; uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
chrBufIndex++; chrBufIndex++;
ASSERT(chrBufIndex < 2*vChrBufSize) ASSERT(chrBufIndex < 2*vChrBufSize)
ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) < ((srcSliceH+1)>>1)) ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH))
ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) >= 0) ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
//FIXME replace parameters through context struct (some at least) //FIXME replace parameters through context struct (some at least)
if(!(isGray(srcFormat) || isGray(dstFormat))) if(!(isGray(srcFormat) || isGray(dstFormat)))
RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc, RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
funnyUVCode, c->srcFormat, formatConvBuffer, funnyUVCode, c->srcFormat, formatConvBuffer,
c->chrMmx2Filter, c->chrMmx2FilterPos); c->chrMmx2Filter, c->chrMmx2FilterPos);
...@@ -2770,8 +2773,8 @@ i--; ...@@ -2770,8 +2773,8 @@ i--;
/* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n", /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n",
firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY, firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY,
lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize, lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize,
vChrBufSize, vLumBufSize); vChrBufSize, vLumBufSize);*/
*/
//Do horizontal scaling //Do horizontal scaling
while(lastInLumBuf+1 < srcSliceY + srcSliceH) while(lastInLumBuf+1 < srcSliceY + srcSliceH)
{ {
...@@ -2786,17 +2789,17 @@ i--; ...@@ -2786,17 +2789,17 @@ i--;
c->lumMmx2Filter, c->lumMmx2FilterPos); c->lumMmx2Filter, c->lumMmx2FilterPos);
lastInLumBuf++; lastInLumBuf++;
} }
while(lastInChrBuf+1 < ((srcSliceY + srcSliceH)>>1)) while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
{ {
uint8_t *src1= src[1]+(lastInChrBuf + 1 - (srcSliceY>>1))*srcStride[1]; uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
uint8_t *src2= src[2]+(lastInChrBuf + 1 - (srcSliceY>>1))*srcStride[2]; uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
chrBufIndex++; chrBufIndex++;
ASSERT(chrBufIndex < 2*vChrBufSize) ASSERT(chrBufIndex < 2*vChrBufSize)
ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) < ((srcSliceH+1)>>1)) ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH)
ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) >= 0) ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0)
if(!(isGray(srcFormat) || isGray(dstFormat))) if(!(isGray(srcFormat) || isGray(dstFormat)))
RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc, RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
funnyUVCode, c->srcFormat, formatConvBuffer, funnyUVCode, c->srcFormat, formatConvBuffer,
c->chrMmx2Filter, c->chrMmx2FilterPos); c->chrMmx2Filter, c->chrMmx2FilterPos);
...@@ -2823,7 +2826,7 @@ i--; ...@@ -2823,7 +2826,7 @@ i--;
{ {
int16_t *lumBuf = lumPixBuf[0]; int16_t *lumBuf = lumPixBuf[0];
int16_t *chrBuf= chrPixBuf[0]; int16_t *chrBuf= chrPixBuf[0];
RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW); RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
} }
else //General YV12 else //General YV12
{ {
...@@ -2831,9 +2834,9 @@ i--; ...@@ -2831,9 +2834,9 @@ i--;
int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
RENAME(yuv2yuvX)( RENAME(yuv2yuvX)(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+(dstY>>1)*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, vDest, dstW, dest, uDest, vDest, dstW, chrDstW,
lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+(dstY>>1)*vChrFilterSize*4); lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+chrDstY*vChrFilterSize*4);
} }
} }
else else
...@@ -2874,11 +2877,11 @@ i--; ...@@ -2874,11 +2877,11 @@ i--;
int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
if(isPlanarYUV(dstFormat)) //YV12 if(isPlanarYUV(dstFormat)) //YV12
{ {
if(dstY&1) uDest=vDest= NULL; //FIXME split functions in lumi / chromi if(dstY&1) uDest=vDest= NULL;
yuv2yuvXinC( yuv2yuvXinC(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+(dstY>>1)*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, vDest, dstW); dest, uDest, vDest);
} }
else else
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment