YV12 like formats support (I420/IYUV)

bugfixes related to vo_x11 resizing git-svn-id: file:///var/local/repositories/mplayer/trunk/postproc@4420 b3059339-0415-0410-9bf9-f77b7e298cf2

YV12 like formats support (I420/IYUV)
bugfixes related to vo_x11 resizing git-svn-id: file:///var/local/repositories/mplayer/trunk/postproc@4420 b3059339-0415-0410-9bf9-f77b7e298cf2
00c63c9b · michael · 77796b2c · 00c63c9b · 00c63c9b · 00c63c9b
Commit 00c63c9b authored Jan 30, 2002 by michael
Show whitespace changes
Inline Side-by-side

Showing with 183 additions and 128 deletions

swscale.c swscale.c +142 -123

swscale.h swscale.h +1 -0

swscale_template.c swscale_template.c +40 -5

No files found.
--- a/swscale.c
+++ b/swscale.c
@@ -17,8 +17,9 @@
 */

 /*
-  supported Input formats: YV12 (grayscale soon too)
-  supported output formats: YV12, BGR15, BGR16, BGR24, BGR32 (grayscale soon too)
+  supported Input formats: YV12, I420, IYUV (grayscale soon too)
+  supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32 (grayscale soon too)
+  BGR15/16 support dithering
 */

 #include <inttypes.h>
@@ -56,6 +57,12 @@
 #define PI 3.14159265358979323846
 #endif

+//FIXME replace this with something faster
+#define isYUV(x)       ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
+#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
+#define isHalfChrV(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
+#define isHalfChrH(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
+
 extern int verbose; // defined in mplayer.c
 /*
 NOTES
@@ -63,8 +70,6 @@ NOTES
 known BUGS with known cause (no bugreports please!, but patches are welcome :) )
 horizontal fast_bilinear MMX2 scaler reads 1-7 samples too much (might cause a sig11)

-Supported output formats BGR15 BGR16 BGR24 BGR32 YV12
-BGR15 & BGR16 MMX verions support dithering
 Special versions: fast Y 1:1 scaling (no interpolation in y direction)

 TODO
@@ -507,7 +512,7 @@ static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilt
 // minor note: the HAVE_xyz is messed up after that line so dont use it


-// old global scaler, dont use for new code, unless it uses only the stuff from the command line
+// old global scaler, dont use for new code
 // will use sws_flags from the command line
 void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY ,
 			     int srcSliceH, uint8_t* dst[], int dstStride, int dstbpp,
@@ -515,18 +520,40 @@ void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY ,

 	static SwsContext *context=NULL;
 	int dstFormat;
-	int flags=0;
-	static int firstTime=1;
 	int dstStride3[3]= {dstStride, dstStride>>1, dstStride>>1};

-	if(firstTime)
+	switch(dstbpp)
 	{
+		case 8 : dstFormat= IMGFMT_Y8;		break;
+		case 12: dstFormat= IMGFMT_YV12;	break;
+		case 15: dstFormat= IMGFMT_BGR15;	break;
+		case 16: dstFormat= IMGFMT_BGR16;	break;
+		case 24: dstFormat= IMGFMT_BGR24;	break;
+		case 32: dstFormat= IMGFMT_BGR32;	break;
+		default: return;
+	}
+
+	if(!context) context=getSwsContextFromCmdLine(srcW, srcH, IMGFMT_YV12, dstW, dstH, dstFormat);
+
+	swScale(context, src, srcStride, srcSliceY, srcSliceH, dst, dstStride3);
+}
+
+// will use sws_flags & src_filter (from cmd line)
+SwsContext *getSwsContextFromCmdLine(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat)
+{
+	int flags=0;
+	static int firstTime=1;
+
 #ifdef ARCH_X86
 	if(gCpuCaps.hasMMX)
 		asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
 #endif
-		flags= SWS_PRINT_INFO;
+	if(firstTime)
+	{
 		firstTime=0;
+		flags= SWS_PRINT_INFO;
+	}
+	else if(verbose>1) flags= SWS_PRINT_INFO;

 	if(src_filter.lumH) freeVec(src_filter.lumH);
 	if(src_filter.lumV) freeVec(src_filter.lumV);
@@ -584,18 +611,6 @@ void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY ,

 	if(verbose > 1) printVec(src_filter.chrH);
 	if(verbose > 1) printVec(src_filter.lumH);
-	}
-
-	switch(dstbpp)
-	{
-		case 8 : dstFormat= IMGFMT_Y8;		break;
-		case 12: dstFormat= IMGFMT_YV12;	break;
-		case 15: dstFormat= IMGFMT_BGR15;	break;
-		case 16: dstFormat= IMGFMT_BGR16;	break;
-		case 24: dstFormat= IMGFMT_BGR24;	break;
-		case 32: dstFormat= IMGFMT_BGR32;	break;
-		default: return;
-	}

 	switch(sws_flags)
 	{
@@ -608,12 +623,10 @@ void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY ,
 		default:flags|= SWS_BILINEAR; break;
 	}

-	if(!context) context=getSwsContext(srcW, srcH, IMGFMT_YV12, dstW, dstH, dstFormat, flags, &src_filter, NULL);
-
-
-	swScale(context, src, srcStride, srcSliceY, srcSliceH, dst, dstStride3);
+	return getSwsContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, &src_filter, NULL);
 }

+
 static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
 			      int srcW, int dstW, int filterAlign, int one, int flags,
 			      SwsVector *srcFilter, SwsVector *dstFilter)
@@ -629,7 +642,9 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
 		asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
 #endif

-	*filterPos = (int16_t*)memalign(8, dstW*sizeof(int16_t));
+	*filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t));
+	(*filterPos)[dstW]=0; // the MMX scaler will read over the end 
+
 	if(ABS(xInc - 0x10000) <10) // unscaled
 	{
 		int i;
@@ -846,18 +861,26 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
 		if(min>minFilterSize) minFilterSize= min;
 	}

+	filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
+	filter= (double*)memalign(8, filterSize*dstW*sizeof(double));
+	*outFilterSize= filterSize;
+
+	if((flags&SWS_PRINT_INFO) && verbose)
+		printf("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
 	/* try to reduce the filter-size (step2 reduce it) */
 	for(i=0; i<dstW; i++)
 	{
 		int j;

-		for(j=0; j<minFilterSize; j++)
-			filter2[i*minFilterSize + j]= filter2[i*filter2Size + j];
+		for(j=0; j<filterSize; j++)
+		{
+			if(j>=filter2Size) filter[i*filterSize + j]= 0.0;
+			else		   filter[i*filterSize + j]= filter2[i*filter2Size + j];
 		}
-	if((flags&SWS_PRINT_INFO) && verbose)
-		printf("SwScaler: reducing filtersize %d -> %d\n", filter2Size, minFilterSize);
-	filter2Size= minFilterSize;
-	ASSERT(filter2Size > 0)
+	}
+	free(filter2); filter2=NULL;
+	
+	ASSERT(filterSize > 0)

 	//FIXME try to align filterpos if possible

@@ -868,33 +891,32 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
 		if((*filterPos)[i] < 0)
 		{
 			// Move filter coeffs left to compensate for filterPos
-			for(j=1; j<filter2Size; j++)
+			for(j=1; j<filterSize; j++)
 			{
 				int left= MAX(j + (*filterPos)[i], 0);
-				filter2[i*filter2Size + left] += filter2[i*filter2Size + j];
-				filter2[i*filter2Size + j]=0;
+				filter[i*filterSize + left] += filter[i*filterSize + j];
+				filter[i*filterSize + j]=0;
 			}
 			(*filterPos)[i]= 0;
 		}

-		if((*filterPos)[i] + filter2Size > srcW)
+		if((*filterPos)[i] + filterSize > srcW)
 		{
-			int shift= (*filterPos)[i] + filter2Size - srcW;
+			int shift= (*filterPos)[i] + filterSize - srcW;
 			// Move filter coeffs right to compensate for filterPos
-			for(j=filter2Size-2; j>=0; j--)
+			for(j=filterSize-2; j>=0; j--)
 			{
-				int right= MIN(j + shift, filter2Size-1);
-				filter2[i*filter2Size +right] += filter2[i*filter2Size +j];
-				filter2[i*filter2Size +j]=0;
+				int right= MIN(j + shift, filterSize-1);
+				filter[i*filterSize +right] += filter[i*filterSize +j];
+				filter[i*filterSize +j]=0;
 			}
-			(*filterPos)[i]= srcW - filter2Size;
+			(*filterPos)[i]= srcW - filterSize;
 		}
 	}

-
-	*outFilterSize= (filter2Size +(filterAlign-1)) & (~(filterAlign-1));
-	*outFilter= (int16_t*)memalign(8, *outFilterSize*dstW*sizeof(int16_t));
-	memset(*outFilter, 0, *outFilterSize*dstW*sizeof(int16_t));
+	// Note the +1 is for the MMXscaler which reads over the end
+	*outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
+	memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));

 	/* Normalize & Store in outFilter */
 	for(i=0; i<dstW; i++)
@@ -902,18 +924,18 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
 		int j;
 		double sum=0;
 		double scale= one;
-		for(j=0; j<filter2Size; j++)
+		for(j=0; j<filterSize; j++)
 		{
-			sum+= filter2[i*filter2Size + j];
+			sum+= filter[i*filterSize + j];
 		}
 		scale/= sum;
-		for(j=0; j<filter2Size; j++)
+		for(j=0; j<filterSize; j++)
 		{
-			(*outFilter)[i*(*outFilterSize) + j]= (int)(filter2[i*filter2Size + j]*scale);
+			(*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale);
 		}
 	}

-	free(filter2);
+	free(filter);
 }

 #ifdef ARCH_X86
@@ -1069,7 +1091,6 @@ cpuCaps= gCpuCaps;
 SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
                         SwsFilter *srcFilter, SwsFilter *dstFilter){

-	const int widthAlign= dstFormat==IMGFMT_YV12 ? 16 : 8;
 	SwsContext *c;
 	int i;
 	SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
@@ -1082,17 +1103,10 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
 	if(swScale==NULL) globalInit();

 	/* sanity check */
-	if(srcW<1 || srcH<1 || dstW<1 || dstH<1) return NULL;
+	if(srcW<4 || srcH<1 || dstW<8 || dstH<1) return NULL; //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
+	
+	if(srcFormat!=IMGFMT_YV12 && srcFormat!=IMGFMT_I420 && srcFormat!=IMGFMT_IYUV) return NULL;

-/* FIXME
-	if(dstStride[0]%widthAlign !=0 )
-	{
-		if(flags & SWS_PRINT_INFO)
-			fprintf(stderr, "SwScaler: Warning: dstStride is not a multiple of %d!\n"
-					"SwScaler:          ->cannot do aligned memory acesses anymore\n",
-					widthAlign);
-	}
-*/
 	if(!dstFilter) dstFilter= &dummyFilter;
 	if(!srcFilter) srcFilter= &dummyFilter;

@@ -1135,13 +1149,14 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
 	}

 	/* set chrXInc & chrDstW */
-	if((flags&SWS_FULL_UV_IPOL) && dstFormat!=IMGFMT_YV12)
+	if((flags&SWS_FULL_UV_IPOL) && !isHalfChrH(dstFormat))
 		c->chrXInc= c->lumXInc>>1, c->chrDstW= dstW;
 	else
 		c->chrXInc= c->lumXInc,    c->chrDstW= (dstW+1)>>1;

 	/* set chrYInc & chrDstH */
-	if(dstFormat==IMGFMT_YV12)	c->chrYInc= c->lumYInc,    c->chrDstH= (dstH+1)>>1;
+	if(isHalfChrV(dstFormat))
+		c->chrYInc= c->lumYInc,    c->chrDstH= (dstH+1)>>1;
 	else	c->chrYInc= c->lumYInc>>1, c->chrDstH= dstH;

 	/* precalculate horizontal scaler filter coefficients */
@@ -1191,9 +1206,9 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
 	}

 	// allocate pixbufs (we use dynamic allocation because otherwise we would need to
-	// allocate several megabytes to handle all possible cases)
 	c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
 	c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
+	//Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
 	for(i=0; i<c->vLumBufSize; i++)
 		c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
 	for(i=0; i<c->vChrBufSize; i++)
@@ -1248,6 +1263,10 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
 			fprintf(stderr, "with BGR32 output ");
 		else if(dstFormat==IMGFMT_YV12)
 			fprintf(stderr, "with YV12 output ");
+		else if(dstFormat==IMGFMT_I420)
+			fprintf(stderr, "with I420 output ");
+		else if(dstFormat==IMGFMT_IYUV)
+			fprintf(stderr, "with IYUV output ");
 		else
 			fprintf(stderr, "without output ");

@@ -1295,12 +1314,12 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
 				printf("SwScaler: using C scaler for horizontal scaling\n");
 #endif
 		}
-		if(dstFormat==IMGFMT_YV12)
+		if(isPlanarYUV(dstFormat))
 		{
 			if(c->vLumFilterSize==1)
-				printf("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12)\n", cpuCaps.hasMMX ? "MMX" : "C");
+				printf("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
 			else
-				printf("SwScaler: using n-tap %s scaler for vertical scaling (YV12)\n", cpuCaps.hasMMX ? "MMX" : "C");
+				printf("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
 		}
 		else
 		{
@@ -1561,7 +1580,7 @@ void freeSwsContext(SwsContext *c){

 	if(c->lumPixBuf)
 	{
-		for(i=0; i<c->vLumBufSize*2; i++)
+		for(i=0; i<c->vLumBufSize; i++)
 		{
 			if(c->lumPixBuf[i]) free(c->lumPixBuf[i]);
 			c->lumPixBuf[i]=NULL;
@@ -1572,7 +1591,7 @@ void freeSwsContext(SwsContext *c){

 	if(c->chrPixBuf)
 	{
-		for(i=0; i<c->vChrBufSize*2; i++)
+		for(i=0; i<c->vChrBufSize; i++)
 		{
 			if(c->chrPixBuf[i]) free(c->chrPixBuf[i]);
 			c->chrPixBuf[i]=NULL;

--- a/swscale.h
+++ b/swscale.h
@@ -103,6 +103,7 @@ void SwScale_Init();

 void freeSwsContext(SwsContext *swsContext);

+SwsContext *getSwsContextFromCmdLine(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat);
 SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
 			 SwsFilter *srcFilter, SwsFilter *dstFilter);


--- a/swscale_template.c
+++ b/swscale_template.c
@@ -1974,8 +1974,8 @@ FUNNYUVCODE
   }
 }

-static void RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
-             int srcSliceH, uint8_t* dst[], int dstStride[]){
+static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], int srcSliceY,
+             int srcSliceH, uint8_t* dstParam[], int dstStride[]){

 	/* load a few things into local vars to make the code more readable? and faster */
 	const int srcW= c->srcW;
@@ -2014,6 +2014,41 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int
 	int chrBufIndex= c->chrBufIndex;
 	int lastInLumBuf= c->lastInLumBuf;
 	int lastInChrBuf= c->lastInChrBuf;
+	uint8_t *src[3];
+	uint8_t *dst[3];
+	
+	if((c->srcFormat == IMGFMT_IYUV) || (c->srcFormat == IMGFMT_I420)){
+		src[0]= srcParam[0];
+		src[1]= srcParam[2];
+		src[2]= srcParam[1];
+		
+	}else{
+		src[0]= srcParam[0];
+		src[1]= srcParam[1];
+		src[2]= srcParam[2];
+	}
+
+	if((c->dstFormat == IMGFMT_IYUV) || (c->dstFormat == IMGFMT_I420)){
+		dst[0]= dstParam[0];
+		dst[1]= dstParam[2];
+		dst[2]= dstParam[1];
+		
+	}else{
+		dst[0]= dstParam[0];
+		dst[1]= dstParam[1];
+		dst[2]= dstParam[2];
+	}
+
+	if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
+	{
+		static int firstTime=1; //FIXME move this into the context perhaps
+		if(flags & SWS_PRINT_INFO && firstTime)
+		{
+			fprintf(stderr, "SwScaler: Warning: dstStride is not aligned!\n"
+					"SwScaler:          ->cannot do aligned memory acesses anymore\n");
+			firstTime=0;
+		}
+	}

 	if(srcSliceY ==0){
 		lumBufIndex=0;
@@ -2027,7 +2062,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int
 		unsigned char *dest =dst[0]+dstStride[0]*dstY;
 		unsigned char *uDest=dst[1]+dstStride[1]*(dstY>>1);
 		unsigned char *vDest=dst[2]+dstStride[2]*(dstY>>1);
-		const int chrDstY= dstFormat==IMGFMT_YV12 ? (dstY>>1) : dstY;
+		const int chrDstY= isHalfChrV(dstFormat) ? (dstY>>1) : dstY;

 		const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
 		const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
@@ -2124,7 +2159,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int
 #endif
 	    if(dstY < dstH-2)
 	    {
-		if(dstFormat==IMGFMT_YV12) //YV12
+		if(isPlanarYUV(dstFormat)) //YV12 like
 		{
 			if(dstY&1) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
 			if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12
@@ -2180,7 +2215,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int
 	    {
 		int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
 		int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
-		if(dstFormat==IMGFMT_YV12) //YV12
+		if(isPlanarYUV(dstFormat)) //YV12
 		{
 			if(dstY&1) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
 			yuv2yuvXinC(