Commit e73e16b2 authored by diego

cosmetics: Remove trailing whitespace.


git-svn-id: file:///var/local/repositories/mplayer/trunk/libswscale@23147 b3059339-0415-0410-9bf9-f77b7e298cf2
parent 36629a8f
...@@ -65,12 +65,12 @@ int main(int argc, char **argv) ...@@ -65,12 +65,12 @@ int main(int argc, char **argv)
uint8_t *dstBuffer= (uint8_t*)av_malloc(SIZE); uint8_t *dstBuffer= (uint8_t*)av_malloc(SIZE);
int failedNum=0; int failedNum=0;
int passedNum=0; int passedNum=0;
av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n"); av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n");
args_parse(argc, argv); args_parse(argc, argv);
av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps); av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps);
sws_rgb2rgb_init(cpu_caps); sws_rgb2rgb_init(cpu_caps);
for(funcNum=0; ; funcNum++){ for(funcNum=0; ; funcNum++){
struct func_info_s { struct func_info_s {
int src_bpp; int src_bpp;
...@@ -128,7 +128,7 @@ int main(int argc, char **argv) ...@@ -128,7 +128,7 @@ int main(int argc, char **argv)
uint8_t *src= srcBuffer+srcOffset; uint8_t *src= srcBuffer+srcOffset;
uint8_t *dst= dstBuffer+dstOffset; uint8_t *dst= dstBuffer+dstOffset;
char *name=NULL; char *name=NULL;
if(failed) break; //don't fill the screen with shit ... if(failed) break; //don't fill the screen with shit ...
srcBpp = func_info[funcNum].src_bpp; srcBpp = func_info[funcNum].src_bpp;
...@@ -141,7 +141,7 @@ int main(int argc, char **argv) ...@@ -141,7 +141,7 @@ int main(int argc, char **argv)
for(i=0; i<SIZE; i++){ for(i=0; i<SIZE; i++){
if(srcBuffer[i]!=srcByte){ if(srcBuffer[i]!=srcByte){
av_log(NULL, AV_LOG_INFO, "src damaged at %d w:%d src:%d dst:%d %s\n", av_log(NULL, AV_LOG_INFO, "src damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name); i, width, srcOffset, dstOffset, name);
failed=1; failed=1;
break; break;
...@@ -149,7 +149,7 @@ int main(int argc, char **argv) ...@@ -149,7 +149,7 @@ int main(int argc, char **argv)
} }
for(i=0; i<dstOffset; i++){ for(i=0; i<dstOffset; i++){
if(dstBuffer[i]!=dstByte){ if(dstBuffer[i]!=dstByte){
av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n", av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name); i, width, srcOffset, dstOffset, name);
failed=1; failed=1;
break; break;
...@@ -157,7 +157,7 @@ int main(int argc, char **argv) ...@@ -157,7 +157,7 @@ int main(int argc, char **argv)
} }
for(i=dstOffset + width*dstBpp; i<SIZE; i++){ for(i=dstOffset + width*dstBpp; i<SIZE; i++){
if(dstBuffer[i]!=dstByte){ if(dstBuffer[i]!=dstByte){
av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n", av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name); i, width, srcOffset, dstOffset, name);
failed=1; failed=1;
break; break;
...@@ -169,7 +169,7 @@ int main(int argc, char **argv) ...@@ -169,7 +169,7 @@ int main(int argc, char **argv)
if(failed) failedNum++; if(failed) failedNum++;
else if(srcBpp) passedNum++; else if(srcBpp) passedNum++;
} }
av_log(NULL, AV_LOG_INFO, "\n%d converters passed, %d converters randomly overwrote memory\n", passedNum, failedNum); av_log(NULL, AV_LOG_INFO, "\n%d converters passed, %d converters randomly overwrote memory\n", passedNum, failedNum);
return failedNum; return failedNum;
} }
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with FFmpeg; if not, write to the Free Software * along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
* *
* the C code (not assembly, mmx, ...) of this file can be used * the C code (not assembly, mmx, ...) of this file can be used
* under the LGPL license too * under the LGPL license too
*/ */
...@@ -271,7 +271,7 @@ void palette8tobgr32(const uint8_t *src, uint8_t *dst, long num_pixels, const ui ...@@ -271,7 +271,7 @@ void palette8tobgr32(const uint8_t *src, uint8_t *dst, long num_pixels, const ui
dst[2]= palette[ src[i]*4+2 ]; dst[2]= palette[ src[i]*4+2 ];
//dst[3]= 0; /* do we need this cleansing? */ //dst[3]= 0; /* do we need this cleansing? */
#endif #endif
dst+= 4; dst+= 4;
} }
} }
...@@ -430,7 +430,7 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -430,7 +430,7 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
{ {
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for(i=0; i<num_pixels; i++) for(i=0; i<num_pixels; i++)
{ {
unsigned b,g,r; unsigned b,g,r;
...@@ -447,7 +447,7 @@ void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -447,7 +447,7 @@ void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
{ {
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for(i=0; i<num_pixels; i++) for(i=0; i<num_pixels; i++)
{ {
unsigned b,g,r; unsigned b,g,r;
...@@ -504,7 +504,7 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -504,7 +504,7 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
{ {
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for(i=0; i<num_pixels; i++) for(i=0; i<num_pixels; i++)
{ {
unsigned b,g,r; unsigned b,g,r;
...@@ -521,7 +521,7 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size) ...@@ -521,7 +521,7 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
{ {
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for(i=0; i<num_pixels; i++) for(i=0; i<num_pixels; i++)
{ {
unsigned b,g,r; unsigned b,g,r;
......
...@@ -141,7 +141,7 @@ extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint ...@@ -141,7 +141,7 @@ extern void (*yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint
long width, long height, long width, long height,
long srcStride1, long srcStride2, long srcStride1, long srcStride2,
long srcStride3, long dstStride); long srcStride3, long dstStride);
void sws_rgb2rgb_init(int flags); void sws_rgb2rgb_init(int flags);
#endif #endif
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with FFmpeg; if not, write to the Free Software * along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
* *
* the C code (not assembly, mmx, ...) of this file can be used * the C code (not assembly, mmx, ...) of this file can be used
* under the LGPL license too * under the LGPL license too
*/ */
...@@ -372,7 +372,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -372,7 +372,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_
"pmaddwd %%mm7, %%mm3 \n\t" "pmaddwd %%mm7, %%mm3 \n\t"
"pand %%mm5, %%mm1 \n\t" "pand %%mm5, %%mm1 \n\t"
"pand %%mm5, %%mm4 \n\t" "pand %%mm5, %%mm4 \n\t"
"por %%mm1, %%mm0 \n\t" "por %%mm1, %%mm0 \n\t"
"por %%mm4, %%mm3 \n\t" "por %%mm4, %%mm3 \n\t"
"psrld $5, %%mm0 \n\t" "psrld $5, %%mm0 \n\t"
"pslld $11, %%mm3 \n\t" "pslld $11, %%mm3 \n\t"
...@@ -531,7 +531,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -531,7 +531,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_
"pmaddwd %%mm7, %%mm3 \n\t" "pmaddwd %%mm7, %%mm3 \n\t"
"pand %%mm5, %%mm1 \n\t" "pand %%mm5, %%mm1 \n\t"
"pand %%mm5, %%mm4 \n\t" "pand %%mm5, %%mm4 \n\t"
"por %%mm1, %%mm0 \n\t" "por %%mm1, %%mm0 \n\t"
"por %%mm4, %%mm3 \n\t" "por %%mm4, %%mm3 \n\t"
"psrld $6, %%mm0 \n\t" "psrld $6, %%mm0 \n\t"
"pslld $10, %%mm3 \n\t" "pslld $10, %%mm3 \n\t"
...@@ -978,7 +978,7 @@ static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -978,7 +978,7 @@ static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_
"movq %%mm0, %%mm6\n\t" "movq %%mm0, %%mm6\n\t"
"movq %%mm3, %%mm7\n\t" "movq %%mm3, %%mm7\n\t"
"movq 8%1, %%mm0\n\t" "movq 8%1, %%mm0\n\t"
"movq 8%1, %%mm1\n\t" "movq 8%1, %%mm1\n\t"
"movq 8%1, %%mm2\n\t" "movq 8%1, %%mm2\n\t"
...@@ -1015,7 +1015,7 @@ static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -1015,7 +1015,7 @@ static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_
"movq %%mm3, %%mm5\n\t" "movq %%mm3, %%mm5\n\t"
"movq %%mm6, %%mm0\n\t" "movq %%mm6, %%mm0\n\t"
"movq %%mm7, %%mm1\n\t" "movq %%mm7, %%mm1\n\t"
"movq %%mm4, %%mm6\n\t" "movq %%mm4, %%mm6\n\t"
"movq %%mm5, %%mm7\n\t" "movq %%mm5, %%mm7\n\t"
"movq %%mm0, %%mm2\n\t" "movq %%mm0, %%mm2\n\t"
...@@ -1117,7 +1117,7 @@ static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -1117,7 +1117,7 @@ static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_
"psllq $16, %%mm5\n\t" "psllq $16, %%mm5\n\t"
"por %%mm4, %%mm3\n\t" "por %%mm4, %%mm3\n\t"
"por %%mm5, %%mm3\n\t" "por %%mm5, %%mm3\n\t"
"movq %%mm0, %%mm6\n\t" "movq %%mm0, %%mm6\n\t"
"movq %%mm3, %%mm7\n\t" "movq %%mm3, %%mm7\n\t"
...@@ -1148,7 +1148,7 @@ static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -1148,7 +1148,7 @@ static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_
"por %%mm4, %%mm3\n\t" "por %%mm4, %%mm3\n\t"
"por %%mm5, %%mm3\n\t" "por %%mm5, %%mm3\n\t"
:"=m"(*d) :"=m"(*d)
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
:"memory"); :"memory");
/* Borrowed 32 to 24 */ /* Borrowed 32 to 24 */
__asm __volatile( __asm __volatile(
...@@ -1156,7 +1156,7 @@ static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_ ...@@ -1156,7 +1156,7 @@ static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_
"movq %%mm3, %%mm5\n\t" "movq %%mm3, %%mm5\n\t"
"movq %%mm6, %%mm0\n\t" "movq %%mm6, %%mm0\n\t"
"movq %%mm7, %%mm1\n\t" "movq %%mm7, %%mm1\n\t"
"movq %%mm4, %%mm6\n\t" "movq %%mm4, %%mm6\n\t"
"movq %%mm5, %%mm7\n\t" "movq %%mm5, %%mm7\n\t"
"movq %%mm0, %%mm2\n\t" "movq %%mm0, %%mm2\n\t"
...@@ -1451,7 +1451,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s ...@@ -1451,7 +1451,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
"pand %%mm6, %%mm1 \n\t" "pand %%mm6, %%mm1 \n\t"
"pand %%mm7, %%mm2 \n\t" "pand %%mm7, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t" "por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t"
"movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG
MOVNTQ" %%mm1, (%2, %%"REG_a")\n\t" // RGB RGB RG MOVNTQ" %%mm1, (%2, %%"REG_a")\n\t" // RGB RGB RG
"movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B
...@@ -1460,7 +1460,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s ...@@ -1460,7 +1460,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
"pand %%mm5, %%mm1 \n\t" "pand %%mm5, %%mm1 \n\t"
"pand %%mm6, %%mm2 \n\t" "pand %%mm6, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t" "por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t"
"movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B
MOVNTQ" %%mm1, 8(%2, %%"REG_a")\n\t" // B RGB RGB R MOVNTQ" %%mm1, 8(%2, %%"REG_a")\n\t" // B RGB RGB R
"movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR
...@@ -1469,7 +1469,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s ...@@ -1469,7 +1469,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
"pand %%mm7, %%mm1 \n\t" "pand %%mm7, %%mm1 \n\t"
"pand %%mm5, %%mm2 \n\t" "pand %%mm5, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t" "por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t"
MOVNTQ" %%mm1, 16(%2, %%"REG_a")\n\t" MOVNTQ" %%mm1, 16(%2, %%"REG_a")\n\t"
"add $24, %%"REG_a" \n\t" "add $24, %%"REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
...@@ -1908,16 +1908,16 @@ static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, ...@@ -1908,16 +1908,16 @@ static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc,
static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride) static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
{ {
long x,y; long x,y;
dst[0]= src[0]; dst[0]= src[0];
// first line // first line
for(x=0; x<srcWidth-1; x++){ for(x=0; x<srcWidth-1; x++){
dst[2*x+1]= (3*src[x] + src[x+1])>>2; dst[2*x+1]= (3*src[x] + src[x+1])>>2;
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
} }
dst[2*srcWidth-1]= src[srcWidth-1]; dst[2*srcWidth-1]= src[srcWidth-1];
dst+= dstStride; dst+= dstStride;
for(y=1; y<srcHeight; y++){ for(y=1; y<srcHeight; y++){
...@@ -1983,11 +1983,11 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi ...@@ -1983,11 +1983,11 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
dst+=dstStride*2; dst+=dstStride*2;
src+=srcStride; src+=srcStride;
} }
// last line // last line
#if 1 #if 1
dst[0]= src[0]; dst[0]= src[0];
for(x=0; x<srcWidth-1; x++){ for(x=0; x<srcWidth-1; x++){
dst[2*x+1]= (3*src[x] + src[x+1])>>2; dst[2*x+1]= (3*src[x] + src[x+1])>>2;
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
...@@ -2657,7 +2657,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 ...@@ -2657,7 +2657,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
"punpckhbw %%mm1, %%mm3\n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/ "punpckhbw %%mm1, %%mm3\n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
MOVNTQ" %%mm0, (%4, %0, 8)\n\t" MOVNTQ" %%mm0, (%4, %0, 8)\n\t"
MOVNTQ" %%mm3, 8(%4, %0, 8)\n\t" MOVNTQ" %%mm3, 8(%4, %0, 8)\n\t"
"punpckhbw %%mm2, %%mm6\n\t" /* U2V2 U2V2 U3V3 U3V3*/ "punpckhbw %%mm2, %%mm6\n\t" /* U2V2 U2V2 U3V3 U3V3*/
"movq 8(%1, %0, 4), %%mm0\n\t" "movq 8(%1, %0, 4), %%mm0\n\t"
"movq %%mm0, %%mm3\n\t" "movq %%mm0, %%mm3\n\t"
...@@ -2674,7 +2674,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 ...@@ -2674,7 +2674,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
"punpckhbw %%mm4, %%mm3\n\t" /* Y U5 Y V5 Y U5 Y V5*/ "punpckhbw %%mm4, %%mm3\n\t" /* Y U5 Y V5 Y U5 Y V5*/
MOVNTQ" %%mm0, 32(%4, %0, 8)\n\t" MOVNTQ" %%mm0, 32(%4, %0, 8)\n\t"
MOVNTQ" %%mm3, 40(%4, %0, 8)\n\t" MOVNTQ" %%mm3, 40(%4, %0, 8)\n\t"
"punpckhbw %%mm5, %%mm6\n\t" "punpckhbw %%mm5, %%mm6\n\t"
"movq 24(%1, %0, 4), %%mm0\n\t" "movq 24(%1, %0, 4), %%mm0\n\t"
"movq %%mm0, %%mm3\n\t" "movq %%mm0, %%mm3\n\t"
......
...@@ -35,7 +35,7 @@ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, i ...@@ -35,7 +35,7 @@ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, i
uint64_t ssd=0; uint64_t ssd=0;
//printf("%d %d\n", w, h); //printf("%d %d\n", w, h);
for(y=0; y<h; y++){ for(y=0; y<h; y++){
for(x=0; x<w; x++){ for(x=0; x<w; x++){
int d= src1[x + y*stride1] - src2[x + y*stride2]; int d= src1[x + y*stride1] - src2[x + y*stride2];
...@@ -49,7 +49,7 @@ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, i ...@@ -49,7 +49,7 @@ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, i
// test by ref -> src -> dst -> out & compare out against ref // test by ref -> src -> dst -> out & compare out against ref
// ref & out are YV12 // ref & out are YV12
static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat, int dstFormat, static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat, int dstFormat,
int srcW, int srcH, int dstW, int dstH, int flags){ int srcW, int srcH, int dstW, int dstH, int flags){
uint8_t *src[3]; uint8_t *src[3];
uint8_t *dst[3]; uint8_t *dst[3];
...@@ -59,7 +59,7 @@ static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat ...@@ -59,7 +59,7 @@ static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat
uint64_t ssdY, ssdU, ssdV; uint64_t ssdY, ssdU, ssdV;
struct SwsContext *srcContext, *dstContext, *outContext; struct SwsContext *srcContext, *dstContext, *outContext;
int res; int res;
res = 0; res = 0;
for(i=0; i<3; i++){ for(i=0; i<3; i++){
// avoid stride % bpp != 0 // avoid stride % bpp != 0
...@@ -67,12 +67,12 @@ static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat ...@@ -67,12 +67,12 @@ static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat
srcStride[i]= srcW*3; srcStride[i]= srcW*3;
else else
srcStride[i]= srcW*4; srcStride[i]= srcW*4;
if(dstFormat==PIX_FMT_RGB24 || dstFormat==PIX_FMT_BGR24) if(dstFormat==PIX_FMT_RGB24 || dstFormat==PIX_FMT_BGR24)
dstStride[i]= dstW*3; dstStride[i]= dstW*3;
else else
dstStride[i]= dstW*4; dstStride[i]= dstW*4;
src[i]= (uint8_t*) malloc(srcStride[i]*srcH); src[i]= (uint8_t*) malloc(srcStride[i]*srcH);
dst[i]= (uint8_t*) malloc(dstStride[i]*dstH); dst[i]= (uint8_t*) malloc(dstStride[i]*dstH);
out[i]= (uint8_t*) malloc(refStride[i]*h); out[i]= (uint8_t*) malloc(refStride[i]*h);
...@@ -122,27 +122,27 @@ static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat ...@@ -122,27 +122,27 @@ static int doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat
#if defined(ARCH_X86) #if defined(ARCH_X86)
asm volatile ("emms\n\t"); asm volatile ("emms\n\t");
#endif #endif
ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h); ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1); ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1);
ssdV= getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1); ssdV= getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1);
if(srcFormat == PIX_FMT_GRAY8 || dstFormat==PIX_FMT_GRAY8) ssdU=ssdV=0; //FIXME check that output is really gray if(srcFormat == PIX_FMT_GRAY8 || dstFormat==PIX_FMT_GRAY8) ssdU=ssdV=0; //FIXME check that output is really gray
ssdY/= w*h; ssdY/= w*h;
ssdU/= w*h/4; ssdU/= w*h/4;
ssdV/= w*h/4; ssdV/= w*h/4;
if(ssdY>100 || ssdU>100 || ssdV>100){ if(ssdY>100 || ssdU>100 || ssdV>100){
printf(" %s %dx%d -> %s %4dx%4d flags=%2d SSD=%5lld,%5lld,%5lld\n", printf(" %s %dx%d -> %s %4dx%4d flags=%2d SSD=%5lld,%5lld,%5lld\n",
sws_format_name(srcFormat), srcW, srcH, sws_format_name(srcFormat), srcW, srcH,
sws_format_name(dstFormat), dstW, dstH, sws_format_name(dstFormat), dstW, dstH,
flags, flags,
ssdY, ssdU, ssdV); ssdY, ssdU, ssdV);
} }
end: end:
sws_freeContext(srcContext); sws_freeContext(srcContext);
sws_freeContext(dstContext); sws_freeContext(dstContext);
sws_freeContext(outContext); sws_freeContext(outContext);
...@@ -170,14 +170,14 @@ static void selfTest(uint8_t *src[3], int stride[3], int w, int h){ ...@@ -170,14 +170,14 @@ static void selfTest(uint8_t *src[3], int stride[3], int w, int h){
printf("%s -> %s\n", printf("%s -> %s\n",
sws_format_name(srcFormat), sws_format_name(srcFormat),
sws_format_name(dstFormat)); sws_format_name(dstFormat));
srcW= w; srcW= w;
srcH= h; srcH= h;
for(dstW=w - w/3; dstW<= 4*w/3; dstW+= w/3){ for(dstW=w - w/3; dstW<= 4*w/3; dstW+= w/3){
for(dstH=h - h/3; dstH<= 4*h/3; dstH+= h/3){ for(dstH=h - h/3; dstH<= 4*h/3; dstH+= h/3){
for(flags=1; flags<33; flags*=2) { for(flags=1; flags<33; flags*=2) {
int res; int res;
res = doTest(src, stride, w, h, srcFormat, dstFormat, res = doTest(src, stride, w, h, srcFormat, dstFormat,
srcW, srcH, dstW, dstH, flags); srcW, srcH, dstW, dstH, flags);
if (res < 0) { if (res < 0) {
...@@ -206,7 +206,7 @@ int main(int argc, char **argv){ ...@@ -206,7 +206,7 @@ int main(int argc, char **argv){
struct SwsContext *sws; struct SwsContext *sws;
sws= sws_getContext(W/12, H/12, PIX_FMT_RGB32, W, H, PIX_FMT_YUV420P, 2, NULL, NULL, NULL); sws= sws_getContext(W/12, H/12, PIX_FMT_RGB32, W, H, PIX_FMT_YUV420P, 2, NULL, NULL, NULL);
for(y=0; y<H; y++){ for(y=0; y<H; y++){
for(x=0; x<W*4; x++){ for(x=0; x<W*4; x++){
rgb_data[ x + y*4*W]= random(); rgb_data[ x + y*4*W]= random();
......
This diff is collapsed.
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
/** /**
* @file swscale.h * @file swscale.h
* @brief * @brief
* external api for the swscale stuff * external api for the swscale stuff
*/ */
...@@ -130,7 +130,7 @@ SwsVector *sws_cloneVec(SwsVector *a); ...@@ -130,7 +130,7 @@ SwsVector *sws_cloneVec(SwsVector *a);
void sws_printVec(SwsVector *a); void sws_printVec(SwsVector *a);
void sws_freeVec(SwsVector *a); void sws_freeVec(SwsVector *a);
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
float lumaSarpen, float chromaSharpen, float lumaSarpen, float chromaSharpen,
float chromaHShift, float chromaVShift, float chromaHShift, float chromaVShift,
int verbose); int verbose);
......
...@@ -56,7 +56,7 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) { ...@@ -56,7 +56,7 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) {
vector signed int v23 = vec_perm(v2,v3,perm1); vector signed int v23 = vec_perm(v2,v3,perm1);
vector signed int v34 = vec_perm(v3,v4,perm1); vector signed int v34 = vec_perm(v3,v4,perm1);
vector signed int v45 = vec_perm(v4,v5,perm1); vector signed int v45 = vec_perm(v4,v5,perm1);
vector signed int vA = vec_sra(v12, altivec_vectorShiftInt19); vector signed int vA = vec_sra(v12, altivec_vectorShiftInt19);
vector signed int vB = vec_sra(v23, altivec_vectorShiftInt19); vector signed int vB = vec_sra(v23, altivec_vectorShiftInt19);
vector signed int vC = vec_sra(v34, altivec_vectorShiftInt19); vector signed int vC = vec_sra(v34, altivec_vectorShiftInt19);
...@@ -99,44 +99,44 @@ yuv2yuvX_altivec_real(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, ...@@ -99,44 +99,44 @@ yuv2yuvX_altivec_real(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
register int i, j; register int i, j;
{ {
int __attribute__ ((aligned (16))) val[dstW]; int __attribute__ ((aligned (16))) val[dstW];
for (i = 0; i < (dstW -7); i+=4) { for (i = 0; i < (dstW -7); i+=4) {
vec_st(vini, i << 2, val); vec_st(vini, i << 2, val);
} }
for (; i < dstW; i++) { for (; i < dstW; i++) {
val[i] = (1 << 18); val[i] = (1 << 18);
} }
for (j = 0; j < lumFilterSize; j++) { for (j = 0; j < lumFilterSize; j++) {
vector signed short l1, vLumFilter = vec_ld(j << 1, lumFilter); vector signed short l1, vLumFilter = vec_ld(j << 1, lumFilter);
vector unsigned char perm, perm0 = vec_lvsl(j << 1, lumFilter); vector unsigned char perm, perm0 = vec_lvsl(j << 1, lumFilter);
vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0); vLumFilter = vec_perm(vLumFilter, vLumFilter, perm0);
vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter
perm = vec_lvsl(0, lumSrc[j]); perm = vec_lvsl(0, lumSrc[j]);
l1 = vec_ld(0, lumSrc[j]); l1 = vec_ld(0, lumSrc[j]);
for (i = 0; i < (dstW - 7); i+=8) { for (i = 0; i < (dstW - 7); i+=8) {
int offset = i << 2; int offset = i << 2;
vector signed short l2 = vec_ld((i << 1) + 16, lumSrc[j]); vector signed short l2 = vec_ld((i << 1) + 16, lumSrc[j]);
vector signed int v1 = vec_ld(offset, val); vector signed int v1 = vec_ld(offset, val);
vector signed int v2 = vec_ld(offset + 16, val); vector signed int v2 = vec_ld(offset + 16, val);
vector signed short ls = vec_perm(l1, l2, perm); // lumSrc[j][i] ... lumSrc[j][i+7] vector signed short ls = vec_perm(l1, l2, perm); // lumSrc[j][i] ... lumSrc[j][i+7]
vector signed int i1 = vec_mule(vLumFilter, ls); vector signed int i1 = vec_mule(vLumFilter, ls);
vector signed int i2 = vec_mulo(vLumFilter, ls); vector signed int i2 = vec_mulo(vLumFilter, ls);
vector signed int vf1 = vec_mergeh(i1, i2); vector signed int vf1 = vec_mergeh(i1, i2);
vector signed int vf2 = vec_mergel(i1, i2); // lumSrc[j][i] * lumFilter[j] ... lumSrc[j][i+7] * lumFilter[j] vector signed int vf2 = vec_mergel(i1, i2); // lumSrc[j][i] * lumFilter[j] ... lumSrc[j][i+7] * lumFilter[j]
vector signed int vo1 = vec_add(v1, vf1); vector signed int vo1 = vec_add(v1, vf1);
vector signed int vo2 = vec_add(v2, vf2); vector signed int vo2 = vec_add(v2, vf2);
vec_st(vo1, offset, val); vec_st(vo1, offset, val);
vec_st(vo2, offset + 16, val); vec_st(vo2, offset + 16, val);
l1 = l2; l1 = l2;
} }
for ( ; i < dstW; i++) { for ( ; i < dstW; i++) {
...@@ -157,57 +157,57 @@ yuv2yuvX_altivec_real(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, ...@@ -157,57 +157,57 @@ yuv2yuvX_altivec_real(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
u[i] = (1 << 18); u[i] = (1 << 18);
v[i] = (1 << 18); v[i] = (1 << 18);
} }
for (j = 0; j < chrFilterSize; j++) { for (j = 0; j < chrFilterSize; j++) {
vector signed short l1, l1_V, vChrFilter = vec_ld(j << 1, chrFilter); vector signed short l1, l1_V, vChrFilter = vec_ld(j << 1, chrFilter);
vector unsigned char perm, perm0 = vec_lvsl(j << 1, chrFilter); vector unsigned char perm, perm0 = vec_lvsl(j << 1, chrFilter);
vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0); vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0);
vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter
perm = vec_lvsl(0, chrSrc[j]); perm = vec_lvsl(0, chrSrc[j]);
l1 = vec_ld(0, chrSrc[j]); l1 = vec_ld(0, chrSrc[j]);
l1_V = vec_ld(2048 << 1, chrSrc[j]); l1_V = vec_ld(2048 << 1, chrSrc[j]);
for (i = 0; i < (chrDstW - 7); i+=8) { for (i = 0; i < (chrDstW - 7); i+=8) {
int offset = i << 2; int offset = i << 2;
vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]); vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]);
vector signed short l2_V = vec_ld(((i + 2048) << 1) + 16, chrSrc[j]); vector signed short l2_V = vec_ld(((i + 2048) << 1) + 16, chrSrc[j]);
vector signed int v1 = vec_ld(offset, u); vector signed int v1 = vec_ld(offset, u);
vector signed int v2 = vec_ld(offset + 16, u); vector signed int v2 = vec_ld(offset + 16, u);
vector signed int v1_V = vec_ld(offset, v); vector signed int v1_V = vec_ld(offset, v);
vector signed int v2_V = vec_ld(offset + 16, v); vector signed int v2_V = vec_ld(offset + 16, v);
vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7] vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7]
vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+2048] ... chrSrc[j][i+2055] vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+2048] ... chrSrc[j][i+2055]
vector signed int i1 = vec_mule(vChrFilter, ls); vector signed int i1 = vec_mule(vChrFilter, ls);
vector signed int i2 = vec_mulo(vChrFilter, ls); vector signed int i2 = vec_mulo(vChrFilter, ls);
vector signed int i1_V = vec_mule(vChrFilter, ls_V); vector signed int i1_V = vec_mule(vChrFilter, ls_V);
vector signed int i2_V = vec_mulo(vChrFilter, ls_V); vector signed int i2_V = vec_mulo(vChrFilter, ls_V);
vector signed int vf1 = vec_mergeh(i1, i2); vector signed int vf1 = vec_mergeh(i1, i2);
vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j] vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
vector signed int vf1_V = vec_mergeh(i1_V, i2_V); vector signed int vf1_V = vec_mergeh(i1_V, i2_V);
vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j] vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j]
vector signed int vo1 = vec_add(v1, vf1); vector signed int vo1 = vec_add(v1, vf1);
vector signed int vo2 = vec_add(v2, vf2); vector signed int vo2 = vec_add(v2, vf2);
vector signed int vo1_V = vec_add(v1_V, vf1_V); vector signed int vo1_V = vec_add(v1_V, vf1_V);
vector signed int vo2_V = vec_add(v2_V, vf2_V); vector signed int vo2_V = vec_add(v2_V, vf2_V);
vec_st(vo1, offset, u); vec_st(vo1, offset, u);
vec_st(vo2, offset + 16, u); vec_st(vo2, offset + 16, u);
vec_st(vo1_V, offset, v); vec_st(vo1_V, offset, v);
vec_st(vo2_V, offset + 16, v); vec_st(vo2_V, offset + 16, v);
l1 = l2; l1 = l2;
l1_V = l2_V; l1_V = l2_V;
} }
for ( ; i < chrDstW; i++) { for ( ; i < chrDstW; i++) {
u[i] += chrSrc[j][i] * chrFilter[j]; u[i] += chrSrc[j][i] * chrFilter[j];
v[i] += chrSrc[j][i + 2048] * chrFilter[j]; v[i] += chrSrc[j][i + 2048] * chrFilter[j];
} }
} }
altivec_packIntArrayToCharArray(u,uDest,chrDstW); altivec_packIntArrayToCharArray(u,uDest,chrDstW);
altivec_packIntArrayToCharArray(v,vDest,chrDstW); altivec_packIntArrayToCharArray(v,vDest,chrDstW);
...@@ -325,7 +325,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int ...@@ -325,7 +325,7 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int
} }
} }
break; break;
default: default:
{ {
for(i=0; i<dstW; i++) { for(i=0; i<dstW; i++) {
...@@ -342,17 +342,17 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int ...@@ -342,17 +342,17 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int
for (j = 0 ; j < filterSize - 15; j += 16) { for (j = 0 ; j < filterSize - 15; j += 16) {
vector unsigned char src_v1 = vec_ld(srcPos + j + 16, src); vector unsigned char src_v1 = vec_ld(srcPos + j + 16, src);
vector unsigned char src_vF = vec_perm(src_v0, src_v1, permS); vector unsigned char src_vF = vec_perm(src_v0, src_v1, permS);
vector signed short src_vA = // vec_unpackh sign-extends... vector signed short src_vA = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF)); (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
vector signed short src_vB = // vec_unpackh sign-extends... vector signed short src_vB = // vec_unpackh sign-extends...
(vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF)); (vector signed short)(vec_mergel((vector unsigned char)vzero, src_vF));
vector signed short filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter); vector signed short filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
vector signed short filter_v2R = vec_ld((i * 2 * filterSize) + (j * 2) + 32, filter); vector signed short filter_v2R = vec_ld((i * 2 * filterSize) + (j * 2) + 32, filter);
vector signed short filter_v0 = vec_perm(filter_v0R, filter_v1R, permF); vector signed short filter_v0 = vec_perm(filter_v0R, filter_v1R, permF);
vector signed short filter_v1 = vec_perm(filter_v1R, filter_v2R, permF); vector signed short filter_v1 = vec_perm(filter_v1R, filter_v2R, permF);
vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v); vector signed int val_acc = vec_msums(src_vA, filter_v0, val_v);
val_v = vec_msums(src_vB, filter_v1, val_acc); val_v = vec_msums(src_vB, filter_v1, val_acc);
...@@ -369,23 +369,23 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int ...@@ -369,23 +369,23 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int
src_v1 = vec_ld(srcPos + j + 16, src); src_v1 = vec_ld(srcPos + j + 16, src);
} }
src_vF = vec_perm(src_v0, src_v1, permS); src_vF = vec_perm(src_v0, src_v1, permS);
src_v = // vec_unpackh sign-extends... src_v = // vec_unpackh sign-extends...
(vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF)); (vector signed short)(vec_mergeh((vector unsigned char)vzero, src_vF));
// loading filter_v0R is useless, it's already done above // loading filter_v0R is useless, it's already done above
//vector signed short filter_v0R = vec_ld((i * 2 * filterSize) + j, filter); //vector signed short filter_v0R = vec_ld((i * 2 * filterSize) + j, filter);
filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter); filter_v1R = vec_ld((i * 2 * filterSize) + (j * 2) + 16, filter);
filter_v = vec_perm(filter_v0R, filter_v1R, permF); filter_v = vec_perm(filter_v0R, filter_v1R, permF);
val_v = vec_msums(src_v, filter_v, val_v); val_v = vec_msums(src_v, filter_v, val_v);
} }
val_s = vec_sums(val_v, vzero); val_s = vec_sums(val_v, vzero);
vec_st(val_s, 0, tempo); vec_st(val_s, 0, tempo);
dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1); dst[i] = av_clip(tempo[3]>>7, 0, (1<<15)-1);
} }
} }
} }
} }
...@@ -403,7 +403,7 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int ...@@ -403,7 +403,7 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int
const int chromStride = srcStride[1]; const int chromStride = srcStride[1];
const int dstStride = dstStride_a[0]; const int dstStride = dstStride_a[0];
const vector unsigned char yperm = vec_lvsl(0, ysrc); const vector unsigned char yperm = vec_lvsl(0, ysrc);
const int vertLumPerChroma = 2; const int vertLumPerChroma = 2;
register unsigned int y; register unsigned int y;
if(width&15){ if(width&15){
...@@ -418,7 +418,7 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int ...@@ -418,7 +418,7 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int
3) width is a multiple of 16 3) width is a multiple of 16
4) lum&chrom stride are multiple of 8 4) lum&chrom stride are multiple of 8
*/ */
for(y=0; y<height; y++) for(y=0; y<height; y++)
{ {
int i; int i;
...@@ -465,7 +465,7 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int ...@@ -465,7 +465,7 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int
ysrc += lumStride; ysrc += lumStride;
dst += dstStride; dst += dstStride;
} }
return srcSliceH; return srcSliceH;
} }
...@@ -497,7 +497,7 @@ static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int ...@@ -497,7 +497,7 @@ static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int
3) width is a multiple of 16 3) width is a multiple of 16
4) lum&chrom stride are multiple of 8 4) lum&chrom stride are multiple of 8
*/ */
for(y=0; y<height; y++) for(y=0; y<height; y++)
{ {
int i; int i;
......
...@@ -457,9 +457,9 @@ ...@@ -457,9 +457,9 @@
"psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
"paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c) #define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
#define REAL_YSCALEYUV2RGB(index, c) \ #define REAL_YSCALEYUV2RGB(index, c) \
"xor "#index", "#index" \n\t"\ "xor "#index", "#index" \n\t"\
ASMALIGN(4)\ ASMALIGN(4)\
...@@ -525,7 +525,7 @@ ...@@ -525,7 +525,7 @@
"packuswb %%mm3, %%mm4 \n\t"\ "packuswb %%mm3, %%mm4 \n\t"\
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
#define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c) #define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c)
#define REAL_YSCALEYUV2PACKED1(index, c) \ #define REAL_YSCALEYUV2PACKED1(index, c) \
"xor "#index", "#index" \n\t"\ "xor "#index", "#index" \n\t"\
ASMALIGN(4)\ ASMALIGN(4)\
...@@ -538,9 +538,9 @@ ...@@ -538,9 +538,9 @@
"movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
"psraw $7, %%mm1 \n\t" \ "psraw $7, %%mm1 \n\t" \
"psraw $7, %%mm7 \n\t" \ "psraw $7, %%mm7 \n\t" \
#define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c) #define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
#define REAL_YSCALEYUV2RGB1(index, c) \ #define REAL_YSCALEYUV2RGB1(index, c) \
"xor "#index", "#index" \n\t"\ "xor "#index", "#index" \n\t"\
ASMALIGN(4)\ ASMALIGN(4)\
...@@ -605,9 +605,9 @@ ...@@ -605,9 +605,9 @@
"movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
"movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\
"psraw $7, %%mm1 \n\t" \ "psraw $7, %%mm1 \n\t" \
"psraw $7, %%mm7 \n\t" "psraw $7, %%mm7 \n\t"
#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c) #define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
// do vertical chrominance interpolation // do vertical chrominance interpolation
#define REAL_YSCALEYUV2RGB1b(index, c) \ #define REAL_YSCALEYUV2RGB1b(index, c) \
"xor "#index", "#index" \n\t"\ "xor "#index", "#index" \n\t"\
...@@ -1001,7 +1001,7 @@ static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, ...@@ -1001,7 +1001,7 @@ static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
for(i=0; i<dstW; i++) for(i=0; i<dstW; i++)
{ {
int val= lumSrc[i]>>7; int val= lumSrc[i]>>7;
if(val&256){ if(val&256){
if(val<0) val=0; if(val<0) val=0;
else val=255; else val=255;
...@@ -1056,7 +1056,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ ...@@ -1056,7 +1056,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_
WRITEBGR24(%%REGc, %5, %%REGa) WRITEBGR24(%%REGc, %5, %%REGa)
:: "r" (&c->redDither), :: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW) "r" (dest), "m" (dstW)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
...@@ -1116,7 +1116,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_ ...@@ -1116,7 +1116,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_
"add %4, %%"REG_c" \n\t" "add %4, %%"REG_c" \n\t"
WRITEBGR24(%%REGc, %5, %%REGa) WRITEBGR24(%%REGc, %5, %%REGa)
:: "r" (&c->redDither), :: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW) "r" (dest), "m" (dstW)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
...@@ -1503,7 +1503,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t * ...@@ -1503,7 +1503,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *
{ {
const int yalpha1=0; const int yalpha1=0;
int i; int i;
uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
const int yalpha= 4096; //FIXME ... const int yalpha= 4096; //FIXME ...
...@@ -1980,7 +1980,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 ...@@ -1980,7 +1980,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
#endif #endif
"movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
"movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
"pmaddwd %%mm0, %%mm1 \n\t" "pmaddwd %%mm0, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t" "pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm0 \n\t" "pmaddwd %%mm6, %%mm0 \n\t"
...@@ -2026,7 +2026,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 ...@@ -2026,7 +2026,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
#endif #endif
"movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t"
"movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t"
"pmaddwd %%mm4, %%mm1 \n\t" "pmaddwd %%mm4, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t" "pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm4 \n\t" "pmaddwd %%mm6, %%mm4 \n\t"
...@@ -2044,7 +2044,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 ...@@ -2044,7 +2044,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
"add $24, %%"REG_d" \n\t" "add $24, %%"REG_d" \n\t"
"packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
"psraw $7, %%mm4 \n\t" "psraw $7, %%mm4 \n\t"
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
"punpckldq %%mm4, %%mm0 \n\t" "punpckldq %%mm4, %%mm0 \n\t"
"punpckhdq %%mm4, %%mm1 \n\t" "punpckhdq %%mm4, %%mm1 \n\t"
...@@ -2095,7 +2095,7 @@ static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 ...@@ -2095,7 +2095,7 @@ static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
for(i=0; i<width; i++) for(i=0; i<width; i++)
{ {
int d0= ((uint32_t*)src1)[i]; int d0= ((uint32_t*)src1)[i];
int dl= (d0&0x07E0F81F); int dl= (d0&0x07E0F81F);
int dh= ((d0>>5)&0x07C0F83F); int dh= ((d0>>5)&0x07C0F83F);
...@@ -2131,7 +2131,7 @@ static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 ...@@ -2131,7 +2131,7 @@ static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
for(i=0; i<width; i++) for(i=0; i<width; i++)
{ {
int d0= ((uint32_t*)src1)[i]; int d0= ((uint32_t*)src1)[i];
int dl= (d0&0x03E07C1F); int dl= (d0&0x03E07C1F);
int dh= ((d0>>5)&0x03E0F81F); int dh= ((d0>>5)&0x03E0F81F);
...@@ -2228,7 +2228,7 @@ static inline void RENAME(rgb16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 ...@@ -2228,7 +2228,7 @@ static inline void RENAME(rgb16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
for(i=0; i<width; i++) for(i=0; i<width; i++)
{ {
int d0= ((uint32_t*)src1)[i]; int d0= ((uint32_t*)src1)[i];
int dl= (d0&0x07E0F81F); int dl= (d0&0x07E0F81F);
int dh= ((d0>>5)&0x07C0F83F); int dh= ((d0>>5)&0x07C0F83F);
...@@ -2264,7 +2264,7 @@ static inline void RENAME(rgb15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1 ...@@ -2264,7 +2264,7 @@ static inline void RENAME(rgb15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
for(i=0; i<width; i++) for(i=0; i<width; i++)
{ {
int d0= ((uint32_t*)src1)[i]; int d0= ((uint32_t*)src1)[i];
int dl= (d0&0x03E07C1F); int dl= (d0&0x03E07C1F);
int dh= ((d0>>5)&0x03E0F81F); int dh= ((d0>>5)&0x03E0F81F);
...@@ -2392,7 +2392,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW ...@@ -2392,7 +2392,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
"pmaddwd %%mm2, %%mm5 \n\t" "pmaddwd %%mm2, %%mm5 \n\t"
"paddd %%mm4, %%mm0 \n\t" "paddd %%mm4, %%mm0 \n\t"
"paddd %%mm5, %%mm3 \n\t" "paddd %%mm5, %%mm3 \n\t"
"psrad $8, %%mm0 \n\t" "psrad $8, %%mm0 \n\t"
"psrad $8, %%mm3 \n\t" "psrad $8, %%mm3 \n\t"
"packssdw %%mm3, %%mm0 \n\t" "packssdw %%mm3, %%mm0 \n\t"
...@@ -2489,7 +2489,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW ...@@ -2489,7 +2489,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
// *** horizontal scale Y line to temp buffer // *** horizontal scale Y line to temp buffer
static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc, static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, int srcW, int xInc,
int flags, int canMMX2BeUsed, int16_t *hLumFilter, int flags, int canMMX2BeUsed, int16_t *hLumFilter,
int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode, int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
int32_t *mmx2FilterPos, uint8_t *pal) int32_t *mmx2FilterPos, uint8_t *pal)
{ {
...@@ -2862,7 +2862,7 @@ FUNNY_UV_CODE ...@@ -2862,7 +2862,7 @@ FUNNY_UV_CODE
{ {
#endif #endif
long xInc_shr16 = (long) (xInc >> 16); long xInc_shr16 = (long) (xInc >> 16);
uint16_t xInc_mask = xInc & 0xffff; uint16_t xInc_mask = xInc & 0xffff;
asm volatile( asm volatile(
"xor %%"REG_a", %%"REG_a" \n\t" // i "xor %%"REG_a", %%"REG_a" \n\t" // i
"xor %%"REG_d", %%"REG_d" \n\t" // xx "xor %%"REG_d", %%"REG_d" \n\t" // xx
...@@ -2975,7 +2975,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s ...@@ -2975,7 +2975,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
int chrBufIndex= c->chrBufIndex; int chrBufIndex= c->chrBufIndex;
int lastInLumBuf= c->lastInLumBuf; int lastInLumBuf= c->lastInLumBuf;
int lastInChrBuf= c->lastInChrBuf; int lastInChrBuf= c->lastInChrBuf;
if(isPacked(c->srcFormat)){ if(isPacked(c->srcFormat)){
pal= src[1]; pal= src[1];
src[0]= src[0]=
...@@ -3020,7 +3020,7 @@ i--; ...@@ -3020,7 +3020,7 @@ i--;
if(srcSliceY ==0){ if(srcSliceY ==0){
lumBufIndex=0; lumBufIndex=0;
chrBufIndex=0; chrBufIndex=0;
dstY=0; dstY=0;
lastInLumBuf= -1; lastInLumBuf= -1;
lastInChrBuf= -1; lastInChrBuf= -1;
} }
...@@ -3062,7 +3062,7 @@ i--; ...@@ -3062,7 +3062,7 @@ i--;
// printf("%d %d\n", lumBufIndex, vLumBufSize); // printf("%d %d\n", lumBufIndex, vLumBufSize);
RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
funnyYCode, c->srcFormat, formatConvBuffer, funnyYCode, c->srcFormat, formatConvBuffer,
c->lumMmx2Filter, c->lumMmx2FilterPos, pal); c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
lastInLumBuf++; lastInLumBuf++;
} }
...@@ -3079,7 +3079,7 @@ i--; ...@@ -3079,7 +3079,7 @@ i--;
if(!(isGray(srcFormat) || isGray(dstFormat))) if(!(isGray(srcFormat) || isGray(dstFormat)))
RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
funnyUVCode, c->srcFormat, formatConvBuffer, funnyUVCode, c->srcFormat, formatConvBuffer,
c->chrMmx2Filter, c->chrMmx2FilterPos, pal); c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
lastInChrBuf++; lastInChrBuf++;
} }
...@@ -3104,7 +3104,7 @@ i--; ...@@ -3104,7 +3104,7 @@ i--;
ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
funnyYCode, c->srcFormat, formatConvBuffer, funnyYCode, c->srcFormat, formatConvBuffer,
c->lumMmx2Filter, c->lumMmx2FilterPos, pal); c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
lastInLumBuf++; lastInLumBuf++;
} }
...@@ -3120,7 +3120,7 @@ i--; ...@@ -3120,7 +3120,7 @@ i--;
if(!(isGray(srcFormat) || isGray(dstFormat))) if(!(isGray(srcFormat) || isGray(dstFormat)))
RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
funnyUVCode, c->srcFormat, formatConvBuffer, funnyUVCode, c->srcFormat, formatConvBuffer,
c->chrMmx2Filter, c->chrMmx2FilterPos, pal); c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
lastInChrBuf++; lastInChrBuf++;
} }
...@@ -3162,16 +3162,16 @@ i--; ...@@ -3162,16 +3162,16 @@ i--;
{ {
lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i]; lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32; lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
lumMmxFilter[4*i+2]= lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]= lumMmxFilter[4*i+3]=
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
} }
for(i=0; i<vChrFilterSize; i++) for(i=0; i<vChrFilterSize; i++)
{ {
chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i]; chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32; chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
chrMmxFilter[4*i+2]= chrMmxFilter[4*i+2]=
chrMmxFilter[4*i+3]= chrMmxFilter[4*i+3]=
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001; ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
} }
} }
...@@ -3257,7 +3257,7 @@ i--; ...@@ -3257,7 +3257,7 @@ i--;
{ {
ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
yuv2packedXinC(c, yuv2packedXinC(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, dstW, dstY); dest, dstW, dstY);
......
...@@ -427,15 +427,15 @@ PROLOG(yuv2rgb_c_4, uint8_t) ...@@ -427,15 +427,15 @@ PROLOG(yuv2rgb_c_4, uint8_t)
acc = r[Y] + g[Y] + b[Y]; \ acc = r[Y] + g[Y] + b[Y]; \
Y = py_1[2*i+1]; \ Y = py_1[2*i+1]; \
acc |= (r[Y] + g[Y] + b[Y])<<4;\ acc |= (r[Y] + g[Y] + b[Y])<<4;\
dst_1[i] = acc; dst_1[i] = acc;
#define DST2_4(i) \ #define DST2_4(i) \
Y = py_2[2*i]; \ Y = py_2[2*i]; \
acc = r[Y] + g[Y] + b[Y]; \ acc = r[Y] + g[Y] + b[Y]; \
Y = py_2[2*i+1]; \ Y = py_2[2*i+1]; \
acc |= (r[Y] + g[Y] + b[Y])<<4;\ acc |= (r[Y] + g[Y] + b[Y])<<4;\
dst_2[i] = acc; dst_2[i] = acc;
RGB(0); RGB(0);
DST1_4(0); DST1_4(0);
DST2_4(0); DST2_4(0);
...@@ -572,7 +572,7 @@ PROLOG(yuv2rgb_c_1_ordered_dither, uint8_t) ...@@ -572,7 +572,7 @@ PROLOG(yuv2rgb_c_1_ordered_dither, uint8_t)
DST2bpp1(3,6); DST2bpp1(3,6);
DST1bpp1(3,6); DST1bpp1(3,6);
dst_1[0]= out_1; dst_1[0]= out_1;
dst_2[0]= out_2; dst_2[0]= out_2;
EPILOG(1) EPILOG(1)
...@@ -644,7 +644,7 @@ static int div_round (int dividend, int divisor) ...@@ -644,7 +644,7 @@ static int div_round (int dividend, int divisor)
} }
int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation) int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation)
{ {
const int isRgb = isBGR(c->dstFormat); const int isRgb = isBGR(c->dstFormat);
const int bpp = fmt_depth(c->dstFormat); const int bpp = fmt_depth(c->dstFormat);
int i; int i;
...@@ -676,7 +676,7 @@ int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange, ...@@ -676,7 +676,7 @@ int yuv2rgb_c_init_tables (SwsContext *c, const int inv_table[4], int fullRange,
cgu= (cgu*224) / 255; cgu= (cgu*224) / 255;
cgv= (cgv*224) / 255; cgv= (cgv*224) / 255;
} }
cy = (cy *contrast )>>16; cy = (cy *contrast )>>16;
crv= (crv*contrast * saturation)>>32; crv= (crv*contrast * saturation)>>32;
cbu= (cbu*contrast * saturation)>>32; cbu= (cbu*contrast * saturation)>>32;
......
...@@ -59,7 +59,7 @@ ...@@ -59,7 +59,7 @@
NOTE quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor NOTE quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor
Integrated luma prescaling adjustment for saturation/contrast/brightness adjustment. Integrated luma prescaling adjustment for saturation/contrast/brightness adjustment.
*/ */
/* /*
...@@ -443,105 +443,105 @@ DEFCSP420_CVT (yuv2_abgr, out_abgr) ...@@ -443,105 +443,105 @@ DEFCSP420_CVT (yuv2_abgr, out_abgr)
#if 1 #if 1
DEFCSP420_CVT (yuv2_bgra, out_bgra) DEFCSP420_CVT (yuv2_bgra, out_bgra)
#else #else
static int altivec_yuv2_bgra32 (SwsContext *c, static int altivec_yuv2_bgra32 (SwsContext *c,
unsigned char **in, int *instrides, unsigned char **in, int *instrides,
int srcSliceY, int srcSliceH, int srcSliceY, int srcSliceH,
unsigned char **oplanes, int *outstrides) unsigned char **oplanes, int *outstrides)
{ {
int w = c->srcW; int w = c->srcW;
int h = srcSliceH; int h = srcSliceH;
int i,j; int i,j;
int instrides_scl[3]; int instrides_scl[3];
vector unsigned char y0,y1; vector unsigned char y0,y1;
vector signed char u,v; vector signed char u,v;
vector signed short Y0,Y1,Y2,Y3; vector signed short Y0,Y1,Y2,Y3;
vector signed short U,V; vector signed short U,V;
vector signed short vx,ux,uvx; vector signed short vx,ux,uvx;
vector signed short vx0,ux0,uvx0; vector signed short vx0,ux0,uvx0;
vector signed short vx1,ux1,uvx1; vector signed short vx1,ux1,uvx1;
vector signed short R0,G0,B0; vector signed short R0,G0,B0;
vector signed short R1,G1,B1; vector signed short R1,G1,B1;
vector unsigned char R,G,B; vector unsigned char R,G,B;
vector unsigned char *uivP, *vivP; vector unsigned char *uivP, *vivP;
vector unsigned char align_perm; vector unsigned char align_perm;
vector signed short vector signed short
lCY = c->CY, lCY = c->CY,
lOY = c->OY, lOY = c->OY,
lCRV = c->CRV, lCRV = c->CRV,
lCBU = c->CBU, lCBU = c->CBU,
lCGU = c->CGU, lCGU = c->CGU,
lCGV = c->CGV; lCGV = c->CGV;
vector unsigned short lCSHIFT = c->CSHIFT; vector unsigned short lCSHIFT = c->CSHIFT;
ubyte *y1i = in[0]; ubyte *y1i = in[0];
ubyte *y2i = in[0]+w; ubyte *y2i = in[0]+w;
ubyte *ui = in[1]; ubyte *ui = in[1];
ubyte *vi = in[2]; ubyte *vi = in[2];
vector unsigned char *oute vector unsigned char *oute
= (vector unsigned char *) = (vector unsigned char *)
(oplanes[0]+srcSliceY*outstrides[0]); (oplanes[0]+srcSliceY*outstrides[0]);
vector unsigned char *outo vector unsigned char *outo
= (vector unsigned char *) = (vector unsigned char *)
(oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]); (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);
instrides_scl[0] = instrides[0]; instrides_scl[0] = instrides[0];
instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */ instrides_scl[1] = instrides[1]-w/2; /* the loop moves ui by w/2 */
instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */ instrides_scl[2] = instrides[2]-w/2; /* the loop moves vi by w/2 */
for (i=0;i<h/2;i++) { for (i=0;i<h/2;i++) {
vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0); vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);
vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1); vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);
for (j=0;j<w/16;j++) { for (j=0;j<w/16;j++) {
y0 = vec_ldl (0,y1i); y0 = vec_ldl (0,y1i);
y1 = vec_ldl (0,y2i); y1 = vec_ldl (0,y2i);
uivP = (vector unsigned char *)ui; uivP = (vector unsigned char *)ui;
vivP = (vector unsigned char *)vi; vivP = (vector unsigned char *)vi;
align_perm = vec_lvsl (0, ui); align_perm = vec_lvsl (0, ui);
u = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm); u = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm);
align_perm = vec_lvsl (0, vi); align_perm = vec_lvsl (0, vi);
v = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm); v = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm);
u = (vector signed char) u = (vector signed char)
vec_sub (u,(vector signed char) vec_sub (u,(vector signed char)
vec_splat((vector signed char)AVV(128),0)); vec_splat((vector signed char)AVV(128),0));
v = (vector signed char) v = (vector signed char)
vec_sub (v, (vector signed char) vec_sub (v, (vector signed char)
vec_splat((vector signed char)AVV(128),0)); vec_splat((vector signed char)AVV(128),0));
U = vec_unpackh (u); U = vec_unpackh (u);
V = vec_unpackh (v); V = vec_unpackh (v);
Y0 = vec_unh (y0); Y0 = vec_unh (y0);
Y1 = vec_unl (y0); Y1 = vec_unl (y0);
Y2 = vec_unh (y1); Y2 = vec_unh (y1);
Y3 = vec_unl (y1); Y3 = vec_unl (y1);
Y0 = vec_mradds (Y0, lCY, lOY); Y0 = vec_mradds (Y0, lCY, lOY);
Y1 = vec_mradds (Y1, lCY, lOY); Y1 = vec_mradds (Y1, lCY, lOY);
Y2 = vec_mradds (Y2, lCY, lOY); Y2 = vec_mradds (Y2, lCY, lOY);
Y3 = vec_mradds (Y3, lCY, lOY); Y3 = vec_mradds (Y3, lCY, lOY);
/* ux = (CBU*(u<<CSHIFT)+0x4000)>>15 */ /* ux = (CBU*(u<<CSHIFT)+0x4000)>>15 */
ux = vec_sl (U, lCSHIFT); ux = vec_sl (U, lCSHIFT);
ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0)); ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0));
ux0 = vec_mergeh (ux,ux); ux0 = vec_mergeh (ux,ux);
ux1 = vec_mergel (ux,ux); ux1 = vec_mergel (ux,ux);
/* vx = (CRV*(v<<CSHIFT)+0x4000)>>15; */ /* vx = (CRV*(v<<CSHIFT)+0x4000)>>15; */
vx = vec_sl (V, lCSHIFT); vx = vec_sl (V, lCSHIFT);
vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0)); vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0));
vx0 = vec_mergeh (vx,vx); vx0 = vec_mergeh (vx,vx);
vx1 = vec_mergel (vx,vx); vx1 = vec_mergel (vx,vx);
...@@ -559,7 +559,7 @@ static int altivec_yuv2_bgra32 (SwsContext *c, ...@@ -559,7 +559,7 @@ static int altivec_yuv2_bgra32 (SwsContext *c,
R = vec_packclp (R0,R1); R = vec_packclp (R0,R1);
G = vec_packclp (G0,G1); G = vec_packclp (G0,G1);
B = vec_packclp (B0,B1); B = vec_packclp (B0,B1);
out_argb(R,G,B,oute); out_argb(R,G,B,oute);
R0 = vec_add (Y2,vx0); R0 = vec_add (Y2,vx0);
G0 = vec_add (Y2,uvx0); G0 = vec_add (Y2,uvx0);
...@@ -570,24 +570,24 @@ static int altivec_yuv2_bgra32 (SwsContext *c, ...@@ -570,24 +570,24 @@ static int altivec_yuv2_bgra32 (SwsContext *c,
R = vec_packclp (R0,R1); R = vec_packclp (R0,R1);
G = vec_packclp (G0,G1); G = vec_packclp (G0,G1);
B = vec_packclp (B0,B1); B = vec_packclp (B0,B1);
out_argb(R,G,B,outo); out_argb(R,G,B,outo);
y1i += 16; y1i += 16;
y2i += 16; y2i += 16;
ui += 8; ui += 8;
vi += 8; vi += 8;
} }
outo += (outstrides[0])>>4; outo += (outstrides[0])>>4;
oute += (outstrides[0])>>4; oute += (outstrides[0])>>4;
ui += instrides_scl[1]; ui += instrides_scl[1];
vi += instrides_scl[2]; vi += instrides_scl[2];
y1i += instrides_scl[0]; y1i += instrides_scl[0];
y2i += instrides_scl[0]; y2i += instrides_scl[0];
} }
return srcSliceH; return srcSliceH;
} }
#endif #endif
...@@ -686,15 +686,15 @@ static int altivec_uyvy_rgb32 (SwsContext *c, ...@@ -686,15 +686,15 @@ static int altivec_uyvy_rgb32 (SwsContext *c,
*/ */
SwsFunc yuv2rgb_init_altivec (SwsContext *c) SwsFunc yuv2rgb_init_altivec (SwsContext *c)
{ {
if (!(c->flags & SWS_CPU_CAPS_ALTIVEC)) if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
return NULL; return NULL;
/* /*
and this seems not to matter too much I tried a bunch of and this seems not to matter too much I tried a bunch of
videos with abnormal widths and mplayer crashes elsewhere. videos with abnormal widths and mplayer crashes elsewhere.
mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv
boom with X11 bad match. boom with X11 bad match.
*/ */
if ((c->srcW & 0xf) != 0) return NULL; if ((c->srcW & 0xf) != 0) return NULL;
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
* \arg Maximum Rv value: 117570 * \arg Maximum Rv value: 117570
* \arg Maximum Bu value: 138420 * \arg Maximum Bu value: 138420
* \arg Maximum Gv + Gu value: 25642 + 53281 = 78923 * \arg Maximum Gv + Gu value: 25642 + 53281 = 78923
* *
* These values are needed to allocate table_{r, g, b}. If you modify * These values are needed to allocate table_{r, g, b}. If you modify
* this table, please update allocate_tables() accordingly * this table, please update allocate_tables() accordingly
*/ */
...@@ -168,7 +168,7 @@ static int get_entry_size(int bpp) ...@@ -168,7 +168,7 @@ static int get_entry_size(int bpp)
* together, so that they are contiguous in memory * together, so that they are contiguous in memory
* *
* table_r is indexed in the range * table_r is indexed in the range
* [-128 * 117570 / 76309, 255 + 127 * 117570 / 76309] = * [-128 * 117570 / 76309, 255 + 127 * 117570 / 76309] =
* [-197.21, 451.67] ---> [-198, 452] * [-197.21, 451.67] ---> [-198, 452]
* table_b is indexed in the range * table_b is indexed in the range
* [-128 * 138420 / 76309, 255 + 127 * 138420 / 76309] = * [-128 * 138420 / 76309, 255 + 127 * 138420 / 76309] =
...@@ -210,7 +210,7 @@ static void *allocate_tables(uint8_t **table_r, uint8_t **table_g, uint8_t **tab ...@@ -210,7 +210,7 @@ static void *allocate_tables(uint8_t **table_r, uint8_t **table_g, uint8_t **tab
return NULL; return NULL;
} }
/* ...and then, assign the table_* value */ /* ...and then, assign the table_* value */
switch (bpp) { switch (bpp) {
case 32: case 32:
...@@ -242,7 +242,7 @@ static void *allocate_tables(uint8_t **table_r, uint8_t **table_g, uint8_t **tab ...@@ -242,7 +242,7 @@ static void *allocate_tables(uint8_t **table_r, uint8_t **table_g, uint8_t **tab
* @param fullRange 0->MPEG YUV space 1->JPEG YUV space * @param fullRange 0->MPEG YUV space 1->JPEG YUV space
*/ */
int yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation) int yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int fullRange, int brightness, int contrast, int saturation)
{ {
int i; int i;
static uint8_t ytable[1024]; static uint8_t ytable[1024];
int64_t cy, oy; int64_t cy, oy;
......
/* /*
* yuv2rgb_mlib.c, Software YUV to RGB converter using mediaLib * yuv2rgb_mlib.c, Software YUV to RGB converter using mediaLib
* *
* Copyright (C) 2000, Håkan Hjort <d95hjort@dtek.chalmers.se> * Copyright (C) 2000, Håkan Hjort <d95hjort@dtek.chalmers.se>
...@@ -31,50 +31,50 @@ ...@@ -31,50 +31,50 @@
#include "swscale.h" #include "swscale.h"
/*
 * Slice converter that hands the work to mediaLib's
 * mlib_VideoColorYUV2ARGB420().
 *
 * c          - scaler context; srcFormat and dstW are read from it.
 * src        - plane pointers; src[0], src[1], src[2] are passed to mediaLib.
 * srcStride  - per-plane strides; srcStride[0] and srcStride[1] are passed on.
 *              When c->srcFormat is PIX_FMT_YUV422P, entries [1] and [2] are
 *              doubled IN PLACE, i.e. the caller's array is modified.
 *              NOTE(review): presumably this makes 4:2:2 chroma read as 4:2:0
 *              by skipping every other chroma row - confirm.
 * srcSliceY  - first row of the slice; used to offset into dst[0].
 * srcSliceH  - number of rows handed to mediaLib, also the return value.
 * dst        - output planes; only dst[0] is written.
 * dstStride  - output strides; only dstStride[0] is used.
 *
 * The assert requires both chroma strides to be equal, since a single
 * chroma stride value (srcStride[1]) is passed to the mediaLib call.
 */
static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[]){
if(c->srcFormat == PIX_FMT_YUV422P){ if(c->srcFormat == PIX_FMT_YUV422P){
srcStride[1] *= 2; srcStride[1] *= 2;
srcStride[2] *= 2; srcStride[2] *= 2;
} }
assert(srcStride[1] == srcStride[2]); assert(srcStride[1] == srcStride[2]);
mlib_VideoColorYUV2ARGB420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW, mlib_VideoColorYUV2ARGB420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
srcSliceH, dstStride[0], srcStride[0], srcStride[1]); srcSliceH, dstStride[0], srcStride[0], srcStride[1]);
return srcSliceH; return srcSliceH;
} }
/*
 * Slice converter that hands the work to mediaLib's
 * mlib_VideoColorYUV2ABGR420().
 *
 * Reads c->srcFormat and c->dstW. When the source format is PIX_FMT_YUV422P,
 * srcStride[1] and srcStride[2] are doubled IN PLACE (the caller's array is
 * modified) before the call.
 * NOTE(review): presumably this skips every other chroma row so 4:2:2 input
 * is consumed as 4:2:0 - confirm.
 *
 * Both chroma strides must be equal (asserted), because only srcStride[1] is
 * forwarded as the chroma stride. Output is written starting at row srcSliceY
 * of dst[0]. Returns srcSliceH.
 */
static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[]){
if(c->srcFormat == PIX_FMT_YUV422P){ if(c->srcFormat == PIX_FMT_YUV422P){
srcStride[1] *= 2; srcStride[1] *= 2;
srcStride[2] *= 2; srcStride[2] *= 2;
} }
assert(srcStride[1] == srcStride[2]); assert(srcStride[1] == srcStride[2]);
mlib_VideoColorYUV2ABGR420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW, mlib_VideoColorYUV2ABGR420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
srcSliceH, dstStride[0], srcStride[0], srcStride[1]); srcSliceH, dstStride[0], srcStride[0], srcStride[1]);
return srcSliceH; return srcSliceH;
} }
/*
 * Slice converter that hands the work to mediaLib's
 * mlib_VideoColorYUV2RGB420().
 *
 * Reads c->srcFormat and c->dstW. When the source format is PIX_FMT_YUV422P,
 * srcStride[1] and srcStride[2] are doubled IN PLACE (the caller's array is
 * modified) before the call.
 * NOTE(review): presumably this skips every other chroma row so 4:2:2 input
 * is consumed as 4:2:0 - confirm.
 *
 * Both chroma strides must be equal (asserted), because only srcStride[1] is
 * forwarded as the chroma stride. Output is written starting at row srcSliceY
 * of dst[0]. Returns srcSliceH.
 */
static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[]){
if(c->srcFormat == PIX_FMT_YUV422P){ if(c->srcFormat == PIX_FMT_YUV422P){
srcStride[1] *= 2; srcStride[1] *= 2;
srcStride[2] *= 2; srcStride[2] *= 2;
} }
assert(srcStride[1] == srcStride[2]); assert(srcStride[1] == srcStride[2]);
mlib_VideoColorYUV2RGB420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW, mlib_VideoColorYUV2RGB420(dst[0]+srcSliceY*dstStride[0], src[0], src[1], src[2], c->dstW,
srcSliceH, dstStride[0], srcStride[0], srcStride[1]); srcSliceH, dstStride[0], srcStride[0], srcStride[1]);
return srcSliceH; return srcSliceH;
} }
SwsFunc yuv2rgb_init_mlib(SwsContext *c) SwsFunc yuv2rgb_init_mlib(SwsContext *c)
{ {
switch(c->dstFormat){ switch(c->dstFormat){
case PIX_FMT_RGB24: return mlib_YUV2RGB420_24; case PIX_FMT_RGB24: return mlib_YUV2RGB420_24;
......
/* /*
* yuv2rgb_mmx.c, Software YUV to RGB converter with Intel MMX "technology" * yuv2rgb_mmx.c, Software YUV to RGB converter with Intel MMX "technology"
* *
...@@ -134,7 +133,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStr ...@@ -134,7 +133,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStr
h_size= (c->dstW+7)&~7; h_size= (c->dstW+7)&~7;
if(h_size*2 > FFABS(dstStride[0])) h_size-=8; if(h_size*2 > FFABS(dstStride[0])) h_size-=8;
__asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ ); __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
//printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0], //printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0],
//srcStride[0],srcStride[1],srcStride[2],dstStride[0]); //srcStride[0],srcStride[1],srcStride[2],dstStride[0]);
...@@ -203,18 +202,18 @@ YUV2RGB ...@@ -203,18 +202,18 @@ YUV2RGB
"movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */ MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */
"add $16, %1 \n\t" "add $16, %1 \n\t"
"add $4, %0 \n\t" "add $4, %0 \n\t"
" js 1b \n\t" " js 1b \n\t"
: "+r" (index), "+r" (_image) : "+r" (index), "+r" (_image)
: "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index) : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
); );
} }
__asm__ __volatile__ (EMMS); __asm__ __volatile__ (EMMS);
return srcSliceH; return srcSliceH;
} }
...@@ -229,7 +228,7 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStr ...@@ -229,7 +228,7 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStr
h_size= (c->dstW+7)&~7; h_size= (c->dstW+7)&~7;
if(h_size*2 > FFABS(dstStride[0])) h_size-=8; if(h_size*2 > FFABS(dstStride[0])) h_size-=8;
__asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ ); __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
//printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0], //printf("%X %X %X %X %X %X %X %X %X %X\n", (int)&c->redDither, (int)&b5Dither, (int)src[0], (int)src[1], (int)src[2], (int)dst[0],
//srcStride[0],srcStride[1],srcStride[2],dstStride[0]); //srcStride[0],srcStride[1],srcStride[2],dstStride[0]);
...@@ -294,7 +293,7 @@ YUV2RGB ...@@ -294,7 +293,7 @@ YUV2RGB
"movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */ MOVNTQ " %%mm5, 8 (%1);" /* store pixel 4-7 */
"add $16, %1 \n\t" "add $16, %1 \n\t"
"add $4, %0 \n\t" "add $4, %0 \n\t"
" js 1b \n\t" " js 1b \n\t"
...@@ -318,7 +317,7 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStr ...@@ -318,7 +317,7 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStr
h_size= (c->dstW+7)&~7; h_size= (c->dstW+7)&~7;
if(h_size*3 > FFABS(dstStride[0])) h_size-=8; if(h_size*3 > FFABS(dstStride[0])) h_size-=8;
__asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ ); __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
for (y= 0; y<srcSliceH; y++ ) { for (y= 0; y<srcSliceH; y++ ) {
...@@ -439,11 +438,11 @@ YUV2RGB ...@@ -439,11 +438,11 @@ YUV2RGB
"movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */ "movd 4 (%3, %0), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
#endif #endif
"add $24, %1 \n\t" "add $24, %1 \n\t"
"add $4, %0 \n\t" "add $4, %0 \n\t"
" js 1b \n\t" " js 1b \n\t"
: "+r" (index), "+r" (_image) : "+r" (index), "+r" (_image)
: "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index) : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
); );
...@@ -464,7 +463,7 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStr ...@@ -464,7 +463,7 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStr
h_size= (c->dstW+7)&~7; h_size= (c->dstW+7)&~7;
if(h_size*4 > FFABS(dstStride[0])) h_size-=8; if(h_size*4 > FFABS(dstStride[0])) h_size-=8;
__asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ ); __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
for (y= 0; y<srcSliceH; y++ ) { for (y= 0; y<srcSliceH; y++ ) {
...@@ -529,7 +528,7 @@ YUV2RGB ...@@ -529,7 +528,7 @@ YUV2RGB
"add $32, %1 \n\t" "add $32, %1 \n\t"
"add $4, %0 \n\t" "add $4, %0 \n\t"
" js 1b \n\t" " js 1b \n\t"
: "+r" (index), "+r" (_image) : "+r" (index), "+r" (_image)
: "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index) : "r" (_pu - index), "r" (_pv - index), "r"(&c->redDither), "r" (_py - 2*index)
); );
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment