Commit 29baaf7d authored by atmos4

Mangle global symbol names referenced from inline asm (via MANGLE() from mangle.h) for win32 in postproc


git-svn-id: file:///var/local/repositories/mplayer/trunk/postproc@4249 b3059339-0415-0410-9bf9-f77b7e298cf2
parent 2322fbeb
...@@ -86,6 +86,7 @@ dont use #ifdef ARCH_X86 for the asm stuff ... cross compilers? (note cpudetect ...@@ -86,6 +86,7 @@ dont use #ifdef ARCH_X86 for the asm stuff ... cross compilers? (note cpudetect
//#include "../libvo/fastmemcpy.h" //#include "../libvo/fastmemcpy.h"
#include "postprocess.h" #include "postprocess.h"
#include "../cpudetect.h" #include "../cpudetect.h"
#include "../mangle.h"
#define MIN(a,b) ((a) > (b) ? (b) : (a)) #define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a)) #define MAX(a,b) ((a) < (b) ? (b) : (a))
......
...@@ -60,8 +60,8 @@ asm volatile( ...@@ -60,8 +60,8 @@ asm volatile(
"leal (%%eax, %2, 4), %%ebx \n\t" "leal (%%eax, %2, 4), %%ebx \n\t"
// 0 1 2 3 4 5 6 7 8 9 // 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2 // %1 eax eax+%2 eax+2%2 %1+4%2 ebx ebx+%2 ebx+2%2 %1+8%2 ebx+4%2
"movq mmxDCOffset, %%mm7 \n\t" // mm7 = 0x7F "movq "MANGLE(mmxDCOffset)", %%mm7 \n\t" // mm7 = 0x7F
"movq mmxDCThreshold, %%mm6 \n\t" // mm6 = 0x7D "movq "MANGLE(mmxDCThreshold)", %%mm6 \n\t" // mm6 = 0x7D
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq (%%eax), %%mm1 \n\t" "movq (%%eax), %%mm1 \n\t"
"psubb %%mm1, %%mm0 \n\t" // mm0 = differnece "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece
...@@ -171,12 +171,12 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, int QP) ...@@ -171,12 +171,12 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, int QP)
"psubusb %%mm2, %%mm1 \n\t" "psubusb %%mm2, %%mm1 \n\t"
"por %%mm1, %%mm0 \n\t" // ABS Diff "por %%mm1, %%mm0 \n\t" // ABS Diff
"movq pQPb, %%mm7 \n\t" // QP,..., QP "movq "MANGLE(pQPb)", %%mm7 \n\t" // QP,..., QP
"paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP "paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP
"psubusb %%mm7, %%mm0 \n\t" // Diff <= 2QP -> 0 "psubusb %%mm7, %%mm0 \n\t" // Diff <= 2QP -> 0
"pcmpeqd b00, %%mm0 \n\t" "pcmpeqd "MANGLE(b00)", %%mm0 \n\t"
"psrlq $16, %%mm0 \n\t" "psrlq $16, %%mm0 \n\t"
"pcmpeqd bFF, %%mm0 \n\t" "pcmpeqd "MANGLE(bFF)", %%mm0 \n\t"
// "movd %%mm0, (%1, %2, 4)\n\t" // "movd %%mm0, (%1, %2, 4)\n\t"
"movd %%mm0, %0 \n\t" "movd %%mm0, %0 \n\t"
: "=r" (isOk) : "=r" (isOk)
...@@ -219,7 +219,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP) ...@@ -219,7 +219,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
src+= stride*3; src+= stride*3;
asm volatile( //"movv %0 %1 %2\n\t" asm volatile( //"movv %0 %1 %2\n\t"
"movq pQPb, %%mm0 \n\t" // QP,..., QP "movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP
"movq (%0), %%mm6 \n\t" "movq (%0), %%mm6 \n\t"
"movq (%0, %1), %%mm5 \n\t" "movq (%0, %1), %%mm5 \n\t"
...@@ -229,7 +229,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP) ...@@ -229,7 +229,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
"psubusb %%mm1, %%mm2 \n\t" "psubusb %%mm1, %%mm2 \n\t"
"por %%mm5, %%mm2 \n\t" // ABS Diff of lines "por %%mm5, %%mm2 \n\t" // ABS Diff of lines
"psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0 "psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
"pcmpeqb b00, %%mm2 \n\t" // diff <= QP -> FF "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // diff <= QP -> FF
"pand %%mm2, %%mm6 \n\t" "pand %%mm2, %%mm6 \n\t"
"pandn %%mm1, %%mm2 \n\t" "pandn %%mm1, %%mm2 \n\t"
...@@ -247,7 +247,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP) ...@@ -247,7 +247,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, int QP)
"psubusb %%mm1, %%mm2 \n\t" "psubusb %%mm1, %%mm2 \n\t"
"por %%mm5, %%mm2 \n\t" // ABS Diff of lines "por %%mm5, %%mm2 \n\t" // ABS Diff of lines
"psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0 "psubusb %%mm0, %%mm2 \n\t" // diff <= QP -> 0
"pcmpeqb b00, %%mm2 \n\t" // diff <= QP -> FF "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // diff <= QP -> FF
"pand %%mm2, %%mm7 \n\t" "pand %%mm2, %%mm7 \n\t"
"pandn %%mm1, %%mm2 \n\t" "pandn %%mm1, %%mm2 \n\t"
...@@ -403,16 +403,16 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) ...@@ -403,16 +403,16 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
// FIXME rounding // FIXME rounding
asm volatile( asm volatile(
"pxor %%mm7, %%mm7 \n\t" // 0 "pxor %%mm7, %%mm7 \n\t" // 0
"movq b80, %%mm6 \n\t" // MIN_SIGNED_BYTE "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
"leal (%0, %1), %%eax \n\t" "leal (%0, %1), %%eax \n\t"
"leal (%%eax, %1, 4), %%ebx \n\t" "leal (%%eax, %1, 4), %%ebx \n\t"
// 0 1 2 3 4 5 6 7 8 9 // 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1 // %0 eax eax+%1 eax+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
"movq pQPb, %%mm0 \n\t" // QP,..., QP "movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP
"movq %%mm0, %%mm1 \n\t" // QP,..., QP "movq %%mm0, %%mm1 \n\t" // QP,..., QP
"paddusb b02, %%mm0 \n\t" "paddusb "MANGLE(b02)", %%mm0 \n\t"
"psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm0 \n\t"
"pand b3F, %%mm0 \n\t" // QP/4,..., QP/4 "pand "MANGLE(b3F)", %%mm0 \n\t" // QP/4,..., QP/4
"paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ... "paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ...
"movq (%0, %1, 4), %%mm2 \n\t" // line 4 "movq (%0, %1, 4), %%mm2 \n\t" // line 4
"movq (%%ebx), %%mm3 \n\t" // line 5 "movq (%%ebx), %%mm3 \n\t" // line 5
...@@ -441,8 +441,8 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) ...@@ -441,8 +441,8 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
"paddb %%mm6, %%mm5 \n\t" "paddb %%mm6, %%mm5 \n\t"
"psrlw $2, %%mm5 \n\t" "psrlw $2, %%mm5 \n\t"
"pand b3F, %%mm5 \n\t" "pand "MANGLE(b3F)", %%mm5 \n\t"
"psubb b20, %%mm5 \n\t" // (l5-l4)/8 "psubb "MANGLE(b20)", %%mm5 \n\t" // (l5-l4)/8
"movq (%%eax, %1, 2), %%mm2 \n\t" "movq (%%eax, %1, 2), %%mm2 \n\t"
"paddb %%mm6, %%mm2 \n\t" // line 3 + 0x80 "paddb %%mm6, %%mm2 \n\t" // line 3 + 0x80
...@@ -503,7 +503,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP) ...@@ -503,7 +503,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
asm volatile( asm volatile(
"pxor %%mm7, %%mm7 \n\t" // 0 "pxor %%mm7, %%mm7 \n\t" // 0
// "movq b80, %%mm6 \n\t" // MIN_SIGNED_BYTE // "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
"leal (%0, %1), %%eax \n\t" "leal (%0, %1), %%eax \n\t"
"leal (%%eax, %1, 4), %%ebx \n\t" "leal (%%eax, %1, 4), %%ebx \n\t"
// 0 1 2 3 4 5 6 7 8 9 // 0 1 2 3 4 5 6 7 8 9
...@@ -529,9 +529,9 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP) ...@@ -529,9 +529,9 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, int QP)
"por %%mm5, %%mm4 \n\t" // |l4 - l5| "por %%mm5, %%mm4 \n\t" // |l4 - l5|
"psubusb %%mm0, %%mm4 \n\t" //d = MAX(0, |l4-l5| - (|l2-l3| + |l5-l6|)/2) "psubusb %%mm0, %%mm4 \n\t" //d = MAX(0, |l4-l5| - (|l2-l3| + |l5-l6|)/2)
"movq %%mm4, %%mm3 \n\t" // d "movq %%mm4, %%mm3 \n\t" // d
"psubusb pQPb, %%mm4 \n\t" "psubusb "MANGLE(pQPb)", %%mm4 \n\t"
"pcmpeqb %%mm7, %%mm4 \n\t" // d <= QP ? -1 : 0 "pcmpeqb %%mm7, %%mm4 \n\t" // d <= QP ? -1 : 0
"psubusb b01, %%mm3 \n\t" "psubusb "MANGLE(b01)", %%mm3 \n\t"
"pand %%mm4, %%mm3 \n\t" // d <= QP ? d : 0 "pand %%mm4, %%mm3 \n\t" // d <= QP ? d : 0
PAVGB(%%mm7, %%mm3) // d/2 PAVGB(%%mm7, %%mm3) // d/2
...@@ -740,18 +740,18 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP) ...@@ -740,18 +740,18 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8 PMINUB(%%mm2, %%mm1, %%mm4) // MIN(|lenergy|,|renergy|)/8
"movq pQPb, %%mm4 \n\t" // QP //FIXME QP+1 ? "movq "MANGLE(pQPb)", %%mm4 \n\t" // QP //FIXME QP+1 ?
"paddusb b01, %%mm4 \n\t" "paddusb "MANGLE(b01)", %%mm4 \n\t"
"pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP "pcmpgtb %%mm3, %%mm4 \n\t" // |menergy|/8 < QP
"psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8 "psubusb %%mm1, %%mm3 \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
"pand %%mm4, %%mm3 \n\t" "pand %%mm4, %%mm3 \n\t"
"movq %%mm3, %%mm1 \n\t" "movq %%mm3, %%mm1 \n\t"
// "psubusb b01, %%mm3 \n\t" // "psubusb "MANGLE(b01)", %%mm3 \n\t"
PAVGB(%%mm7, %%mm3) PAVGB(%%mm7, %%mm3)
PAVGB(%%mm7, %%mm3) PAVGB(%%mm7, %%mm3)
"paddusb %%mm1, %%mm3 \n\t" "paddusb %%mm1, %%mm3 \n\t"
// "paddusb b01, %%mm3 \n\t" // "paddusb "MANGLE(b01)", %%mm3 \n\t"
"movq (%%eax, %1, 2), %%mm6 \n\t" //l3 "movq (%%eax, %1, 2), %%mm6 \n\t" //l3
"movq (%0, %1, 4), %%mm5 \n\t" //l4 "movq (%0, %1, 4), %%mm5 \n\t" //l4
...@@ -764,7 +764,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP) ...@@ -764,7 +764,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"pand %%mm0, %%mm3 \n\t" "pand %%mm0, %%mm3 \n\t"
PMINUB(%%mm5, %%mm3, %%mm0) PMINUB(%%mm5, %%mm3, %%mm0)
"psubusb b01, %%mm3 \n\t" "psubusb "MANGLE(b01)", %%mm3 \n\t"
PAVGB(%%mm7, %%mm3) PAVGB(%%mm7, %%mm3)
"movq (%%eax, %1, 2), %%mm0 \n\t" "movq (%%eax, %1, 2), %%mm0 \n\t"
...@@ -796,7 +796,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP) ...@@ -796,7 +796,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"movq (%%eax, %1), %%mm3 \n\t" // l2 "movq (%%eax, %1), %%mm3 \n\t" // l2
"pxor %%mm6, %%mm2 \n\t" // -l5-1 "pxor %%mm6, %%mm2 \n\t" // -l5-1
"movq %%mm2, %%mm5 \n\t" // -l5-1 "movq %%mm2, %%mm5 \n\t" // -l5-1
"movq b80, %%mm4 \n\t" // 128 "movq "MANGLE(b80)", %%mm4 \n\t" // 128
"leal (%%eax, %1, 4), %%ebx \n\t" "leal (%%eax, %1, 4), %%ebx \n\t"
PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2 PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2
PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128 PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128
...@@ -808,7 +808,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP) ...@@ -808,7 +808,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"pxor %%mm6, %%mm2 \n\t" // -l1-1 "pxor %%mm6, %%mm2 \n\t" // -l1-1
PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2 PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2
PAVGB((%0), %%mm1) // (l0-l3+256)/2 PAVGB((%0), %%mm1) // (l0-l3+256)/2
"movq b80, %%mm3 \n\t" // 128 "movq "MANGLE(b80)", %%mm3 \n\t" // 128
PAVGB(%%mm2, %%mm3) // ~(l2-l1)/4 + 128 PAVGB(%%mm2, %%mm3) // ~(l2-l1)/4 + 128
PAVGB(%%mm1, %%mm3) // ~(l0-l3)/4 +(l2-l1)/8 + 128 PAVGB(%%mm1, %%mm3) // ~(l0-l3)/4 +(l2-l1)/8 + 128
PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128 PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128
...@@ -818,14 +818,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP) ...@@ -818,14 +818,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
"movq (%%ebx, %1, 2), %%mm1 \n\t" // l7 "movq (%%ebx, %1, 2), %%mm1 \n\t" // l7
"pxor %%mm6, %%mm1 \n\t" // -l7-1 "pxor %%mm6, %%mm1 \n\t" // -l7-1
PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2 PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2
"movq b80, %%mm2 \n\t" // 128 "movq "MANGLE(b80)", %%mm2 \n\t" // 128
PAVGB(%%mm5, %%mm2) // ~(l6-l5)/4 + 128 PAVGB(%%mm5, %%mm2) // ~(l6-l5)/4 + 128
PAVGB(%%mm1, %%mm2) // ~(l4-l7)/4 +(l6-l5)/8 + 128 PAVGB(%%mm1, %%mm2) // ~(l4-l7)/4 +(l6-l5)/8 + 128
PAVGB(%%mm5, %%mm2) // ~(l4-l7)/8 +5(l6-l5)/16 + 128 PAVGB(%%mm5, %%mm2) // ~(l4-l7)/8 +5(l6-l5)/16 + 128
// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128 // mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
"movq b00, %%mm1 \n\t" // 0 "movq "MANGLE(b00)", %%mm1 \n\t" // 0
"movq b00, %%mm5 \n\t" // 0 "movq "MANGLE(b00)", %%mm5 \n\t" // 0
"psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16 "psubb %%mm2, %%mm1 \n\t" // 128 - renergy/16
"psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16 "psubb %%mm3, %%mm5 \n\t" // 128 - lenergy/16
PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16| PMAXUB(%%mm1, %%mm2) // 128 + |renergy/16|
...@@ -834,8 +834,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP) ...@@ -834,8 +834,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128 // mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
"movq b00, %%mm7 \n\t" // 0 "movq "MANGLE(b00)", %%mm7 \n\t" // 0
"movq pQPb, %%mm2 \n\t" // QP "movq "MANGLE(pQPb)", %%mm2 \n\t" // QP
PAVGB(%%mm6, %%mm2) // 128 + QP/2 PAVGB(%%mm6, %%mm2) // 128 + QP/2
"psubb %%mm6, %%mm2 \n\t" "psubb %%mm6, %%mm2 \n\t"
...@@ -848,13 +848,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP) ...@@ -848,13 +848,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, int QP)
// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16 // mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
"movq %%mm4, %%mm3 \n\t" // d "movq %%mm4, %%mm3 \n\t" // d
"psubusb b01, %%mm4 \n\t" "psubusb "MANGLE(b01)", %%mm4 \n\t"
PAVGB(%%mm7, %%mm4) // d/32 PAVGB(%%mm7, %%mm4) // d/32
PAVGB(%%mm7, %%mm4) // (d + 32)/64 PAVGB(%%mm7, %%mm4) // (d + 32)/64
"paddb %%mm3, %%mm4 \n\t" // 5d/64 "paddb %%mm3, %%mm4 \n\t" // 5d/64
"pand %%mm2, %%mm4 \n\t" "pand %%mm2, %%mm4 \n\t"
"movq b80, %%mm5 \n\t" // 128 "movq "MANGLE(b80)", %%mm5 \n\t" // 128
"psubb %%mm0, %%mm5 \n\t" // q "psubb %%mm0, %%mm5 \n\t" // q
"paddsb %%mm6, %%mm5 \n\t" // fix bad rounding "paddsb %%mm6, %%mm5 \n\t" // fix bad rounding
"pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q) "pcmpgtb %%mm5, %%mm7 \n\t" // SIGN(q)
...@@ -991,8 +991,8 @@ src-=8; ...@@ -991,8 +991,8 @@ src-=8;
"psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3
"psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
"movq %%mm0, temp0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq %%mm0, "MANGLE(temp0)" \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"movq %%mm1, temp1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 "movq %%mm1, "MANGLE(temp1)" \n\t" // 2H0 - 5H1 + 5H2 - 2H3
"movq (%0, %1, 4), %%mm0 \n\t" "movq (%0, %1, 4), %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
...@@ -1001,8 +1001,8 @@ src-=8; ...@@ -1001,8 +1001,8 @@ src-=8;
"psubw %%mm0, %%mm2 \n\t" // L3 - L4 "psubw %%mm0, %%mm2 \n\t" // L3 - L4
"psubw %%mm1, %%mm3 \n\t" // H3 - H4 "psubw %%mm1, %%mm3 \n\t" // H3 - H4
"movq %%mm2, temp2 \n\t" // L3 - L4 "movq %%mm2, "MANGLE(temp2)" \n\t" // L3 - L4
"movq %%mm3, temp3 \n\t" // H3 - H4 "movq %%mm3, "MANGLE(temp3)" \n\t" // H3 - H4
"paddw %%mm4, %%mm4 \n\t" // 2L2 "paddw %%mm4, %%mm4 \n\t" // 2L2
"paddw %%mm5, %%mm5 \n\t" // 2H2 "paddw %%mm5, %%mm5 \n\t" // 2H2
"psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4 "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4
...@@ -1049,8 +1049,8 @@ src-=8; ...@@ -1049,8 +1049,8 @@ src-=8;
"psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7
"psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7
"movq temp0, %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq "MANGLE(temp0)", %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"movq temp1, %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 "movq "MANGLE(temp1)", %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
"movq %%mm7, %%mm6 \n\t" // 0 "movq %%mm7, %%mm6 \n\t" // 0
...@@ -1138,8 +1138,8 @@ src-=8; ...@@ -1138,8 +1138,8 @@ src-=8;
"pmulhw %%mm2, %%mm5 \n\t" // ld/13 "pmulhw %%mm2, %%mm5 \n\t" // ld/13
*/ */
"movq temp2, %%mm0 \n\t" // L3 - L4 "movq "MANGLE(temp2)", %%mm0 \n\t" // L3 - L4
"movq temp3, %%mm1 \n\t" // H3 - H4 "movq "MANGLE(temp3)", %%mm1 \n\t" // H3 - H4
"pxor %%mm2, %%mm2 \n\t" "pxor %%mm2, %%mm2 \n\t"
"pxor %%mm3, %%mm3 \n\t" "pxor %%mm3, %%mm3 \n\t"
...@@ -1235,9 +1235,9 @@ static inline void RENAME(dering)(uint8_t src[], int stride, int QP) ...@@ -1235,9 +1235,9 @@ static inline void RENAME(dering)(uint8_t src[], int stride, int QP)
{ {
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW) #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
asm volatile( asm volatile(
"movq pQPb, %%mm0 \n\t" "movq "MANGLE(pQPb)", %%mm0 \n\t"
"paddusb %%mm0, %%mm0 \n\t" "paddusb %%mm0, %%mm0 \n\t"
"movq %%mm0, pQPb2 \n\t" "movq %%mm0, "MANGLE(pQPb2)" \n\t"
"leal (%0, %1), %%eax \n\t" "leal (%0, %1), %%eax \n\t"
"leal (%%eax, %1, 4), %%ebx \n\t" "leal (%%eax, %1, 4), %%ebx \n\t"
...@@ -1319,13 +1319,13 @@ FIND_MIN_MAX((%0, %1, 8)) ...@@ -1319,13 +1319,13 @@ FIND_MIN_MAX((%0, %1, 8))
"movq %%mm6, %%mm0 \n\t" // max "movq %%mm6, %%mm0 \n\t" // max
"psubb %%mm7, %%mm6 \n\t" // max - min "psubb %%mm7, %%mm6 \n\t" // max - min
"movd %%mm6, %%ecx \n\t" "movd %%mm6, %%ecx \n\t"
"cmpb deringThreshold, %%cl \n\t" "cmpb "MANGLE(deringThreshold)", %%cl \n\t"
" jb 1f \n\t" " jb 1f \n\t"
PAVGB(%%mm0, %%mm7) // a=(max + min)/2 PAVGB(%%mm0, %%mm7) // a=(max + min)/2
"punpcklbw %%mm7, %%mm7 \n\t" "punpcklbw %%mm7, %%mm7 \n\t"
"punpcklbw %%mm7, %%mm7 \n\t" "punpcklbw %%mm7, %%mm7 \n\t"
"punpcklbw %%mm7, %%mm7 \n\t" "punpcklbw %%mm7, %%mm7 \n\t"
"movq %%mm7, temp0 \n\t" "movq %%mm7, "MANGLE(temp0)" \n\t"
"movq (%0), %%mm0 \n\t" // L10 "movq (%0), %%mm0 \n\t" // L10
"movq %%mm0, %%mm1 \n\t" // L10 "movq %%mm0, %%mm1 \n\t" // L10
...@@ -1344,9 +1344,9 @@ FIND_MIN_MAX((%0, %1, 8)) ...@@ -1344,9 +1344,9 @@ FIND_MIN_MAX((%0, %1, 8))
"psubusb %%mm7, %%mm0 \n\t" "psubusb %%mm7, %%mm0 \n\t"
"psubusb %%mm7, %%mm2 \n\t" "psubusb %%mm7, %%mm2 \n\t"
"psubusb %%mm7, %%mm3 \n\t" "psubusb %%mm7, %%mm3 \n\t"
"pcmpeqb b00, %%mm0 \n\t" // L10 > a ? 0 : -1 "pcmpeqb "MANGLE(b00)", %%mm0 \n\t" // L10 > a ? 0 : -1
"pcmpeqb b00, %%mm2 \n\t" // L20 > a ? 0 : -1 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // L20 > a ? 0 : -1
"pcmpeqb b00, %%mm3 \n\t" // L00 > a ? 0 : -1 "pcmpeqb "MANGLE(b00)", %%mm3 \n\t" // L00 > a ? 0 : -1
"paddb %%mm2, %%mm0 \n\t" "paddb %%mm2, %%mm0 \n\t"
"paddb %%mm3, %%mm0 \n\t" "paddb %%mm3, %%mm0 \n\t"
...@@ -1367,9 +1367,9 @@ FIND_MIN_MAX((%0, %1, 8)) ...@@ -1367,9 +1367,9 @@ FIND_MIN_MAX((%0, %1, 8))
"psubusb %%mm7, %%mm2 \n\t" "psubusb %%mm7, %%mm2 \n\t"
"psubusb %%mm7, %%mm4 \n\t" "psubusb %%mm7, %%mm4 \n\t"
"psubusb %%mm7, %%mm5 \n\t" "psubusb %%mm7, %%mm5 \n\t"
"pcmpeqb b00, %%mm2 \n\t" // L11 > a ? 0 : -1 "pcmpeqb "MANGLE(b00)", %%mm2 \n\t" // L11 > a ? 0 : -1
"pcmpeqb b00, %%mm4 \n\t" // L21 > a ? 0 : -1 "pcmpeqb "MANGLE(b00)", %%mm4 \n\t" // L21 > a ? 0 : -1
"pcmpeqb b00, %%mm5 \n\t" // L01 > a ? 0 : -1 "pcmpeqb "MANGLE(b00)", %%mm5 \n\t" // L01 > a ? 0 : -1
"paddb %%mm4, %%mm2 \n\t" "paddb %%mm4, %%mm2 \n\t"
"paddb %%mm5, %%mm2 \n\t" "paddb %%mm5, %%mm2 \n\t"
// 0, 2, 3, 1 // 0, 2, 3, 1
...@@ -1389,12 +1389,12 @@ FIND_MIN_MAX((%0, %1, 8)) ...@@ -1389,12 +1389,12 @@ FIND_MIN_MAX((%0, %1, 8))
PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\ PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\
PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\ PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\
PAVGB(lx, pplx) \ PAVGB(lx, pplx) \
"movq " #lx ", temp1 \n\t"\ "movq " #lx ", "MANGLE(temp1)" \n\t"\
"movq temp0, " #lx " \n\t"\ "movq "MANGLE(temp0)", " #lx " \n\t"\
"psubusb " #lx ", " #t1 " \n\t"\ "psubusb " #lx ", " #t1 " \n\t"\
"psubusb " #lx ", " #t0 " \n\t"\ "psubusb " #lx ", " #t0 " \n\t"\
"psubusb " #lx ", " #sx " \n\t"\ "psubusb " #lx ", " #sx " \n\t"\
"movq b00, " #lx " \n\t"\ "movq "MANGLE(b00)", " #lx " \n\t"\
"pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\ "pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\
"pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\ "pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\
"pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\ "pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\
...@@ -1404,20 +1404,20 @@ FIND_MIN_MAX((%0, %1, 8)) ...@@ -1404,20 +1404,20 @@ FIND_MIN_MAX((%0, %1, 8))
PAVGB(plx, pplx) /* filtered */\ PAVGB(plx, pplx) /* filtered */\
"movq " #dst ", " #t0 " \n\t" /* dst */\ "movq " #dst ", " #t0 " \n\t" /* dst */\
"movq " #t0 ", " #t1 " \n\t" /* dst */\ "movq " #t0 ", " #t1 " \n\t" /* dst */\
"psubusb pQPb2, " #t0 " \n\t"\ "psubusb "MANGLE(pQPb2)", " #t0 " \n\t"\
"paddusb pQPb2, " #t1 " \n\t"\ "paddusb "MANGLE(pQPb2)", " #t1 " \n\t"\
PMAXUB(t0, pplx)\ PMAXUB(t0, pplx)\
PMINUB(t1, pplx, t0)\ PMINUB(t1, pplx, t0)\
"paddb " #sx ", " #ppsx " \n\t"\ "paddb " #sx ", " #ppsx " \n\t"\
"paddb " #psx ", " #ppsx " \n\t"\ "paddb " #psx ", " #ppsx " \n\t"\
"#paddb b02, " #ppsx " \n\t"\ "#paddb "MANGLE(b02)", " #ppsx " \n\t"\
"pand b08, " #ppsx " \n\t"\ "pand "MANGLE(b08)", " #ppsx " \n\t"\
"pcmpeqb " #lx ", " #ppsx " \n\t"\ "pcmpeqb " #lx ", " #ppsx " \n\t"\
"pand " #ppsx ", " #pplx " \n\t"\ "pand " #ppsx ", " #pplx " \n\t"\
"pandn " #dst ", " #ppsx " \n\t"\ "pandn " #dst ", " #ppsx " \n\t"\
"por " #pplx ", " #ppsx " \n\t"\ "por " #pplx ", " #ppsx " \n\t"\
"movq " #ppsx ", " #dst " \n\t"\ "movq " #ppsx ", " #dst " \n\t"\
"movq temp1, " #lx " \n\t" "movq "MANGLE(temp1)", " #lx " \n\t"
/* /*
0000000 0000000
...@@ -2082,7 +2082,7 @@ static void inline RENAME(tempNoiseReducer)(uint8_t *src, int stride, ...@@ -2082,7 +2082,7 @@ static void inline RENAME(tempNoiseReducer)(uint8_t *src, int stride,
"paddw %%mm6, %%mm0 \n\t" "paddw %%mm6, %%mm0 \n\t"
#elif defined (FAST_L2_DIFF) #elif defined (FAST_L2_DIFF)
"pcmpeqb %%mm7, %%mm7 \n\t" "pcmpeqb %%mm7, %%mm7 \n\t"
"movq b80, %%mm6 \n\t" "movq "MANGLE(b80)", %%mm6 \n\t"
"pxor %%mm0, %%mm0 \n\t" "pxor %%mm0, %%mm0 \n\t"
#define L2_DIFF_CORE(a, b)\ #define L2_DIFF_CORE(a, b)\
"movq " #a ", %%mm5 \n\t"\ "movq " #a ", %%mm5 \n\t"\
...@@ -2155,9 +2155,9 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) ...@@ -2155,9 +2155,9 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx))
// "movl %3, %%ecx \n\t" // "movl %3, %%ecx \n\t"
// "movl %%ecx, test \n\t" // "movl %%ecx, test \n\t"
// "jmp 4f \n\t" // "jmp 4f \n\t"
"cmpl 4+maxTmpNoise, %%ecx \n\t" "cmpl 4+"MANGLE(maxTmpNoise)", %%ecx \n\t"
" jb 2f \n\t" " jb 2f \n\t"
"cmpl 8+maxTmpNoise, %%ecx \n\t" "cmpl 8+"MANGLE(maxTmpNoise)", %%ecx \n\t"
" jb 1f \n\t" " jb 1f \n\t"
"leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
...@@ -2216,7 +2216,7 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) ...@@ -2216,7 +2216,7 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx))
"jmp 4f \n\t" "jmp 4f \n\t"
"2: \n\t" "2: \n\t"
"cmpl maxTmpNoise, %%ecx \n\t" "cmpl "MANGLE(maxTmpNoise)", %%ecx \n\t"
" jb 3f \n\t" " jb 3f \n\t"
"leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride "leal (%%ebx, %2, 2), %%ecx \n\t" // 7*stride
...@@ -2461,8 +2461,8 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, uint8_t src[] ...@@ -2461,8 +2461,8 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, uint8_t src[]
asm volatile( asm volatile(
"leal (%0,%2), %%eax \n\t" "leal (%0,%2), %%eax \n\t"
"leal (%1,%3), %%ebx \n\t" "leal (%1,%3), %%ebx \n\t"
"movq packedYOffset, %%mm2 \n\t" "movq "MANGLE(packedYOffset)", %%mm2\n\t"
"movq packedYScale, %%mm3 \n\t" "movq "MANGLE(packedYScale)", %%mm3\n\t"
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
#define SCALED_CPY(src1, src2, dst1, dst2) \ #define SCALED_CPY(src1, src2, dst1, dst2) \
...@@ -2884,7 +2884,7 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int ...@@ -2884,7 +2884,7 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
"packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
"packuswb %%mm7, %%mm7 \n\t" // QP,..., QP "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
"movq %%mm7, pQPb \n\t" "movq %%mm7, "MANGLE(pQPb)" \n\t"
: : "r" (QP) : : "r" (QP)
); );
#endif #endif
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include "../config.h" #include "../config.h"
#include "../mangle.h"
#ifdef HAVE_MALLOC_H #ifdef HAVE_MALLOC_H
#include <malloc.h> #include <malloc.h>
#endif #endif
......
...@@ -135,19 +135,19 @@ ...@@ -135,19 +135,19 @@
"addl $1, %%edx \n\t"\ "addl $1, %%edx \n\t"\
" jnz 2b \n\t"\ " jnz 2b \n\t"\
\ \
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\ "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\ "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
"pmulhw ugCoeff, %%mm3 \n\t"\ "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
"pmulhw vgCoeff, %%mm4 \n\t"\ "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
"pmulhw ubCoeff, %%mm2 \n\t"\ "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
"pmulhw vrCoeff, %%mm5 \n\t"\ "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\ "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw yCoeff, %%mm1 \n\t"\ "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
"pmulhw yCoeff, %%mm7 \n\t"\ "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
"paddw %%mm3, %%mm4 \n\t"\ "paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\ "movq %%mm2, %%mm0 \n\t"\
...@@ -197,23 +197,23 @@ ...@@ -197,23 +197,23 @@
"movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\ "movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
"psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
"psubw w400, %%mm3 \n\t" /* 8(U-128)*/\ "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\
"pmulhw yCoeff, %%mm1 \n\t"\ "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
\ \
\ \
"pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"pmulhw ubCoeff, %%mm3 \n\t"\ "pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\
"psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
"pmulhw ugCoeff, %%mm2 \n\t"\ "pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\
"paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
"psubw w400, %%mm0 \n\t" /* (V-128)8*/\ "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\
\ \
\ \
"movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\ "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
"pmulhw vrCoeff, %%mm0 \n\t"\ "pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\
"pmulhw vgCoeff, %%mm4 \n\t"\ "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
"paddw %%mm1, %%mm3 \n\t" /* B*/\ "paddw %%mm1, %%mm3 \n\t" /* B*/\
"paddw %%mm1, %%mm0 \n\t" /* R*/\ "paddw %%mm1, %%mm0 \n\t" /* R*/\
"packuswb %%mm3, %%mm3 \n\t"\ "packuswb %%mm3, %%mm3 \n\t"\
...@@ -228,11 +228,11 @@ ...@@ -228,11 +228,11 @@
"movd %6, %%mm6 \n\t" /*yalpha1*/\ "movd %6, %%mm6 \n\t" /*yalpha1*/\
"punpcklwd %%mm6, %%mm6 \n\t"\ "punpcklwd %%mm6, %%mm6 \n\t"\
"punpcklwd %%mm6, %%mm6 \n\t"\ "punpcklwd %%mm6, %%mm6 \n\t"\
"movq %%mm6, asm_yalpha1 \n\t"\ "movq %%mm6, "MANGLE(asm_yalpha1)"\n\t"\
"movd %7, %%mm5 \n\t" /*uvalpha1*/\ "movd %7, %%mm5 \n\t" /*uvalpha1*/\
"punpcklwd %%mm5, %%mm5 \n\t"\ "punpcklwd %%mm5, %%mm5 \n\t"\
"punpcklwd %%mm5, %%mm5 \n\t"\ "punpcklwd %%mm5, %%mm5 \n\t"\
"movq %%mm5, asm_uvalpha1 \n\t"\ "movq %%mm5, "MANGLE(asm_uvalpha1)"\n\t"\
"xorl %%eax, %%eax \n\t"\ "xorl %%eax, %%eax \n\t"\
".balign 16 \n\t"\ ".balign 16 \n\t"\
"1: \n\t"\ "1: \n\t"\
...@@ -242,19 +242,19 @@ ...@@ -242,19 +242,19 @@
"movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"movq asm_uvalpha1, %%mm0 \n\t"\ "movq "MANGLE(asm_uvalpha1)", %%mm0\n\t"\
"pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
"pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\ "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\ "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
"pmulhw ugCoeff, %%mm3 \n\t"\ "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
"pmulhw vgCoeff, %%mm4 \n\t"\ "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
"movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\
"movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\
...@@ -262,18 +262,18 @@ ...@@ -262,18 +262,18 @@
"movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\ "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\
"psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
"psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
"pmulhw asm_yalpha1, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ "pmulhw "MANGLE(asm_yalpha1)", %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
"pmulhw asm_yalpha1, %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ "pmulhw "MANGLE(asm_yalpha1)", %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
"paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
"pmulhw ubCoeff, %%mm2 \n\t"\ "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
"pmulhw vrCoeff, %%mm5 \n\t"\ "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\ "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw yCoeff, %%mm1 \n\t"\ "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
"pmulhw yCoeff, %%mm7 \n\t"\ "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
"paddw %%mm3, %%mm4 \n\t"\ "paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\ "movq %%mm2, %%mm0 \n\t"\
...@@ -305,23 +305,23 @@ ...@@ -305,23 +305,23 @@
"movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\ "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\ "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
"pmulhw ugCoeff, %%mm3 \n\t"\ "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
"pmulhw vgCoeff, %%mm4 \n\t"\ "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
"movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\
"movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"pmulhw ubCoeff, %%mm2 \n\t"\ "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
"pmulhw vrCoeff, %%mm5 \n\t"\ "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\ "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw yCoeff, %%mm1 \n\t"\ "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
"pmulhw yCoeff, %%mm7 \n\t"\ "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
"paddw %%mm3, %%mm4 \n\t"\ "paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\ "movq %%mm2, %%mm0 \n\t"\
...@@ -358,23 +358,23 @@ ...@@ -358,23 +358,23 @@
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
"psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
"psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
"psubw w400, %%mm3 \n\t" /* (U-128)8*/\ "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\
"psubw w400, %%mm4 \n\t" /* (V-128)8*/\ "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
"pmulhw ugCoeff, %%mm3 \n\t"\ "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
"pmulhw vgCoeff, %%mm4 \n\t"\ "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
"movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\
"movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\
"psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
"pmulhw ubCoeff, %%mm2 \n\t"\ "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
"pmulhw vrCoeff, %%mm5 \n\t"\ "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\ "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\
"pmulhw yCoeff, %%mm1 \n\t"\ "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
"pmulhw yCoeff, %%mm7 \n\t"\ "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
"paddw %%mm3, %%mm4 \n\t"\ "paddw %%mm3, %%mm4 \n\t"\
"movq %%mm2, %%mm0 \n\t"\ "movq %%mm2, %%mm0 \n\t"\
...@@ -423,9 +423,9 @@ ...@@ -423,9 +423,9 @@
" jb 1b \n\t" " jb 1b \n\t"
#define WRITEBGR16 \ #define WRITEBGR16 \
"pand bF8, %%mm2 \n\t" /* B */\ "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
"pand bFC, %%mm4 \n\t" /* G */\ "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
"pand bF8, %%mm5 \n\t" /* R */\ "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
"psrlq $3, %%mm2 \n\t"\ "psrlq $3, %%mm2 \n\t"\
\ \
"movq %%mm2, %%mm1 \n\t"\ "movq %%mm2, %%mm1 \n\t"\
...@@ -450,9 +450,9 @@ ...@@ -450,9 +450,9 @@
" jb 1b \n\t" " jb 1b \n\t"
#define WRITEBGR15 \ #define WRITEBGR15 \
"pand bF8, %%mm2 \n\t" /* B */\ "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
"pand bF8, %%mm4 \n\t" /* G */\ "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
"pand bF8, %%mm5 \n\t" /* R */\ "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
"psrlq $3, %%mm2 \n\t"\ "psrlq $3, %%mm2 \n\t"\
"psrlq $1, %%mm5 \n\t"\ "psrlq $1, %%mm5 \n\t"\
\ \
...@@ -494,8 +494,8 @@ ...@@ -494,8 +494,8 @@
\ \
"movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
"psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\ "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
"pand bm00000111, %%mm4 \n\t" /* 00000RGB 0 */\ "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\
"pand bm11111000, %%mm0 \n\t" /* 00RGB000 0.5 */\ "pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\
"por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\ "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
"movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\ "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
"psllq $48, %%mm2 \n\t" /* GB000000 1 */\ "psllq $48, %%mm2 \n\t" /* GB000000 1 */\
...@@ -505,11 +505,11 @@ ...@@ -505,11 +505,11 @@
"psrld $16, %%mm4 \n\t" /* 000R000R 1 */\ "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
"psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\ "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
"por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\ "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
"pand bm00001111, %%mm2 \n\t" /* 0000RGBR 1 */\ "pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\
"movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\ "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
"psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\ "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
"pand bm00000111, %%mm4 \n\t" /* 00000RGB 2 */\ "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\
"pand bm11111000, %%mm1 \n\t" /* 00RGB000 2.5 */\ "pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\
"por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\ "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
"movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\ "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
"psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\ "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
...@@ -518,8 +518,8 @@ ...@@ -518,8 +518,8 @@
"psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\ "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
"movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\ "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
"psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\ "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
"pand bm00000111, %%mm5 \n\t" /* 00000RGB 3 */\ "pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\
"pand bm11111000, %%mm3 \n\t" /* 00RGB000 3.5 */\ "pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\
"por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\ "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
"psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\ "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
"por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\ "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
...@@ -588,8 +588,8 @@ ...@@ -588,8 +588,8 @@
#define WRITEBGR24MMX2 \ #define WRITEBGR24MMX2 \
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
"movq M24A, %%mm0 \n\t"\ "movq "MANGLE(M24A)", %%mm0 \n\t"\
"movq M24C, %%mm7 \n\t"\ "movq "MANGLE(M24C)", %%mm7 \n\t"\
"pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
"pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
"pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
...@@ -608,7 +608,7 @@ ...@@ -608,7 +608,7 @@
"pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
"pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
\ \
"pand M24B, %%mm1 \n\t" /* B5 B4 B3 */\ "pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\
"pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
"pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
\ \
...@@ -622,7 +622,7 @@ ...@@ -622,7 +622,7 @@
\ \
"pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
"pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
"pand M24B, %%mm6 \n\t" /* R7 R6 R5 */\ "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\
\ \
"por %%mm1, %%mm3 \n\t"\ "por %%mm1, %%mm3 \n\t"\
"por %%mm3, %%mm6 \n\t"\ "por %%mm3, %%mm6 \n\t"\
...@@ -777,9 +777,9 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu ...@@ -777,9 +777,9 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu
YSCALEYUV2RGBX YSCALEYUV2RGBX
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t" "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb g5Dither, %%mm4 \n\t" "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb r5Dither, %%mm5 \n\t" "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif #endif
WRITEBGR15 WRITEBGR15
...@@ -797,9 +797,9 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu ...@@ -797,9 +797,9 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu
YSCALEYUV2RGBX YSCALEYUV2RGBX
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t" "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb g6Dither, %%mm4 \n\t" "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb r5Dither, %%mm5 \n\t" "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif #endif
WRITEBGR16 WRITEBGR16
...@@ -876,8 +876,8 @@ FULL_YSCALEYUV2RGB ...@@ -876,8 +876,8 @@ FULL_YSCALEYUV2RGB
"movq %%mm3, %%mm2 \n\t" // BGR0BGR0 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0
"psrlq $8, %%mm3 \n\t" // GR0BGR00 "psrlq $8, %%mm3 \n\t" // GR0BGR00
"pand bm00000111, %%mm2 \n\t" // BGR00000 "pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000
"pand bm11111000, %%mm3 \n\t" // 000BGR00 "pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00
"por %%mm2, %%mm3 \n\t" // BGRBGR00 "por %%mm2, %%mm3 \n\t" // BGRBGR00
"movq %%mm1, %%mm2 \n\t" "movq %%mm1, %%mm2 \n\t"
"psllq $48, %%mm1 \n\t" // 000000BG "psllq $48, %%mm1 \n\t" // 000000BG
...@@ -916,9 +916,9 @@ FULL_YSCALEYUV2RGB ...@@ -916,9 +916,9 @@ FULL_YSCALEYUV2RGB
FULL_YSCALEYUV2RGB FULL_YSCALEYUV2RGB
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
"paddusb g5Dither, %%mm1 \n\t" "paddusb "MANGLE(g5Dither)", %%mm1\n\t"
"paddusb r5Dither, %%mm0 \n\t" "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
"paddusb b5Dither, %%mm3 \n\t" "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
#endif #endif
"punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
"punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
...@@ -927,8 +927,8 @@ FULL_YSCALEYUV2RGB ...@@ -927,8 +927,8 @@ FULL_YSCALEYUV2RGB
"psrlw $3, %%mm3 \n\t" "psrlw $3, %%mm3 \n\t"
"psllw $2, %%mm1 \n\t" "psllw $2, %%mm1 \n\t"
"psllw $7, %%mm0 \n\t" "psllw $7, %%mm0 \n\t"
"pand g15Mask, %%mm1 \n\t" "pand "MANGLE(g15Mask)", %%mm1 \n\t"
"pand r15Mask, %%mm0 \n\t" "pand "MANGLE(r15Mask)", %%mm0 \n\t"
"por %%mm3, %%mm1 \n\t" "por %%mm3, %%mm1 \n\t"
"por %%mm1, %%mm0 \n\t" "por %%mm1, %%mm0 \n\t"
...@@ -950,9 +950,9 @@ FULL_YSCALEYUV2RGB ...@@ -950,9 +950,9 @@ FULL_YSCALEYUV2RGB
FULL_YSCALEYUV2RGB FULL_YSCALEYUV2RGB
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
"paddusb g6Dither, %%mm1 \n\t" "paddusb "MANGLE(g6Dither)", %%mm1\n\t"
"paddusb r5Dither, %%mm0 \n\t" "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
"paddusb b5Dither, %%mm3 \n\t" "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
#endif #endif
"punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
"punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
...@@ -961,8 +961,8 @@ FULL_YSCALEYUV2RGB ...@@ -961,8 +961,8 @@ FULL_YSCALEYUV2RGB
"psrlw $3, %%mm3 \n\t" "psrlw $3, %%mm3 \n\t"
"psllw $3, %%mm1 \n\t" "psllw $3, %%mm1 \n\t"
"psllw $8, %%mm0 \n\t" "psllw $8, %%mm0 \n\t"
"pand g16Mask, %%mm1 \n\t" "pand "MANGLE(g16Mask)", %%mm1 \n\t"
"pand r16Mask, %%mm0 \n\t" "pand "MANGLE(r16Mask)", %%mm0 \n\t"
"por %%mm3, %%mm1 \n\t" "por %%mm3, %%mm1 \n\t"
"por %%mm1, %%mm0 \n\t" "por %%mm1, %%mm0 \n\t"
...@@ -1057,9 +1057,9 @@ FULL_YSCALEYUV2RGB ...@@ -1057,9 +1057,9 @@ FULL_YSCALEYUV2RGB
YSCALEYUV2RGB YSCALEYUV2RGB
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t" "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb g5Dither, %%mm4 \n\t" "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb r5Dither, %%mm5 \n\t" "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif #endif
WRITEBGR15 WRITEBGR15
...@@ -1075,9 +1075,9 @@ FULL_YSCALEYUV2RGB ...@@ -1075,9 +1075,9 @@ FULL_YSCALEYUV2RGB
YSCALEYUV2RGB YSCALEYUV2RGB
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t" "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb g6Dither, %%mm4 \n\t" "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb r5Dither, %%mm5 \n\t" "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif #endif
WRITEBGR16 WRITEBGR16
...@@ -1234,9 +1234,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * ...@@ -1234,9 +1234,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
YSCALEYUV2RGB1 YSCALEYUV2RGB1
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t" "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb g5Dither, %%mm4 \n\t" "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb r5Dither, %%mm5 \n\t" "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif #endif
WRITEBGR15 WRITEBGR15
:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
...@@ -1250,9 +1250,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * ...@@ -1250,9 +1250,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
YSCALEYUV2RGB1 YSCALEYUV2RGB1
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t" "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb g6Dither, %%mm4 \n\t" "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb r5Dither, %%mm5 \n\t" "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif #endif
WRITEBGR16 WRITEBGR16
...@@ -1291,9 +1291,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * ...@@ -1291,9 +1291,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
YSCALEYUV2RGB1b YSCALEYUV2RGB1b
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t" "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb g5Dither, %%mm4 \n\t" "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
"paddusb r5Dither, %%mm5 \n\t" "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif #endif
WRITEBGR15 WRITEBGR15
:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
...@@ -1307,9 +1307,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * ...@@ -1307,9 +1307,9 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *
YSCALEYUV2RGB1b YSCALEYUV2RGB1b
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
"paddusb b5Dither, %%mm2 \n\t" "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
"paddusb g6Dither, %%mm4 \n\t" "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
"paddusb r5Dither, %%mm5 \n\t" "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif #endif
WRITEBGR16 WRITEBGR16
...@@ -1435,7 +1435,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW ...@@ -1435,7 +1435,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
dst-= counter/2; dst-= counter/2;
asm volatile( asm volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"movq w02, %%mm6 \n\t" "movq "MANGLE(w02)", %%mm6 \n\t"
"pushl %%ebp \n\t" // we use 7 regs here ... "pushl %%ebp \n\t" // we use 7 regs here ...
"movl %%eax, %%ebp \n\t" "movl %%eax, %%ebp \n\t"
".balign 16 \n\t" ".balign 16 \n\t"
...@@ -1473,7 +1473,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW ...@@ -1473,7 +1473,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
dst-= counter/2; dst-= counter/2;
asm volatile( asm volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"movq w02, %%mm6 \n\t" "movq "MANGLE(w02)", %%mm6 \n\t"
"pushl %%ebp \n\t" // we use 7 regs here ... "pushl %%ebp \n\t" // we use 7 regs here ...
"movl %%eax, %%ebp \n\t" "movl %%eax, %%ebp \n\t"
".balign 16 \n\t" ".balign 16 \n\t"
...@@ -1523,7 +1523,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW ...@@ -1523,7 +1523,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
dst-= counter/2; dst-= counter/2;
asm volatile( asm volatile(
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"movq w02, %%mm6 \n\t" "movq "MANGLE(w02)", %%mm6 \n\t"
".balign 16 \n\t" ".balign 16 \n\t"
"1: \n\t" "1: \n\t"
"movl %2, %%ecx \n\t" "movl %2, %%ecx \n\t"
...@@ -1614,7 +1614,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in ...@@ -1614,7 +1614,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in
"psllq $16, %%mm2 \n\t" "psllq $16, %%mm2 \n\t"
"paddw %%mm6, %%mm2 \n\t" "paddw %%mm6, %%mm2 \n\t"
"psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFF "psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFF
"movq %%mm2, temp0 \n\t" "movq %%mm2, "MANGLE(temp0)" \n\t"
"movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF "movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF
"punpcklwd %%mm6, %%mm6 \n\t" "punpcklwd %%mm6, %%mm6 \n\t"
"punpcklwd %%mm6, %%mm6 \n\t" "punpcklwd %%mm6, %%mm6 \n\t"
...@@ -1630,8 +1630,8 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in ...@@ -1630,8 +1630,8 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in
PREFETCH" 1024(%%esi) \n\t"\ PREFETCH" 1024(%%esi) \n\t"\
PREFETCH" 1056(%%esi) \n\t"\ PREFETCH" 1056(%%esi) \n\t"\
PREFETCH" 1088(%%esi) \n\t"\ PREFETCH" 1088(%%esi) \n\t"\
"call funnyYCode \n\t"\ "call "MANGLE(funnyYCode)" \n\t"\
"movq temp0, %%mm2 \n\t"\ "movq "MANGLE(temp0)", %%mm2 \n\t"\
"xorl %%ecx, %%ecx \n\t" "xorl %%ecx, %%ecx \n\t"
FUNNY_Y_CODE FUNNY_Y_CODE
...@@ -1741,7 +1741,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, ...@@ -1741,7 +1741,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth,
"psllq $16, %%mm2 \n\t" "psllq $16, %%mm2 \n\t"
"paddw %%mm6, %%mm2 \n\t" "paddw %%mm6, %%mm2 \n\t"
"psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFFFF "psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFFFF
"movq %%mm2, temp0 \n\t" "movq %%mm2, "MANGLE(temp0)" \n\t"
"movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF "movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF
"punpcklwd %%mm6, %%mm6 \n\t" "punpcklwd %%mm6, %%mm6 \n\t"
"punpcklwd %%mm6, %%mm6 \n\t" "punpcklwd %%mm6, %%mm6 \n\t"
...@@ -1757,8 +1757,8 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, ...@@ -1757,8 +1757,8 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth,
PREFETCH" 1024(%%esi) \n\t"\ PREFETCH" 1024(%%esi) \n\t"\
PREFETCH" 1056(%%esi) \n\t"\ PREFETCH" 1056(%%esi) \n\t"\
PREFETCH" 1088(%%esi) \n\t"\ PREFETCH" 1088(%%esi) \n\t"\
"call funnyUVCode \n\t"\ "call "MANGLE(funnyUVCode)" \n\t"\
"movq temp0, %%mm2 \n\t"\ "movq "MANGLE(temp0)", %%mm2 \n\t"\
"xorl %%ecx, %%ecx \n\t" "xorl %%ecx, %%ecx \n\t"
FUNNYUVCODE FUNNYUVCODE
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment