Commit cd29b9ca authored by michael's avatar michael

replace a few mov + psrlq with pshufw, there are more cases which could...

Replace a few mov + psrlq pairs with pshufw. There are more cases that could benefit from this, but they would require us to duplicate some functions ...
The trick comes from various places (my own code in libpostproc, a patch on the x264 list, ...).


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@4608 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent b5b80d37
......@@ -1621,11 +1621,9 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
"movq %%mm0, %%mm1 \n\t"
"psrlq $32, %%mm0 \n\t"
"pshufw $0x0E, %%mm0, %%mm1 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
"psrlq $16, %%mm0 \n\t"
"pshufw $0x01, %%mm0, %%mm1 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"movd %%mm0, %0 \n\t"
......
......@@ -22,7 +22,11 @@
#ifdef HAVE_MMX2
/* MMX2 variants of the inline-asm helper macros (selected by HAVE_MMX2).
 * Every macro expands to string-literal fragments in AT&T operand order
 * (source first, destination last), each terminated by "\n\t". */

/* SPREADW(a): pshufw with selector 0 copies 16-bit word 0 of `a` into all
 * four word lanes of `a`. */
#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
/* PMAXW(a,b): per-word SIGNED maximum (pmaxsw); the result is written to `b`,
 * `a` is left unchanged. */
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
/* PMAX(a,b): horizontal maximum of the four 16-bit words of `a`.
 * The result ends up in word 0 of `a`; `b` is overwritten as scratch and the
 * upper words of `a` hold intermediate values.
 *   pshufw $0x0E: b.word0 = a.word2, b.word1 = a.word3  (upper half -> lower)
 *   PMAXW(b, a) : a.word0 = max(a0,a2), a.word1 = max(a1,a3)
 *   pshufw $0x01: b.word0 = a.word1
 *   PMAXW(b, a) : a.word0 = max of all four words
 * pshufw never modifies its source, so no separate movq copy is needed
 * before the shuffle. */
#define PMAX(a,b) \
"pshufw $0x0E," #a ", " #b " \n\t"\
PMAXW(b, a)\
"pshufw $0x01," #a ", " #b " \n\t"\
PMAXW(b, a)
#else
#define SPREADW(a) \
"punpcklwd " #a ", " #a " \n\t"\
......@@ -30,6 +34,14 @@
/* PMAXW(a,b): per-word UNSIGNED maximum built from two instructions; the
 * result is written to `b`, `a` is left unchanged.
 *   psubusw a, b : b = (b > a) ? b - a : 0   (unsigned saturating subtract)
 *   paddw   a, b : b = b + a                 -> max(a, b)
 * NOTE(review): this is an unsigned max, whereas pmaxsw is a signed max; the
 * two only agree for words in 0..0x7FFF -- confirm against the callers'
 * value range. */
#define PMAXW(a,b) \
"psubusw " #a ", " #b " \n\t"\
"paddw " #a ", " #b " \n\t"
/* PMAX(a,b): horizontal maximum of the four 16-bit words of `a`, using only
 * a register copy and 64-bit logical right shifts.
 * The result ends up in word 0 of `a`; `b` is overwritten as scratch and the
 * upper words of `a` hold intermediate values.
 *   movq a, b ; psrlq $32, a : keep a copy in b, move a's upper two words down
 *   PMAXW(b, a)              : a.word0 = max(a0,a2), a.word1 = max(a1,a3)
 *   movq a, b ; psrlq $16, a : keep a copy in b, move a.word1 down to word 0
 *   PMAXW(b, a)              : a.word0 = max of all four words */
#define PMAX(a,b) \
"movq " #a ", " #b " \n\t"\
"psrlq $32, " #a " \n\t"\
PMAXW(b, a)\
"movq " #a ", " #b " \n\t"\
"psrlq $16, " #a " \n\t"\
PMAXW(b, a)
#endif
static int RENAME(dct_quantize)(MpegEncContext *s,
......@@ -119,12 +131,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
PMAXW(%%mm0, %%mm3)
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
"movq %%mm3, %%mm0 \n\t"
"psrlq $32, %%mm3 \n\t"
PMAXW(%%mm0, %%mm3)
"movq %%mm3, %%mm0 \n\t"
"psrlq $16, %%mm3 \n\t"
PMAXW(%%mm0, %%mm3)
PMAX(%%mm3, %%mm0)
"movd %%mm3, %%"REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
......@@ -170,12 +177,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
PMAXW(%%mm0, %%mm3)
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
"movq %%mm3, %%mm0 \n\t"
"psrlq $32, %%mm3 \n\t"
PMAXW(%%mm0, %%mm3)
"movq %%mm3, %%mm0 \n\t"
"psrlq $16, %%mm3 \n\t"
PMAXW(%%mm0, %%mm3)
PMAX(%%mm3, %%mm0)
"movd %%mm3, %%"REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment