Commit f9f93b79 authored by lorenm's avatar lorenm

faster h264_chroma_mc8_mmx, added h264_chroma_mc4_mmx.

2-4% overall speedup.


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@4666 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 18bd8ec7
This diff is collapsed.
...@@ -43,6 +43,7 @@ static const uint64_t ff_pw_20 attribute_used __attribute__ ((aligned(8))) = 0x0 ...@@ -43,6 +43,7 @@ static const uint64_t ff_pw_20 attribute_used __attribute__ ((aligned(8))) = 0x0
static const uint64_t ff_pw_3 attribute_used __attribute__ ((aligned(8))) = 0x0003000300030003ULL; static const uint64_t ff_pw_3 attribute_used __attribute__ ((aligned(8))) = 0x0003000300030003ULL;
static const uint64_t ff_pw_4 attribute_used __attribute__ ((aligned(8))) = 0x0004000400040004ULL; static const uint64_t ff_pw_4 attribute_used __attribute__ ((aligned(8))) = 0x0004000400040004ULL;
static const uint64_t ff_pw_5 attribute_used __attribute__ ((aligned(8))) = 0x0005000500050005ULL; static const uint64_t ff_pw_5 attribute_used __attribute__ ((aligned(8))) = 0x0005000500050005ULL;
static const uint64_t ff_pw_8 attribute_used __attribute__ ((aligned(8))) = 0x0008000800080008ULL;
static const uint64_t ff_pw_16 attribute_used __attribute__ ((aligned(8))) = 0x0010001000100010ULL; static const uint64_t ff_pw_16 attribute_used __attribute__ ((aligned(8))) = 0x0010001000100010ULL;
static const uint64_t ff_pw_32 attribute_used __attribute__ ((aligned(8))) = 0x0020002000200020ULL; static const uint64_t ff_pw_32 attribute_used __attribute__ ((aligned(8))) = 0x0020002000200020ULL;
static const uint64_t ff_pw_64 attribute_used __attribute__ ((aligned(8))) = 0x0040004000400040ULL; static const uint64_t ff_pw_64 attribute_used __attribute__ ((aligned(8))) = 0x0040004000400040ULL;
...@@ -2726,6 +2727,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -2726,6 +2727,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->h263_v_loop_filter= h263_v_loop_filter_mmx; c->h263_v_loop_filter= h263_v_loop_filter_mmx;
c->h263_h_loop_filter= h263_h_loop_filter_mmx; c->h263_h_loop_filter= h263_h_loop_filter_mmx;
c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx; c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx;
c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx;
if (mm_flags & MM_MMXEXT) { if (mm_flags & MM_MMXEXT) {
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
...@@ -2825,6 +2827,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -2825,6 +2827,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#undef dspfunc #undef dspfunc
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2; c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2;
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2;
c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2; c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2; c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2; c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
...@@ -2936,6 +2939,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -2936,6 +2939,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
dspfunc(avg_h264_qpel, 2, 4); dspfunc(avg_h264_qpel, 2, 4);
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow; c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow;
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
} }
} }
......
...@@ -892,22 +892,42 @@ H264_MC(avg_, 16,mmx2) ...@@ -892,22 +892,42 @@ H264_MC(avg_, 16,mmx2)
#define H264_CHROMA_OP(S,D) #define H264_CHROMA_OP(S,D)
#define H264_CHROMA_OP4(S,D,T)
#define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_mmx #define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_mmx
#define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_mmx
#define H264_CHROMA_MC8_MV0 put_pixels8_mmx
#include "dsputil_h264_template_mmx.c" #include "dsputil_h264_template_mmx.c"
#undef H264_CHROMA_OP #undef H264_CHROMA_OP
#undef H264_CHROMA_OP4
#undef H264_CHROMA_MC8_TMPL #undef H264_CHROMA_MC8_TMPL
#undef H264_CHROMA_MC4_TMPL
#undef H264_CHROMA_MC8_MV0
#define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t" #define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t"
#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
"pavgb " #T ", " #D " \n\t"
#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_mmx2 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_mmx2
#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_mmx2
#define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
#include "dsputil_h264_template_mmx.c" #include "dsputil_h264_template_mmx.c"
#undef H264_CHROMA_OP #undef H264_CHROMA_OP
#undef H264_CHROMA_OP4
#undef H264_CHROMA_MC8_TMPL #undef H264_CHROMA_MC8_TMPL
#undef H264_CHROMA_MC4_TMPL
#undef H264_CHROMA_MC8_MV0
#define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t" #define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t"
#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
"pavgusb " #T ", " #D " \n\t"
#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_3dnow #define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_3dnow
#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_3dnow
#define H264_CHROMA_MC8_MV0 avg_pixels8_3dnow
#include "dsputil_h264_template_mmx.c" #include "dsputil_h264_template_mmx.c"
#undef H264_CHROMA_OP #undef H264_CHROMA_OP
#undef H264_CHROMA_OP4
#undef H264_CHROMA_MC8_TMPL #undef H264_CHROMA_MC8_TMPL
#undef H264_CHROMA_MC4_TMPL
#undef H264_CHROMA_MC8_MV0
/***********************************/ /***********************************/
/* weighted prediction */ /* weighted prediction */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment