Commit 4ddf8c5a authored by darkshikari

Port x264 deblocking code to libavcodec. This includes SSE2 luma deblocking...

Port x264 deblocking code to libavcodec.  This includes SSE2 luma deblocking code and both MMXEXT and SSE2 luma intra deblocking code for H.264 decoding.  This assembly is available under --enable-gpl and speeds decoding of Cathedral by 7%.

git-svn-id: file:///var/local/repositories/ffmpeg/trunk@16239 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 7c78a00c
...@@ -404,7 +404,8 @@ MMX-OBJS-$(CONFIG_VP6_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o ...@@ -404,7 +404,8 @@ MMX-OBJS-$(CONFIG_VP6_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
MMX-OBJS-$(CONFIG_VP6A_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o MMX-OBJS-$(CONFIG_VP6A_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
MMX-OBJS-$(CONFIG_VP6F_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o MMX-OBJS-$(CONFIG_VP6F_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
MMX-OBJS-$(CONFIG_WMV3_DECODER) += i386/vc1dsp_mmx.o MMX-OBJS-$(CONFIG_WMV3_DECODER) += i386/vc1dsp_mmx.o
MMX-OBJS-$(HAVE_YASM) += i386/dsputil_yasm.o MMX-OBJS-$(HAVE_YASM) += i386/dsputil_yasm.o \
i386/h264_deblock_sse2.o
OBJS-$(HAVE_MMX) += i386/cpuid.o \ OBJS-$(HAVE_MMX) += i386/cpuid.o \
i386/dnxhd_mmx.o \ i386/dnxhd_mmx.o \
......
...@@ -2327,6 +2327,17 @@ static void float_to_int16_sse2(int16_t *dst, const float *src, long len){ ...@@ -2327,6 +2327,17 @@ static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
/**
 * MMXEXT intra luma deblocking wrapper.
 *
 * Invokes ff_x264_deblock_v8_luma_intra_mmxext() twice, first at pix and
 * then at pix + 8, forwarding stride, alpha and beta unchanged to both calls.
 */
static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
{
    int x_off;

    /* Two passes of the v8 helper: byte offsets 0 and 8 from pix. */
    for (x_off = 0; x_off < 16; x_off += 8)
        ff_x264_deblock_v8_luma_intra_mmxext(pix + x_off, stride, alpha, beta);
}
void ff_x264_deblock_v_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
void ff_x264_deblock_h_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
#else #else
#define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6) #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
#define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
...@@ -2853,6 +2864,21 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -2853,6 +2864,21 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
} }
#endif #endif
#if defined(CONFIG_GPL) && defined(HAVE_YASM)
if( mm_flags&FF_MM_MMXEXT ){
#ifndef ARCH_X86_64
c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
#endif
if( mm_flags&FF_MM_SSE2 ){
c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
}
}
#endif
#ifdef CONFIG_SNOW_DECODER #ifdef CONFIG_SNOW_DECODER
if(mm_flags & FF_MM_SSE2 & 0){ if(mm_flags & FF_MM_SSE2 & 0){
c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment