Commit dffc61c6 authored by mru

ARM: NEON avg_pixels8 and avg_h264_qpel8_mc00

git-svn-id: file:///var/local/repositories/ffmpeg/trunk@20190 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent d6439e09
...@@ -49,6 +49,7 @@ void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); ...@@ -49,6 +49,7 @@ void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, int, int);
void ff_avg_pixels8_neon(uint8_t *, const uint8_t *, int, int);
void ff_add_pixels_clamped_neon(const DCTELEM *, uint8_t *, int); void ff_add_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
void ff_put_pixels_clamped_neon(const DCTELEM *, uint8_t *, int); void ff_put_pixels_clamped_neon(const DCTELEM *, uint8_t *, int);
...@@ -90,6 +91,8 @@ void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int); ...@@ -90,6 +91,8 @@ void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
void ff_avg_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int); void ff_avg_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
void ff_avg_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
...@@ -230,6 +233,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) ...@@ -230,6 +233,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon; c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon; c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
c->add_pixels_clamped = ff_add_pixels_clamped_neon; c->add_pixels_clamped = ff_add_pixels_clamped_neon;
c->put_pixels_clamped = ff_put_pixels_clamped_neon; c->put_pixels_clamped = ff_put_pixels_clamped_neon;
...@@ -278,6 +282,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) ...@@ -278,6 +282,8 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon; c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon;
c->avg_h264_qpel_pixels_tab[1][ 0] = ff_avg_h264_qpel8_mc00_neon;
c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
......
...@@ -139,7 +139,7 @@ ...@@ -139,7 +139,7 @@
bx lr bx lr
.endm .endm
.macro pixels8 .macro pixels8 avg=0
1: vld1.64 {d0}, [r1], r2 1: vld1.64 {d0}, [r1], r2
vld1.64 {d1}, [r1], r2 vld1.64 {d1}, [r1], r2
vld1.64 {d2}, [r1], r2 vld1.64 {d2}, [r1], r2
...@@ -148,6 +148,17 @@ ...@@ -148,6 +148,17 @@
pld [r1] pld [r1]
pld [r1, r2] pld [r1, r2]
pld [r1, r2, lsl #1] pld [r1, r2, lsl #1]
@ Averaging variant: when the avg macro argument is non-zero, combine the
@ four rows held in d0-d3 with the four rows currently stored at r0 before
@ the stores that follow write them out.
.if \avg
@ Each vld1.64 reads 8 bytes from r0 (64-bit alignment hint) and then
@ advances r0 by r2; each vrhadd.u8 replaces the row register with the
@ per-byte unsigned rounding average, (a + b + 1) >> 1.
vld1.64 {d4}, [r0,:64], r2
vrhadd.u8 d0, d0, d4
vld1.64 {d5}, [r0,:64], r2
vrhadd.u8 d1, d1, d5
vld1.64 {d6}, [r0,:64], r2
vrhadd.u8 d2, d2, d6
vld1.64 {d7}, [r0,:64], r2
vrhadd.u8 d3, d3, d7
@ The four post-incremented loads moved r0 forward by 4 * r2; step it back
@ so the following stores begin at the first row that was just read.
sub r0, r0, r2, lsl #2
.endif
subs r3, r3, #4 subs r3, r3, #4
vst1.64 {d0}, [r0,:64], r2 vst1.64 {d0}, [r0,:64], r2
vst1.64 {d1}, [r0,:64], r2 vst1.64 {d1}, [r0,:64], r2
...@@ -261,6 +272,12 @@ function ff_put_h264_qpel8_mc00_neon, export=1 ...@@ -261,6 +272,12 @@ function ff_put_h264_qpel8_mc00_neon, export=1
pixfunc2 put_ pixels8_y2, _no_rnd, vhadd.u8 pixfunc2 put_ pixels8_y2, _no_rnd, vhadd.u8
pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1 pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1
@ Exported entry point ff_avg_h264_qpel8_mc00_neon. It only loads the
@ constant 8 into r3 and contains no return instruction, so execution runs
@ on into whatever code the pixfunc invocation below emits.
@ NOTE(review): r3 looks like the row count consumed by the pixels8 loop
@ (which decrements r3 by 4 per pass) - confirm against the macro body.
function ff_avg_h264_qpel8_mc00_neon, export=1
mov r3, #8
.endfunc
@ NOTE(review): pixfunc is defined outside this view; presumably this
@ instantiates ff_avg_pixels8_neon from the pixels8 macro with avg=1 - confirm.
pixfunc avg_ pixels8,, 1
function ff_put_pixels_clamped_neon, export=1 function ff_put_pixels_clamped_neon, export=1
vld1.64 {d16-d19}, [r0,:128]! vld1.64 {d16-d19}, [r0,:128]!
vqmovun.s16 d0, q8 vqmovun.s16 d0, q8
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment