Commit 99068c8f authored by mru

Alpha: fix pix_abs16

git-svn-id: file:///var/local/repositories/ffmpeg/trunk@16675 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent c6cb5c8c
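In short, as the diff below shows: pix_abs16x16_mvi_asm used to take only (pix1, pix2, line_size) and hard-coded the block height to 16 (the `lda a3, 16` removed below), so the C side carried a sad16x16_mvi wrapper and left c->pix_abs[0][0] disabled behind a FIXME. This commit switches the assembly to the standard five-argument comparison signature (void *v, pix1, pix2, line_size, h), so both c->sad[0] and c->pix_abs[0][0] can point at the asm routine directly and the wrapper goes away. For orientation, a minimal C model of what the routine computes, plain SAD over a 16-pixel-wide block of height h; the name sad16_ref is illustrative, not part of the commit:

#include <stdint.h>
#include <stdlib.h>

/* Reference model only: what pix_abs16x16_mvi_asm computes. */
static int sad16_ref(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    (void)v;                       /* context pointer, unused */
    while (h--) {
        for (int x = 0; x < 16; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;         /* advance both blocks one row */
        pix2 += line_size;
    }
    return sum;
}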
@@ -42,7 +42,7 @@ void get_pixels_mvi(DCTELEM *restrict block,
 void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
                      int stride);
 int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
-int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size);
+int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
@@ -287,11 +287,6 @@ void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
     put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
 }
 
-static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
-{
-    return pix_abs16x16_mvi_asm(a, b, stride);
-}
-
 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
 {
     c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
@@ -343,10 +338,9 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
         c->get_pixels = get_pixels_mvi;
         c->diff_pixels = diff_pixels_mvi;
-        c->sad[0] = sad16x16_mvi;
+        c->sad[0] = pix_abs16x16_mvi_asm;
         c->sad[1] = pix_abs8x8_mvi;
-        // c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed
-        c->pix_abs[0][0] = sad16x16_mvi;
+        c->pix_abs[0][0] = pix_abs16x16_mvi_asm;
         c->pix_abs[1][0] = pix_abs8x8_mvi;
         c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
         c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
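The remaining hunks update the assembly implementation itself (motion_est_mvi_asm.S). The Alpha calling convention passes the first six integer arguments in registers a0 through a5, so the new leading void *v shifts every parameter up one register:

old: pix_abs16x16_mvi_asm(pix1=a0, pix2=a1, line_size=a2), h fixed at 16
new: pix_abs16x16_mvi_asm(v=a0, pix1=a1, pix2=a2, line_size=a3, h=a4)

Accordingly, every a0/a1/a2 reference below becomes a1/a2/a3, the `lda a3, 16` constant is dropped in favour of the h arriving in a4, and since the context pointer in a0 is never read, a0 doubles as a scratch register in the aligned loop: `ldq a0, 8(a1)` replaces the old tf temporary, which presumably aliased a register that the new signature now occupies.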
@@ -60,9 +60,8 @@ pix_abs16x16_mvi_asm:
         jsr     AT, (AT), _mcount
 #endif
 
-        and     a1, 7, t0
+        and     a2, 7, t0
         clr     v0
-        lda     a3, 16
         beq     t0, $aligned
         .align 4
 $unaligned:
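The `and a2, 7, t0` / `beq t0, $aligned` test above checks whether pix2 is 8-byte aligned; the $unaligned loop in the next hunk assembles each 16-byte row of pix2 from three aligned ldq_u loads stitched together with extql/extqh. A rough little-endian C model of that idiom, under my own helper name uldq_model:

#include <stdint.h>
#include <string.h>

/* Model of the ldq_u + extql/extqh idiom: read 8 bytes from a possibly
 * unaligned address using only aligned 8-byte loads (little-endian). */
static uint64_t uldq_model(const uint8_t *p)
{
    uintptr_t base = (uintptr_t)p & ~(uintptr_t)7;  /* ldq_u ignores the low 3 bits */
    unsigned  sh   = (uintptr_t)p & 7;              /* misalignment in bytes */
    uint64_t  lo, hi;

    memcpy(&lo, (const void *)base, 8);             /* ldq_u: aligned load around p */
    if (sh == 0)
        return lo;                                  /* already aligned: one load suffices */
    memcpy(&hi, (const void *)(base + 8), 8);       /* next aligned quadword */
    /* extql keeps the high bytes of lo, extqh supplies the low bytes of hi */
    return (lo >> (8 * sh)) | (hi << (8 * (8 - sh)));
}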
@@ -86,80 +85,80 @@ $unaligned:
    td: error right */
         /* load line 0 */
-        ldq_u   t0, 0(a1)       # left_u
-        ldq_u   t1, 8(a1)       # mid
-        ldq_u   t2, 16(a1)      # right_u
-        ldq     t3, 0(a0)       # ref left
-        ldq     t4, 8(a0)       # ref right
-        addq    a0, a2, a0      # pix1
-        addq    a1, a2, a1      # pix2
+        ldq_u   t0, 0(a2)       # left_u
+        ldq_u   t1, 8(a2)       # mid
+        ldq_u   t2, 16(a2)      # right_u
+        ldq     t3, 0(a1)       # ref left
+        ldq     t4, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        addq    a2, a3, a2      # pix2
         /* load line 1 */
-        ldq_u   t5, 0(a1)       # left_u
-        ldq_u   t6, 8(a1)       # mid
-        ldq_u   t7, 16(a1)      # right_u
-        ldq     t8, 0(a0)       # ref left
-        ldq     t9, 8(a0)       # ref right
-        addq    a0, a2, a0      # pix1
-        addq    a1, a2, a1      # pix2
+        ldq_u   t5, 0(a2)       # left_u
+        ldq_u   t6, 8(a2)       # mid
+        ldq_u   t7, 16(a2)      # right_u
+        ldq     t8, 0(a1)       # ref left
+        ldq     t9, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        addq    a2, a3, a2      # pix2
         /* calc line 0 */
-        extql   t0, a1, t0      # left lo
-        extqh   t1, a1, ta      # left hi
-        extql   t1, a1, tb      # right lo
+        extql   t0, a2, t0      # left lo
+        extqh   t1, a2, ta      # left hi
+        extql   t1, a2, tb      # right lo
         or      t0, ta, t0      # left
-        extqh   t2, a1, t2      # right hi
+        extqh   t2, a2, t2      # right hi
         perr    t3, t0, tc      # error left
         or      t2, tb, t2      # right
         perr    t4, t2, td      # error right
         addq    v0, tc, v0      # add error left
         addq    v0, td, v0      # add error left
         /* calc line 1 */
-        extql   t5, a1, t5      # left lo
-        extqh   t6, a1, ta      # left hi
-        extql   t6, a1, tb      # right lo
+        extql   t5, a2, t5      # left lo
+        extqh   t6, a2, ta      # left hi
+        extql   t6, a2, tb      # right lo
         or      t5, ta, t5      # left
-        extqh   t7, a1, t7      # right hi
+        extqh   t7, a2, t7      # right hi
         perr    t8, t5, tc      # error left
         or      t7, tb, t7      # right
         perr    t9, t7, td      # error right
         addq    v0, tc, v0      # add error left
         addq    v0, td, v0      # add error left
         /* loop */
-        subq    a3, 2, a3       # h -= 2
-        bne     a3, $unaligned
+        subq    a4, 2, a4       # h -= 2
+        bne     a4, $unaligned
         ret
         .align 4
 $aligned:
         /* load line 0 */
-        ldq     t0, 0(a1)       # left
-        ldq     t1, 8(a1)       # right
-        addq    a1, a2, a1      # pix2
-        ldq     t2, 0(a0)       # ref left
-        ldq     t3, 8(a0)       # ref right
-        addq    a0, a2, a0      # pix1
+        ldq     t0, 0(a2)       # left
+        ldq     t1, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     t2, 0(a1)       # ref left
+        ldq     t3, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
         /* load line 1 */
-        ldq     t4, 0(a1)       # left
-        ldq     t5, 8(a1)       # right
-        addq    a1, a2, a1      # pix2
-        ldq     t6, 0(a0)       # ref left
-        ldq     t7, 8(a0)       # ref right
-        addq    a0, a2, a0      # pix1
+        ldq     t4, 0(a2)       # left
+        ldq     t5, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     t6, 0(a1)       # ref left
+        ldq     t7, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
         /* load line 2 */
-        ldq     t8, 0(a1)       # left
-        ldq     t9, 8(a1)       # right
-        addq    a1, a2, a1      # pix2
-        ldq     ta, 0(a0)       # ref left
-        ldq     tb, 8(a0)       # ref right
-        addq    a0, a2, a0      # pix1
+        ldq     t8, 0(a2)       # left
+        ldq     t9, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     ta, 0(a1)       # ref left
+        ldq     tb, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
         /* load line 3 */
-        ldq     tc, 0(a1)       # left
-        ldq     td, 8(a1)       # right
-        addq    a1, a2, a1      # pix2
-        ldq     te, 0(a0)       # ref left
-        ldq     tf, 8(a0)       # ref right
+        ldq     tc, 0(a2)       # left
+        ldq     td, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     te, 0(a1)       # ref left
+        ldq     a0, 8(a1)       # ref right
         /* calc line 0 */
         perr    t0, t2, t0      # error left
-        addq    a0, a2, a0      # pix1
+        addq    a1, a3, a1      # pix1
         perr    t1, t3, t1      # error right
         addq    v0, t0, v0      # add error left
         /* calc line 1 */
@@ -175,11 +174,11 @@ $aligned:
         /* calc line 3 */
         perr    tc, te, t0      # error left
         addq    v0, t1, v0      # add error right
-        perr    td, tf, t1      # error right
+        perr    td, a0, t1      # error right
         addq    v0, t0, v0      # add error left
         addq    v0, t1, v0      # add error right
         /* loop */
-        subq    a3, 4, a3       # h -= 4
-        bne     a3, $aligned
+        subq    a4, 4, a4       # h -= 4
+        bne     a4, $aligned
         ret
         .end pix_abs16x16_mvi_asm
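Throughout both loops the actual SAD work is done by perr, the pixel-error instruction from the Alpha MVI (motion video instructions) extension: it sums the absolute differences of the eight corresponding bytes of its two source registers, so each 16-pixel row costs two perr operations, one per 8-byte half. A minimal C model, with an illustrative name:

#include <stdint.h>

/* C model of the MVI "perr" instruction: byte-wise sum of absolute
 * differences between two 64-bit registers (result fits in 0..2040). */
static uint64_t perr_model(uint64_t ra, uint64_t rb)
{
    uint64_t sum = 0;
    for (int i = 0; i < 8; i++) {
        unsigned a = (ra >> (8 * i)) & 0xff;
        unsigned b = (rb >> (8 * i)) & 0xff;
        sum += a > b ? a - b : b - a;
    }
    return sum;
}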