Commit 08dcf80e authored by lu_zero's avatar lu_zero

Partially address issue299, no performance change apparently

git-svn-id: file:///var/local/repositories/ffmpeg/trunk@11303 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 9dc31657
...@@ -51,6 +51,27 @@ ...@@ -51,6 +51,27 @@
dst += stride;\ dst += stride;\
src += stride; src += stride;
#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);\
\
psum = vec_mladd(vA, vsrc0ssH, v32ss);\
psum = vec_mladd(vB, vsrc1ssH, psum);\
psum = vec_mladd(vC, vsrc2ssH, psum);\
psum = vec_sr(psum, v6us);\
\
vdst = vec_ld(0, dst);\
ppsum = (vec_u8_t)vec_pack(psum, psum);\
vfdst = vec_perm(vdst, ppsum, fperm);\
\
OP_U8_ALTIVEC(fsum, vfdst, vdst);\
\
vec_st(fsum, 0, dst);\
\
vsrc0ssH = vsrc1ssH;\
vsrc1ssH = vsrc2ssH;\
\
dst += stride;\
src += stride;
void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
...@@ -109,6 +130,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in ...@@ -109,6 +130,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc); vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc);
vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc); vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc);
if (ABCD[3]) {
if (!loadSecond) {// -> !reallyBadAlign if (!loadSecond) {// -> !reallyBadAlign
for (i = 0 ; i < h ; i++) { for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(stride + 0, src); vsrcCuc = vec_ld(stride + 0, src);
...@@ -130,6 +152,26 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in ...@@ -130,6 +152,26 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
CHROMA_MC8_ALTIVEC_CORE CHROMA_MC8_ALTIVEC_CORE
} }
}
} else {
if (!loadSecond) {// -> !reallyBadAlign
for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(stride + 0, src);
vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
CHROMA_MC8_ALTIVEC_CORE_SIMPLE
}
} else {
vec_u8_t vsrcDuc;
for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(stride + 0, src);
vsrcDuc = vec_ld(stride + 16, src);
vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
CHROMA_MC8_ALTIVEC_CORE_SIMPLE
}
}
} }
POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment