Commit 4f66e6e9 authored by michaelni

faster 8x8 & 16x16 plane prediction by skal (massimin at planet-d dot net)


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@1769 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 63db1b4c
...@@ -1681,41 +1681,34 @@ static void pred16x16_128_dc_c(uint8_t *src, int stride){ ...@@ -1681,41 +1681,34 @@ static void pred16x16_128_dc_c(uint8_t *src, int stride){
} }
static void pred16x16_plane_c(uint8_t *src, int stride){ static void pred16x16_plane_c(uint8_t *src, int stride){
int i, j, k;
int a;
uint8_t *cm = cropTbl + MAX_NEG_CROP; uint8_t *cm = cropTbl + MAX_NEG_CROP;
int i, dx, dy, dc; const uint8_t * const src0 = src+7-stride;
int temp[16]; const uint8_t *src1 = src+8*stride-1;
const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
dc= 16*(src[15-stride] + src[-1+15*stride]); int H = src0[1] - src0[-1];
int V = src1[0] - src2[ 0];
dx=dy=0; for(k=2; k<=8; ++k) {
for(i=1; i<9; i++){ src1 += stride; src2 -= stride;
dx += i*(src[7+i-stride] - src[7-i-stride]); H += k*(src0[k] - src0[-k]);
dy += i*(src[-1+(7+i)*stride] - src[-1+(7-i)*stride]); V += k*(src1[0] - src2[ 0]);
} }
dx= (5*dx+32)>>6; H = ( 5*H+32 ) >> 6;
dy= (5*dy+32)>>6; V = ( 5*V+32 ) >> 6;
dc += 16; a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
for(j=16; j>0; --j) {
//FIXME modifiy dc,dx,dy to avoid -7 int b = a;
a += V;
for(i=0; i<16; i++) for(i=-16; i<0; i+=4) {
temp[i]= dx*(i-7) + dc; src[16+i] = cm[ (b ) >> 5 ];
src[17+i] = cm[ (b+ H) >> 5 ];
if( (dc - ABS(dx)*8 - ABS(dy)*8)>>5 < 0 src[18+i] = cm[ (b+2*H) >> 5 ];
|| (dc + ABS(dx)*8 + ABS(dy)*8)>>5 > 255){ src[19+i] = cm[ (b+3*H) >> 5 ];
b += 4*H;
for(i=0; i<16; i++){ }
int j; src += stride;
for(j=0; j<16; j++)
src[j + i*stride]= cm[ (temp[j] + dy*(i-7))>>5 ];
}
}else{
for(i=0; i<16; i++){
int j;
for(j=0; j<16; j++)
src[j + i*stride]= (temp[j] + dy*(i-7))>>5;
}
} }
} }
...@@ -1823,41 +1816,35 @@ static void pred8x8_dc_c(uint8_t *src, int stride){ ...@@ -1823,41 +1816,35 @@ static void pred8x8_dc_c(uint8_t *src, int stride){
} }
static void pred8x8_plane_c(uint8_t *src, int stride){ static void pred8x8_plane_c(uint8_t *src, int stride){
int j, k;
int a;
uint8_t *cm = cropTbl + MAX_NEG_CROP; uint8_t *cm = cropTbl + MAX_NEG_CROP;
int i, dx, dy, dc; const uint8_t * const src0 = src+3-stride;
int temp[8]; const uint8_t *src1 = src+4*stride-1;
const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
dc= 16*(src[7-stride] + src[-1+7*stride]); int H = src0[1] - src0[-1];
int V = src1[0] - src2[ 0];
dx=dy=0; for(k=2; k<=4; ++k) {
for(i=1; i<5; i++){ src1 += stride; src2 -= stride;
dx += i*(src[3+i-stride] - src[3-i-stride]); H += k*(src0[k] - src0[-k]);
dy += i*(src[-1+(3+i)*stride] - src[-1+(3-i)*stride]); V += k*(src1[0] - src2[ 0]);
} }
dx= (17*dx+16)>>5; H = ( 17*H+16 ) >> 5;
dy= (17*dy+16)>>5; V = ( 17*V+16 ) >> 5;
dc += 16; a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
for(j=8; j>0; --j) {
//FIXME modifiy dc,dx,dy to avoid -3 int b = a;
a += V;
for(i=0; i<8; i++) src[0] = cm[ (b ) >> 5 ];
temp[i]= dx*(i-3) + dc; src[1] = cm[ (b+ H) >> 5 ];
src[2] = cm[ (b+2*H) >> 5 ];
if( (dc - ABS(dx)*4 - ABS(dy)*4)>>5 < 0 src[3] = cm[ (b+3*H) >> 5 ];
|| (dc + ABS(dx)*4 + ABS(dy)*4)>>5 > 255){ src[4] = cm[ (b+4*H) >> 5 ];
src[5] = cm[ (b+5*H) >> 5 ];
for(i=0; i<8; i++){ src[6] = cm[ (b+6*H) >> 5 ];
int j; src[7] = cm[ (b+7*H) >> 5 ];
for(j=0; j<8; j++) src += stride;
src[j + i*stride]= cm[ (temp[j] + dy*(i-3))>>5 ];
}
}else{
for(i=0; i<8; i++){
int j;
for(j=0; j<8; j++)
src[j + i*stride]= (temp[j] + dy*(i-3))>>5;
}
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment