Commit 90f66668 authored by lorenm

faster iterative_me: avoid duplicate mc of neighboring blocks.


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@4839 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 50958332
...@@ -2471,7 +2471,18 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, ...@@ -2471,7 +2471,18 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp,
int x, y; int x, y;
const int color = block->color[plane_index]; const int color = block->color[plane_index];
const int color4= color*0x01010101; const int color4= color*0x01010101;
if(b_w==16){ if(b_w==32){
for(y=0; y < b_h; y++){
*(uint32_t*)&dst[0 + y*stride]= color4;
*(uint32_t*)&dst[4 + y*stride]= color4;
*(uint32_t*)&dst[8 + y*stride]= color4;
*(uint32_t*)&dst[12+ y*stride]= color4;
*(uint32_t*)&dst[16+ y*stride]= color4;
*(uint32_t*)&dst[20+ y*stride]= color4;
*(uint32_t*)&dst[24+ y*stride]= color4;
*(uint32_t*)&dst[28+ y*stride]= color4;
}
}else if(b_w==16){
for(y=0; y < b_h; y++){ for(y=0; y < b_h; y++){
*(uint32_t*)&dst[0 + y*stride]= color4; *(uint32_t*)&dst[0 + y*stride]= color4;
*(uint32_t*)&dst[4 + y*stride]= color4; *(uint32_t*)&dst[4 + y*stride]= color4;
...@@ -2512,10 +2523,16 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, ...@@ -2512,10 +2523,16 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp,
assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
assert(!(b_w&(b_w-1))); assert(!(b_w&(b_w-1)));
assert(b_w>1 && b_h>1); assert(b_w>1 && b_h>1);
assert(tab_index>=0 && tab_index<4); assert(tab_index>=0 && tab_index<4 || b_w==32);
if((dx&3) || (dy&3)) if((dx&3) || (dy&3))
mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
else if(b_w==b_h) else if(b_w==32){
int y;
for(y=0; y<b_h; y+=16){
s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
}
}else if(b_w==b_h)
s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
else if(b_w==2*b_h){ else if(b_w==2*b_h){
s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride); s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
...@@ -2688,7 +2705,7 @@ assert(src_stride > 2*MB_SIZE + 5); ...@@ -2688,7 +2705,7 @@ assert(src_stride > 2*MB_SIZE + 5);
} }
//FIXME name clenup (b_w, block_w, b_width stuff) //FIXME name clenup (b_w, block_w, b_width stuff)
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
const int b_width = s->b_width << s->block_max_depth; const int b_width = s->b_width << s->block_max_depth;
const int b_height= s->b_height << s->block_max_depth; const int b_height= s->b_height << s->block_max_depth;
const int b_stride= b_width; const int b_stride= b_width;
...@@ -2720,6 +2737,8 @@ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8 ...@@ -2720,6 +2737,8 @@ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8
if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
obmc -= src_x; obmc -= src_x;
b_w += src_x; b_w += src_x;
if(!offset_dst)
dst -= src_x;
src_x=0; src_x=0;
}else if(src_x + b_w > w){ }else if(src_x + b_w > w){
b_w = w - src_x; b_w = w - src_x;
...@@ -2727,6 +2746,8 @@ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8 ...@@ -2727,6 +2746,8 @@ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8
if(src_y<0){ if(src_y<0){
obmc -= src_y*obmc_stride; obmc -= src_y*obmc_stride;
b_h += src_y; b_h += src_y;
if(!offset_dst)
dst -= src_y*dst_stride;
src_y=0; src_y=0;
}else if(src_y + b_h> h){ }else if(src_y + b_h> h){
b_h = h - src_y; b_h = h - src_y;
...@@ -2735,7 +2756,8 @@ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8 ...@@ -2735,7 +2756,8 @@ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8
if(b_w<=0 || b_h<=0) return; if(b_w<=0 || b_h<=0) return;
assert(src_stride > 2*MB_SIZE + 5); assert(src_stride > 2*MB_SIZE + 5);
dst += src_x + src_y*dst_stride; if(offset_dst)
dst += src_x + src_y*dst_stride;
dst8+= src_x + src_y*src_stride; dst8+= src_x + src_y*src_stride;
// src += src_x + src_y*src_stride; // src += src_x + src_y*src_stride;
...@@ -2954,7 +2976,7 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_ ...@@ -2954,7 +2976,7 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_
w, h, w, h,
w, ref_stride, obmc_stride, w, ref_stride, obmc_stride,
mb_x - 1, mb_y - 1, mb_x - 1, mb_y - 1,
add, plane_index); add, 1, plane_index);
STOP_TIMER("add_yblock") STOP_TIMER("add_yblock")
} }
...@@ -2978,9 +3000,8 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ ...@@ -2978,9 +3000,8 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
const int obmc_stride= plane_index ? block_size : 2*block_size; const int obmc_stride= plane_index ? block_size : 2*block_size;
const int ref_stride= s->current_picture.linesize[plane_index]; const int ref_stride= s->current_picture.linesize[plane_index];
uint8_t *ref= s-> last_picture.data[plane_index]; uint8_t *ref= s-> last_picture.data[plane_index];
uint8_t *dst= s->current_picture.data[plane_index];
uint8_t *src= s-> input_picture.data[plane_index]; uint8_t *src= s-> input_picture.data[plane_index];
const static DWTELEM zero_dst[4096]; //FIXME DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
const int b_stride = s->b_width << s->block_max_depth; const int b_stride = s->b_width << s->block_max_depth;
const int w= p->width; const int w= p->width;
const int h= p->height; const int h= p->height;
...@@ -2992,6 +3013,7 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ ...@@ -2992,6 +3013,7 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
b->type|= BLOCK_INTRA; b->type|= BLOCK_INTRA;
b->color[plane_index]= 0; b->color[plane_index]= 0;
memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
for(i=0; i<4; i++){ for(i=0; i<4; i++){
int mb_x2= mb_x + (i &1) - 1; int mb_x2= mb_x + (i &1) - 1;
...@@ -2999,20 +3021,23 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ ...@@ -2999,20 +3021,23 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
int x= block_w*mb_x2 + block_w/2; int x= block_w*mb_x2 + block_w/2;
int y= block_w*mb_y2 + block_w/2; int y= block_w*mb_y2 + block_w/2;
add_yblock(s, zero_dst, dst, ref, obmc, add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc,
x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index); x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
int obmc_v= obmc[index]; int obmc_v= obmc[index];
int d;
if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
if(x<0) obmc_v += obmc[index + block_w]; if(x<0) obmc_v += obmc[index + block_w];
if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
if(x+block_w>w) obmc_v += obmc[index - block_w]; if(x+block_w>w) obmc_v += obmc[index - block_w];
//FIXME precalc this or simplify it somehow else //FIXME precalc this or simplify it somehow else
ab += (src[x2 + y2*ref_stride] - dst[x2 + y2*ref_stride]) * obmc_v; d = -dst[index] + (1<<(FRAC_BITS-1));
dst[index] = d;
ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
aa += obmc_v * obmc_v; //FIXME precalclate this aa += obmc_v * obmc_v; //FIXME precalclate this
} }
} }
...@@ -3022,8 +3047,7 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ ...@@ -3022,8 +3047,7 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
} }
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
int i, y2;
Plane *p= &s->plane[plane_index]; Plane *p= &s->plane[plane_index];
const int block_size = MB_SIZE >> s->block_max_depth; const int block_size = MB_SIZE >> s->block_max_depth;
const int block_w = plane_index ? block_size/2 : block_size; const int block_w = plane_index ? block_size/2 : block_size;
...@@ -3032,41 +3056,50 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ ...@@ -3032,41 +3056,50 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
const int ref_stride= s->current_picture.linesize[plane_index]; const int ref_stride= s->current_picture.linesize[plane_index];
uint8_t *ref= s-> last_picture.data[plane_index]; uint8_t *ref= s-> last_picture.data[plane_index];
uint8_t *dst= s->current_picture.data[plane_index]; uint8_t *dst= s->current_picture.data[plane_index];
uint8_t *src= s-> input_picture.data[plane_index]; uint8_t *src= s-> input_picture.data[plane_index];
const static DWTELEM zero_dst[4096]; //FIXME DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
const int b_stride = s->b_width << s->block_max_depth; const int b_stride = s->b_width << s->block_max_depth;
const int b_height = s->b_height<< s->block_max_depth; const int b_height = s->b_height<< s->block_max_depth;
const int w= p->width; const int w= p->width;
const int h= p->height; const int h= p->height;
int distortion= 0; int distortion;
int rate= 0; int rate= 0;
const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
int sx= block_w*mb_x - block_w/2;
for(i=0; i<4; i++){ int sy= block_w*mb_y - block_w/2;
int mb_x2= mb_x + (i &1) - 1; const int x0= FFMAX(0,-sx);
int mb_y2= mb_y + (i>>1) - 1; const int y0= FFMAX(0,-sy);
int x= block_w*mb_x2 + block_w/2; const int x1= FFMIN(block_w*2, w-sx);
int y= block_w*mb_y2 + block_w/2; const int y1= FFMIN(block_w*2, h-sy);
int i,x,y;
add_yblock(s, zero_dst, dst, ref, obmc,
x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index); pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
//FIXME find a cleaner/simpler way to skip the outside stuff for(y=y0; y<y1; y++){
for(y2= y; y2<0; y2++) const uint8_t *obmc1= obmc_edged + y*obmc_stride;
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); const DWTELEM *pred1 = pred + y*obmc_stride;
for(y2= h; y2<y+block_w; y2++) uint8_t *cur1 = cur + y*ref_stride;
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
if(x<0){ for(x=x0; x<x1; x++){
for(y2= y; y2<y+block_w; y2++) int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); v = (v + pred1[x]) >> FRAC_BITS;
} if(v&(~255)) v= ~(v>>31);
if(x+block_w > w){ dst1[x] = v;
for(y2= y; y2<y+block_w; y2++)
memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
} }
}
assert(block_w== 8 || block_w==16); //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); if(block_w==16){
distortion = 0;
for(i=0; i<4; i++){
int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
}
}else{
assert(block_w==8);
distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
} }
if(plane_index==0){ if(plane_index==0){
...@@ -3112,7 +3145,7 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ ...@@ -3112,7 +3145,7 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
return distortion + rate*penalty_factor; return distortion + rate*penalty_factor;
} }
static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, int *best_rd){ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
const int b_stride= s->b_width << s->block_max_depth; const int b_stride= s->b_width << s->block_max_depth;
BlockNode *block= &s->block[mb_x + mb_y * b_stride]; BlockNode *block= &s->block[mb_x + mb_y * b_stride];
BlockNode backup= *block; BlockNode backup= *block;
...@@ -3138,7 +3171,7 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3 ...@@ -3138,7 +3171,7 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3
block->type &= ~BLOCK_INTRA; block->type &= ~BLOCK_INTRA;
} }
rd= get_block_rd(s, mb_x, mb_y, 0); rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
//FIXME chroma //FIXME chroma
if(rd < *best_rd){ if(rd < *best_rd){
...@@ -3151,9 +3184,9 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3 ...@@ -3151,9 +3184,9 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3
} }
/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */ /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, int *best_rd){ static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, const uint8_t *obmc_edged, int *best_rd){
int p[2] = {p0, p1}; int p[2] = {p0, p1};
return check_block(s, mb_x, mb_y, p, intra, best_rd); return check_block(s, mb_x, mb_y, p, intra, obmc_edged, best_rd);
} }
static void iterative_me(SnowContext *s){ static void iterative_me(SnowContext *s){
...@@ -3181,6 +3214,8 @@ static void iterative_me(SnowContext *s){ ...@@ -3181,6 +3214,8 @@ static void iterative_me(SnowContext *s){
BlockNode *trb= mb_x<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block; BlockNode *trb= mb_x<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block;
BlockNode *blb= mb_x && mb_y<b_height ? &s->block[index+b_stride-1] : &null_block; BlockNode *blb= mb_x && mb_y<b_height ? &s->block[index+b_stride-1] : &null_block;
BlockNode *brb= mb_x<b_width && mb_y<b_height ? &s->block[index+b_stride+1] : &null_block; BlockNode *brb= mb_x<b_width && mb_y<b_height ? &s->block[index+b_stride+1] : &null_block;
const int b_w= (MB_SIZE >> s->block_max_depth);
uint8_t obmc_edged[b_w*2][b_w*2];
if(pass && (block->type & BLOCK_OPT)) if(pass && (block->type & BLOCK_OPT))
continue; continue;
...@@ -3192,13 +3227,68 @@ static void iterative_me(SnowContext *s){ ...@@ -3192,13 +3227,68 @@ static void iterative_me(SnowContext *s){
memset(s->me_cache, 0, sizeof(s->me_cache)); memset(s->me_cache, 0, sizeof(s->me_cache));
s->me_cache_generation += 1<<22; s->me_cache_generation += 1<<22;
//FIXME precalc
{
int x, y;
memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
if(mb_x==0)
for(y=0; y<b_w*2; y++)
memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
if(mb_x==b_stride-1)
for(y=0; y<b_w*2; y++)
memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
if(mb_y==0){
for(x=0; x<b_w*2; x++)
obmc_edged[0][x] += obmc_edged[b_w-1][x];
for(y=1; y<b_w; y++)
memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
}
if(mb_y==b_height-1){
for(x=0; x<b_w*2; x++)
obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
for(y=b_w; y<b_w*2-1; y++)
memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
}
}
//skip stuff outside the picture
if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
{
uint8_t *src= s-> input_picture.data[0];
uint8_t *dst= s->current_picture.data[0];
const int stride= s->current_picture.linesize[0];
const int block_w= MB_SIZE >> s->block_max_depth;
const int sx= block_w*mb_x - block_w/2;
const int sy= block_w*mb_y - block_w/2;
const int w= s->plane[0].width;
const int h= s->plane[0].height;
int y;
for(y=sy; y<0; y++)
memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
for(y=h; y<sy+block_w*2; y++)
memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
if(sx<0){
for(y=sy; y<sy+block_w*2; y++)
memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
}
if(sx+block_w*2 > w){
for(y=sy; y<sy+block_w*2; y++)
memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
}
}
// intra(black) = neighbors' contribution to the current block
for(i=0; i<3; i++)
color[i]= get_dc(s, mb_x, mb_y, i);
// get previous score (cant be cached due to OBMC) // get previous score (cant be cached due to OBMC)
check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, &best_rd); check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, 0, 0, 0, &best_rd); check_block_inter(s, mb_x, mb_y, 0, 0, 0, *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, &best_rd); check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, &best_rd); check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, &best_rd); check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, &best_rd); check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, *obmc_edged, &best_rd);
/* fullpel ME */ /* fullpel ME */
//FIXME avoid subpel interpol / round to nearest integer //FIXME avoid subpel interpol / round to nearest integer
...@@ -3206,10 +3296,10 @@ static void iterative_me(SnowContext *s){ ...@@ -3206,10 +3296,10 @@ static void iterative_me(SnowContext *s){
dia_change=0; dia_change=0;
for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
for(j=0; j<i; j++){ for(j=0; j<i; j++){
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, &best_rd); dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd);
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, &best_rd); dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, &best_rd); dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, &best_rd); dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd);
} }
} }
}while(dia_change); }while(dia_change);
...@@ -3218,14 +3308,11 @@ static void iterative_me(SnowContext *s){ ...@@ -3218,14 +3308,11 @@ static void iterative_me(SnowContext *s){
static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
dia_change=0; dia_change=0;
for(i=0; i<8; i++) for(i=0; i<8; i++)
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, &best_rd); dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, *obmc_edged, &best_rd);
}while(dia_change); }while(dia_change);
//FIXME or try the standard 2 pass qpel or similar //FIXME or try the standard 2 pass qpel or similar
#if 1 #if 1
for(i=0; i<3; i++){ check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
color[i]= get_dc(s, mb_x, mb_y, i);
}
check_block(s, mb_x, mb_y, color, 1, &best_rd);
//FIXME RD style color selection //FIXME RD style color selection
#endif #endif
if(!same_block(block, &backup)){ if(!same_block(block, &backup)){
...@@ -3719,6 +3806,7 @@ static int encode_init(AVCodecContext *avctx) ...@@ -3719,6 +3806,7 @@ static int encode_init(AVCodecContext *avctx)
s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
h263_encode_init(&s->m); //mv_penalty h263_encode_init(&s->m); //mv_penalty
if(avctx->flags&CODEC_FLAG_PASS1){ if(avctx->flags&CODEC_FLAG_PASS1){
...@@ -3999,6 +4087,7 @@ static void common_end(SnowContext *s){ ...@@ -3999,6 +4087,7 @@ static void common_end(SnowContext *s){
av_freep(&s->m.me.scratchpad); av_freep(&s->m.me.scratchpad);
av_freep(&s->m.me.map); av_freep(&s->m.me.map);
av_freep(&s->m.me.score_map); av_freep(&s->m.me.score_map);
av_freep(&s->m.obmc_scratchpad);
av_freep(&s->block); av_freep(&s->block);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment