Commit 04ce9d46 authored by michaelni's avatar michaelni

moving the svq3 motion compensation stuff to dsputil (this also means that...

moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@1885 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent f15b68fc
This diff is collapsed.
...@@ -77,6 +77,7 @@ void clear_blocks_c(DCTELEM *blocks); ...@@ -77,6 +77,7 @@ void clear_blocks_c(DCTELEM *blocks);
/* add and put pixel (decoding) */ /* add and put pixel (decoding) */
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h); typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
...@@ -146,18 +147,18 @@ typedef struct DSPContext { ...@@ -146,18 +147,18 @@ typedef struct DSPContext {
me_cmp_func me_sub_cmp[11]; me_cmp_func me_sub_cmp[11];
me_cmp_func mb_cmp[11]; me_cmp_func mb_cmp[11];
/* maybe create an array for 16/8 functions */ /* maybe create an array for 16/8/4/2 functions */
/** /**
* Halfpel motion compensation with rounding (a+b+1)>>1. * Halfpel motion compensation with rounding (a+b+1)>>1.
* this is an array[2][4] of motion compensation functions for 2 * this is an array[4][4] of motion compensation functions for 4
* horizontal blocksizes (8,16) and the 4 halfpel positions<br> * horizontal blocksizes (2,4,8,16) and the 4 halfpel positions<br>
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] * *pixels_tab[ 0->16xH 1->8xH 2->4xH 3->2xH ][ xhalfpel + 2*yhalfpel ]
* @param block destination where the result is stored * @param block destination where the result is stored
* @param pixels source * @param pixels source
* @param line_size number of bytes in a horizontal line of block * @param line_size number of bytes in a horizontal line of block
* @param h height * @param h height
*/ */
op_pixels_func put_pixels_tab[2][4]; op_pixels_func put_pixels_tab[4][4];
/** /**
* Halfpel motion compensation with rounding (a+b+1)>>1. * Halfpel motion compensation with rounding (a+b+1)>>1.
...@@ -194,6 +195,18 @@ typedef struct DSPContext { ...@@ -194,6 +195,18 @@ typedef struct DSPContext {
* @param h height * @param h height
*/ */
op_pixels_func avg_no_rnd_pixels_tab[2][4]; op_pixels_func avg_no_rnd_pixels_tab[2][4];
/**
* Thirdpel motion compensation with rounding, e.g. (2*a+b+1)/3 for a one-third horizontal offset.
* this is an array[11] of motion compensation functions for the 9 thirdpel positions (indices 3 and 7 are unused)<br>
* *pixels_tab[ xthirdpel + 4*ythirdpel ]
* @param block destination where the result is stored
* @param pixels source
* @param line_size number of bytes in a horizontal line of block
* @param w width
* @param h height
*/
tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
qpel_mc_func put_qpel_pixels_tab[2][16]; qpel_mc_func put_qpel_pixels_tab[2][16];
qpel_mc_func avg_qpel_pixels_tab[2][16]; qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
...@@ -380,7 +393,9 @@ void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); ...@@ -380,7 +393,9 @@ void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
struct unaligned_64 { uint64_t l; } __attribute__((packed)); struct unaligned_64 { uint64_t l; } __attribute__((packed));
struct unaligned_32 { uint32_t l; } __attribute__((packed)); struct unaligned_32 { uint32_t l; } __attribute__((packed));
struct unaligned_16 { uint16_t l; } __attribute__((packed));
#define LD16(a) (((const struct unaligned_16 *) (a))->l)
#define LD32(a) (((const struct unaligned_32 *) (a))->l) #define LD32(a) (((const struct unaligned_32 *) (a))->l)
#define LD64(a) (((const struct unaligned_64 *) (a))->l) #define LD64(a) (((const struct unaligned_64 *) (a))->l)
...@@ -388,6 +403,7 @@ struct unaligned_32 { uint32_t l; } __attribute__((packed)); ...@@ -388,6 +403,7 @@ struct unaligned_32 { uint32_t l; } __attribute__((packed));
#else /* __GNUC__ */ #else /* __GNUC__ */
#define LD16(a) (*((uint16_t*)(a)))
#define LD32(a) (*((uint32_t*)(a))) #define LD32(a) (*((uint32_t*)(a)))
#define LD64(a) (*((uint64_t*)(a))) #define LD64(a) (*((uint64_t*)(a)))
......
...@@ -262,125 +262,11 @@ static inline int svq3_decode_block (GetBitContext *gb, DCTELEM *block, ...@@ -262,125 +262,11 @@ static inline int svq3_decode_block (GetBitContext *gb, DCTELEM *block,
return 0; return 0;
} }
/* Neighbouring source pixels of the current output column. */
#define P00 src[col]
#define P01 src[col+1]
#define P10 src[col+stride]
#define P11 src[col+stride+1]

/* Fill every pixel of the width x height block with `expr`, advancing both
 * pointers by one line per row. Only the pixels named in `expr` are read. */
#define SIXPEL_ROWS(expr)                      \
    for (row = 0; row < height; row++) {       \
        for (col = 0; col < width; col++)      \
            dst[col] = (expr);                 \
        src += stride;                         \
        dst += stride;                         \
    }

/**
 * Write a width x height block to dst, interpolated from src at the
 * sub-pixel position selected by dxy.
 *
 * dxy is 6*yfrac + xfrac. The handled (yfrac, xfrac) pairs are those where
 * both values come from {0, 2, 4}, plus (0,3), (3,0) and (3,3). The case
 * arithmetic weights 2 and 4 as one and two thirds and 3 as one half.
 * Any other dxy value leaves dst untouched.
 *
 * The multiply-and-shift pairs approximate divisions:
 * 683/2^11 is roughly 1/3 and 2731/2^15 is roughly 1/12.
 *
 * @param s      not used by this function
 * @param src    top-left source pixel; positions with a horizontal fraction
 *               also read one pixel to the right, positions with a vertical
 *               fraction also read the following line
 * @param dst    destination where the result is stored
 * @param stride distance in bytes between lines, used for both src and dst
 * @param dxy    sub-pixel position selector, see above
 * @param width  block width in pixels
 * @param height block height in pixels
 */
static void sixpel_mc_put (MpegEncContext *s,
                           uint8_t *src, uint8_t *dst, int stride,
                           int dxy, int width, int height) {
    int row, col;

    switch (dxy) {
    /* integer position: plain copy, line by line */
    case 6*0+0:
        for (row = height; row > 0; row--) {
            memcpy (dst, src, width);
            src += stride;
            dst += stride;
        }
        break;

    /* horizontal-only interpolation */
    case 6*0+2:
        SIXPEL_ROWS((683*(2*P00 + P01 + 1)) >> 11);
        break;
    case 6*0+3:
        SIXPEL_ROWS((P00 + P01 + 1) >> 1);
        break;
    case 6*0+4:
        SIXPEL_ROWS((683*(P00 + 2*P01 + 1)) >> 11);
        break;

    /* vertical fraction of one third */
    case 6*2+0:
        SIXPEL_ROWS((683*(2*P00 + P10 + 1)) >> 11);
        break;
    case 6*2+2:
        SIXPEL_ROWS((2731*(4*P00 + 3*P01 + 3*P10 + 2*P11 + 6)) >> 15);
        break;
    case 6*2+4:
        SIXPEL_ROWS((2731*(3*P00 + 4*P01 + 2*P10 + 3*P11 + 6)) >> 15);
        break;

    /* vertical fraction of one half */
    case 6*3+0:
        SIXPEL_ROWS((P00 + P10 + 1) >> 1);
        break;
    case 6*3+3:
        SIXPEL_ROWS((P00 + P01 + P10 + P11 + 2) >> 2);
        break;

    /* vertical fraction of two thirds */
    case 6*4+0:
        SIXPEL_ROWS((683*(P00 + 2*P10 + 1)) >> 11);
        break;
    case 6*4+2:
        SIXPEL_ROWS((2731*(3*P00 + 2*P01 + 4*P10 + 3*P11 + 6)) >> 15);
        break;
    case 6*4+4:
        SIXPEL_ROWS((2731*(2*P00 + 3*P01 + 3*P10 + 4*P11 + 6)) >> 15);
        break;

    /* unsupported position: nothing is written */
    default:
        break;
    }
}

#undef SIXPEL_ROWS
#undef P11
#undef P10
#undef P01
#undef P00
static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y, static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y,
int width, int height, int mx, int my, int dxy) { int width, int height, int mx, int my, int dxy, int thirdpel) {
uint8_t *src, *dest; uint8_t *src, *dest;
int i, emu = 0; int i, emu = 0;
int blocksize= 2 - (width>>3); //16->0, 8->1, 4->2
mx += x; mx += x;
my += y; my += y;
...@@ -405,13 +291,17 @@ static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y, ...@@ -405,13 +291,17 @@ static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y,
mx, my, s->width, s->height); mx, my, s->width, s->height);
src = s->edge_emu_buffer; src = s->edge_emu_buffer;
} }
sixpel_mc_put (s, src, dest, s->linesize, dxy, width, height); if(thirdpel)
s->dsp.put_tpel_pixels_tab[dxy](dest, src, s->linesize, width, height);
else
s->dsp.put_pixels_tab[blocksize][dxy](dest, src, s->linesize, height);
if (!(s->flags & CODEC_FLAG_GRAY)) { if (!(s->flags & CODEC_FLAG_GRAY)) {
mx = (mx + (mx < (int) x)) >> 1; mx = (mx + (mx < (int) x)) >> 1;
my = (my + (my < (int) y)) >> 1; my = (my + (my < (int) y)) >> 1;
width = (width >> 1); width = (width >> 1);
height = (height >> 1); height = (height >> 1);
blocksize++;
for (i=1; i < 3; i++) { for (i=1; i < 3; i++) {
dest = s->current_picture.data[i] + (x >> 1) + (y >> 1)*s->uvlinesize; dest = s->current_picture.data[i] + (x >> 1) + (y >> 1)*s->uvlinesize;
...@@ -422,7 +312,10 @@ static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y, ...@@ -422,7 +312,10 @@ static inline void svq3_mc_dir_part (MpegEncContext *s, int x, int y,
mx, my, (s->width >> 1), (s->height >> 1)); mx, my, (s->width >> 1), (s->height >> 1));
src = s->edge_emu_buffer; src = s->edge_emu_buffer;
} }
sixpel_mc_put (s, src, dest, s->uvlinesize, dxy, width, height); if(thirdpel)
s->dsp.put_tpel_pixels_tab[dxy](dest, src, s->uvlinesize, width, height);
else
s->dsp.put_pixels_tab[blocksize][dxy](dest, src, s->uvlinesize, height);
} }
} }
} }
...@@ -441,7 +334,7 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { ...@@ -441,7 +334,7 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
h->topright_samples_available = 0xFFFF; h->topright_samples_available = 0xFFFF;
if (mb_type == 0) { /* SKIP */ if (mb_type == 0) { /* SKIP */
svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0); svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0);
cbp = 0; cbp = 0;
mb_type = MB_TYPE_SKIP; mb_type = MB_TYPE_SKIP;
...@@ -521,17 +414,17 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { ...@@ -521,17 +414,17 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
my = ((my + 1)>>1) + dy; my = ((my + 1)>>1) + dy;
fx= ((unsigned)(mx + 0x3000))/3 - 0x1000; fx= ((unsigned)(mx + 0x3000))/3 - 0x1000;
fy= ((unsigned)(my + 0x3000))/3 - 0x1000; fy= ((unsigned)(my + 0x3000))/3 - 0x1000;
dxy= 2*(mx - 3*fx) + 2*6*(my - 3*fy); dxy= (mx - 3*fx) + 4*(my - 3*fy);
svq3_mc_dir_part (s, x, y, part_width, part_height, fx, fy, dxy); svq3_mc_dir_part (s, x, y, part_width, part_height, fx, fy, dxy, 1);
mx += mx; mx += mx;
my += my; my += my;
} else if (mode == HALFPEL_MODE) { } else if (mode == HALFPEL_MODE) {
mx = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000; mx = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000;
my = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000; my = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000;
dxy= 3*(mx&1) + 6*3*(my&1); dxy= (mx&1) + 2*(my&1);
svq3_mc_dir_part (s, x, y, part_width, part_height, mx>>1, my>>1, dxy); svq3_mc_dir_part (s, x, y, part_width, part_height, mx>>1, my>>1, dxy, 0);
mx *= 3; mx *= 3;
my *= 3; my *= 3;
} else { } else {
...@@ -539,7 +432,7 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { ...@@ -539,7 +432,7 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000; mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000;
my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000; my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000;
svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my, 0); svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my, 0, 0);
mx *= 6; mx *= 6;
my *= 6; my *= 6;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment