Commit 7301ffcb authored by lorenm

H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@4271 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent baaa8578
...@@ -2643,7 +2643,7 @@ static void h261_loop_filter_c(uint8_t *src, int stride){ ...@@ -2643,7 +2643,7 @@ static void h261_loop_filter_c(uint8_t *src, int stride){
} }
} }
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int *tc0) static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{ {
int i, d; int i, d;
for( i = 0; i < 4; i++ ) { for( i = 0; i < 4; i++ ) {
...@@ -2683,16 +2683,16 @@ static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystrid ...@@ -2683,16 +2683,16 @@ static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystrid
} }
} }
} }
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int *tc0) static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{ {
h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0); h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
} }
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int *tc0) static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{ {
h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0); h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
} }
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int *tc0) static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{ {
int i, d; int i, d;
for( i = 0; i < 4; i++ ) { for( i = 0; i < 4; i++ ) {
...@@ -2720,15 +2720,43 @@ static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystr ...@@ -2720,15 +2720,43 @@ static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystr
} }
} }
} }
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int *tc0) static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{ {
h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0); h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
} }
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int *tc0) static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{ {
h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0); h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
} }
/*
 * Chroma edge filter that takes no tc0 clipping table, shared by the
 * vertical and horizontal entry points.
 *
 * pix     - first sample on the q side of the edge
 * xstride - distance between neighbouring samples across the edge
 * ystride - distance between consecutive positions along the edge
 * alpha   - exclusive upper bound on |p0 - q0|
 * beta    - exclusive upper bound on |p1 - p0| and on |q1 - q0|
 *
 * Eight positions along the edge are visited. At each position where all
 * three differences are below their thresholds, the two samples adjacent
 * to the edge (p0 and q0) are replaced by rounded weighted averages of the
 * original p1/p0/q1 and q1/q0/p1 values respectively; p1 and q1 are only
 * read, never written.
 */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int remaining = 8;

    while( remaining-- > 0 ) {
        uint8_t *const p0_ptr = pix - xstride;
        const int p1 = pix[-2*xstride];
        const int p0 = *p0_ptr;
        const int q0 = pix[0];
        const int q1 = pix[xstride];

        /* All inputs are latched above, so both outputs are computed
         * from the unfiltered values. */
        const int below_thresholds = ABS( p0 - q0 ) < alpha &&
                                     ABS( p1 - p0 ) < beta &&
                                     ABS( q1 - q0 ) < beta;

        if( below_thresholds ) {
            *p0_ptr = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
            pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
        }

        pix += ystride;
    }
}
/*
 * Entry point for the "v" chroma intra filter: the samples compared and
 * modified are `stride` bytes apart, and the filter advances one byte at a
 * time along the edge.
 */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across_edge = stride;
    const int along_edge  = 1;

    h264_loop_filter_chroma_intra_c(pix, across_edge, along_edge, alpha, beta);
}
/*
 * Entry point for the "h" chroma intra filter: the samples compared and
 * modified are adjacent bytes, and the filter advances `stride` bytes at a
 * time along the edge.
 */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across_edge = 1;
    const int along_edge  = stride;

    h264_loop_filter_chroma_intra_c(pix, across_edge, along_edge, alpha, beta);
}
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{ {
int s, i; int s, i;
...@@ -3834,6 +3862,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) ...@@ -3834,6 +3862,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
c->h263_h_loop_filter= h263_h_loop_filter_c; c->h263_h_loop_filter= h263_h_loop_filter_c;
c->h263_v_loop_filter= h263_v_loop_filter_c; c->h263_v_loop_filter= h263_v_loop_filter_c;
......
...@@ -267,10 +267,12 @@ typedef struct DSPContext { ...@@ -267,10 +267,12 @@ typedef struct DSPContext {
void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top); void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w); void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w);
void (*h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int *tc0); void (*h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int *tc0); void (*h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int *tc0); void (*h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int *tc0); void (*h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta);
void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta);
void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
......
...@@ -5665,7 +5665,7 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4] ...@@ -5665,7 +5665,7 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4]
const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
if( bS[0] < 4 ) { if( bS[0] < 4 ) {
int tc[4]; int8_t tc[4];
for(i=0; i<4; i++) for(i=0; i<4; i++)
tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1; tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
...@@ -5726,28 +5726,12 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4 ...@@ -5726,28 +5726,12 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4
const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
if( bS[0] < 4 ) { if( bS[0] < 4 ) {
int tc[4]; int8_t tc[4];
for(i=0; i<4; i++) for(i=0; i<4; i++)
tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0; tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
} else { } else {
/* 8px edge length, see filter_mb_edgev */ h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
for( d = 0; d < 8; d++ ){
const int p0 = pix[-1];
const int p1 = pix[-2];
const int q0 = pix[0];
const int q1 = pix[1];
if( ABS( p0 - q0 ) < alpha &&
ABS( p1 - p0 ) < beta &&
ABS( q1 - q0 ) < beta ) {
pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
tprintf("filter_mb_edgecv i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
}
pix += stride;
}
} }
} }
...@@ -5913,7 +5897,7 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4] ...@@ -5913,7 +5897,7 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4]
const int pix_next = stride; const int pix_next = stride;
if( bS[0] < 4 ) { if( bS[0] < 4 ) {
int tc[4]; int8_t tc[4];
for(i=0; i<4; i++) for(i=0; i<4; i++)
tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1; tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
...@@ -5970,31 +5954,14 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4 ...@@ -5970,31 +5954,14 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4
const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
const int alpha = alpha_table[index_a]; const int alpha = alpha_table[index_a];
const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
const int pix_next = stride;
if( bS[0] < 4 ) { if( bS[0] < 4 ) {
int tc[4]; int8_t tc[4];
for(i=0; i<4; i++) for(i=0; i<4; i++)
tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0; tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
} else { } else {
/* 8px edge length, see filter_mb_edgev */ h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
for( d = 0; d < 8; d++ ) {
const int p0 = pix[-1*pix_next];
const int p1 = pix[-2*pix_next];
const int q0 = pix[0];
const int q1 = pix[1*pix_next];
if( ABS( p0 - q0 ) < alpha &&
ABS( p1 - p0 ) < beta &&
ABS( q1 - q0 ) < beta ) {
pix[-pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
tprintf("filter_mb_edgech i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], pix[-3*pix_next], p1, p0, q0, q1, pix[2*pix_next], pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
}
pix++;
}
} }
} }
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment