Commit c437a556 authored by michael

merge clip() into the alpha/beta/tc0 tables (10% faster filter_mb_fast() on P3)


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@6863 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 2b566df1
...@@ -6739,14 +6739,14 @@ decode_intra_mb: ...@@ -6739,14 +6739,14 @@ decode_intra_mb:
static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
int i, d; int i, d;
const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); const int index_a = qp + h->slice_alpha_c0_offset;
const int alpha = alpha_table[index_a]; const int alpha = (alpha_table+52)[index_a];
const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; const int beta = (beta_table+52)[qp + h->slice_beta_offset];
if( bS[0] < 4 ) { if( bS[0] < 4 ) {
int8_t tc[4]; int8_t tc[4];
for(i=0; i<4; i++) for(i=0; i<4; i++)
tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1; tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
} else { } else {
/* 16px edge length, because bS=4 is triggered by being at /* 16px edge length, because bS=4 is triggered by being at
...@@ -6800,14 +6800,14 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t b ...@@ -6800,14 +6800,14 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t b
} }
static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
int i; int i;
const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); const int index_a = qp + h->slice_alpha_c0_offset;
const int alpha = alpha_table[index_a]; const int alpha = (alpha_table+52)[index_a];
const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; const int beta = (beta_table+52)[qp + h->slice_beta_offset];
if( bS[0] < 4 ) { if( bS[0] < 4 ) {
int8_t tc[4]; int8_t tc[4];
for(i=0; i<4; i++) for(i=0; i<4; i++)
tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0; tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
} else { } else {
h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
...@@ -6833,12 +6833,12 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int ...@@ -6833,12 +6833,12 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int
} }
qp_index = MB_FIELD ? (i >> 3) : (i & 1); qp_index = MB_FIELD ? (i >> 3) : (i & 1);
index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 ); index_a = qp[qp_index] + h->slice_alpha_c0_offset;
alpha = alpha_table[index_a]; alpha = (alpha_table+52)[index_a];
beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )]; beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
if( bS[bS_index] < 4 ) { if( bS[bS_index] < 4 ) {
const int tc0 = tc0_table[index_a][bS[bS_index] - 1]; const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
const int p0 = pix[-1]; const int p0 = pix[-1];
const int p1 = pix[-2]; const int p1 = pix[-2];
const int p2 = pix[-3]; const int p2 = pix[-3];
...@@ -6927,12 +6927,12 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in ...@@ -6927,12 +6927,12 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in
} }
qp_index = MB_FIELD ? (i >> 2) : (i & 1); qp_index = MB_FIELD ? (i >> 2) : (i & 1);
index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 ); index_a = qp[qp_index] + h->slice_alpha_c0_offset;
alpha = alpha_table[index_a]; alpha = (alpha_table+52)[index_a];
beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )]; beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
if( bS[bS_index] < 4 ) { if( bS[bS_index] < 4 ) {
const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1; const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
const int p0 = pix[-1]; const int p0 = pix[-1];
const int p1 = pix[-2]; const int p1 = pix[-2];
const int q0 = pix[0]; const int q0 = pix[0];
...@@ -6967,15 +6967,15 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in ...@@ -6967,15 +6967,15 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in
static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
int i, d; int i, d;
const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); const int index_a = qp + h->slice_alpha_c0_offset;
const int alpha = alpha_table[index_a]; const int alpha = (alpha_table+52)[index_a];
const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; const int beta = (beta_table+52)[qp + h->slice_beta_offset];
const int pix_next = stride; const int pix_next = stride;
if( bS[0] < 4 ) { if( bS[0] < 4 ) {
int8_t tc[4]; int8_t tc[4];
for(i=0; i<4; i++) for(i=0; i<4; i++)
tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1; tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
} else { } else {
/* 16px edge length, see filter_mb_edgev */ /* 16px edge length, see filter_mb_edgev */
...@@ -7027,14 +7027,14 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t b ...@@ -7027,14 +7027,14 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t b
static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
int i; int i;
const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); const int index_a = qp + h->slice_alpha_c0_offset;
const int alpha = alpha_table[index_a]; const int alpha = (alpha_table+52)[index_a];
const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; const int beta = (beta_table+52)[qp + h->slice_beta_offset];
if( bS[0] < 4 ) { if( bS[0] < 4 ) {
int8_t tc[4]; int8_t tc[4];
for(i=0; i<4; i++) for(i=0; i<4; i++)
tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0; tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
} else { } else {
h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
......
...@@ -609,23 +609,48 @@ static const int quant_coeff[52][16]={ ...@@ -609,23 +609,48 @@ static const int quant_coeff[52][16]={
/* Deblocking filter (p153) */ /* Deblocking filter (p153) */
static const int alpha_table[52] = { static const int alpha_table[52*3] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 4, 4, 5, 6, 0, 0, 0, 0, 0, 0, 4, 4, 5, 6,
7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 7, 8, 9, 10, 12, 13, 15, 17, 20, 22,
25, 28, 32, 36, 40, 45, 50, 56, 63, 71, 25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
80, 90,101,113,127,144,162,182,203,226, 80, 90,101,113,127,144,162,182,203,226,
255, 255 255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,
}; };
static const int beta_table[52] = { static const int beta_table[52*3] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 2, 2, 3, 0, 0, 0, 0, 0, 0, 2, 2, 2, 3,
3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 3, 3, 3, 4, 4, 4, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
18, 18 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
}; };
static const int tc0_table[52][3] = { static const int tc0_table[52*3][3] = {
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
{ 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
...@@ -634,7 +659,16 @@ static const int tc0_table[52][3] = { ...@@ -634,7 +659,16 @@ static const int tc0_table[52][3] = {
{ 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 }, { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
{ 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 }, { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
{ 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 }, { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
{ 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 } { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 },
{13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
{13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
{13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
{13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
{13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
{13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
{13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
{13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
{13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
}; };
/* Cabac pre state table */ /* Cabac pre state table */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment