Commit e808a641 authored by michaelni

optimize block_permute()

optimize dct_quantize_c()
don't permute s->q_inter/intra_matrix


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@1067 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 7a455a1f
...@@ -1553,16 +1553,25 @@ int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) ...@@ -1553,16 +1553,25 @@ int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s; return s;
} }
/* permute block according so that it corresponds to the MMX idct void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last)
order */
void block_permute(INT16 *block, UINT8 *permutation)
{ {
int i; int i;
INT16 temp[64]; INT16 temp[64];
for(i=0; i<64; i++) temp[ permutation[i] ] = block[i]; if(last<=0) return;
if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
for(i=0; i<64; i++) block[i] = temp[i]; for(i=0; i<=last; i++){
const int j= scantable[i];
temp[j]= block[j];
block[j]=0;
}
for(i=0; i<=last; i++){
const int j= scantable[i];
const int perm_j= permutation[j];
block[perm_j]= temp[j];
}
} }
void clear_blocks_c(DCTELEM *blocks) void clear_blocks_c(DCTELEM *blocks)
......
...@@ -115,7 +115,11 @@ int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); ...@@ -115,7 +115,11 @@ int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
void block_permute(INT16 *block, UINT8 *permutation); /**
* permute block according to permutation.
* @param last last non zero element in scantable order
*/
void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last);
#if defined(HAVE_MMX) #if defined(HAVE_MMX)
......
...@@ -94,7 +94,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 ...@@ -94,7 +94,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
/* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
qmat[qscale][j] = (int)((UINT64_C(1) << QMAT_SHIFT) / qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
(qscale * quant_matrix[j])); (qscale * quant_matrix[j]));
} }
} else if (s->fdct == fdct_ifast) { } else if (s->fdct == fdct_ifast) {
...@@ -105,7 +105,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 ...@@ -105,7 +105,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
/* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
(aanscales[i] * qscale * quant_matrix[j])); (aanscales[i] * qscale * quant_matrix[j]));
} }
} else { } else {
...@@ -138,6 +138,8 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 ...@@ -138,6 +138,8 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){ void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){
int i; int i;
int end; int end;
st->scantable= src_scantable;
for(i=0; i<64; i++){ for(i=0; i<64; i++){
int j; int j;
...@@ -2968,18 +2970,13 @@ static int dct_quantize_c(MpegEncContext *s, ...@@ -2968,18 +2970,13 @@ static int dct_quantize_c(MpegEncContext *s,
{ {
int i, j, level, last_non_zero, q; int i, j, level, last_non_zero, q;
const int *qmat; const int *qmat;
const UINT8 *scantable= s->intra_scantable.scantable;
int bias; int bias;
int max=0; int max=0;
unsigned int threshold1, threshold2; unsigned int threshold1, threshold2;
s->fdct (block); s->fdct (block);
#ifndef ARCH_ALPHA /* Alpha uses unpermuted matrix */
/* we need this permutation so that we correct the IDCT
permutation. will be moved into DCT code */
block_permute(block, s->idct_permutation); //FIXME remove
#endif
if (s->mb_intra) { if (s->mb_intra) {
if (!s->h263_aic) { if (!s->h263_aic) {
if (n < 4) if (n < 4)
...@@ -3007,7 +3004,7 @@ static int dct_quantize_c(MpegEncContext *s, ...@@ -3007,7 +3004,7 @@ static int dct_quantize_c(MpegEncContext *s,
threshold2= (threshold1<<1); threshold2= (threshold1<<1);
for(;i<64;i++) { for(;i<64;i++) {
j = s->intra_scantable.permutated[i]; j = scantable[i];
level = block[j]; level = block[j];
level = level * qmat[j]; level = level * qmat[j];
...@@ -3029,6 +3026,9 @@ static int dct_quantize_c(MpegEncContext *s, ...@@ -3029,6 +3026,9 @@ static int dct_quantize_c(MpegEncContext *s,
} }
*overflow= s->max_qcoeff < max; //overflow might have happened *overflow= s->max_qcoeff < max; //overflow might have happened
/* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
ff_block_permute(block, s->idct_permutation, scantable, last_non_zero);
return last_non_zero; return last_non_zero;
} }
......
...@@ -100,6 +100,7 @@ typedef struct ReorderBuffer{ ...@@ -100,6 +100,7 @@ typedef struct ReorderBuffer{
} ReorderBuffer; } ReorderBuffer;
typedef struct ScanTable{ typedef struct ScanTable{
const UINT8 *scantable;
UINT8 permutated[64]; UINT8 permutated[64];
UINT8 raster_end[64]; UINT8 raster_end[64];
} ScanTable; } ScanTable;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment