Commit d067e74f authored by michaelni

dct cleanup

more accurate mmx dct (don't discard bits for fun)
fixing mmx quantizer bug for qscale%2==1 (bias was slightly wrong)


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@895 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 56699293
...@@ -101,8 +101,12 @@ void dct_error(const char *name, int is_idct, ...@@ -101,8 +101,12 @@ void dct_error(const char *name, int is_idct,
case 0: case 0:
for(i=0;i<64;i++) for(i=0;i<64;i++)
block1[i] = (random() % 512) -256; block1[i] = (random() % 512) -256;
if (is_idct) if (is_idct){
fdct(block1); fdct(block1);
for(i=0;i<64;i++)
block1[i]>>=3;
}
break; break;
case 1:{ case 1:{
int num= (random()%10)+1; int num= (random()%10)+1;
...@@ -153,17 +157,11 @@ void dct_error(const char *name, int is_idct, ...@@ -153,17 +157,11 @@ void dct_error(const char *name, int is_idct,
if (fdct_func == fdct_ifast) { if (fdct_func == fdct_ifast) {
for(i=0; i<64; i++) { for(i=0; i<64; i++) {
scale = (1 << (AANSCALE_BITS + 11)) / aanscales[i]; scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS; block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
} }
} }
if (fdct_func == ff_jpeg_fdct_islow) {
for(i=0; i<64; i++) {
block[i] = (block[i]+3)>>3;
}
}
fdct_ref(block1); fdct_ref(block1);
blockSumErr=0; blockSumErr=0;
...@@ -212,8 +210,12 @@ void dct_error(const char *name, int is_idct, ...@@ -212,8 +210,12 @@ void dct_error(const char *name, int is_idct,
case 0: case 0:
for(i=0;i<64;i++) for(i=0;i<64;i++)
block1[i] = (random() % 512) -256; block1[i] = (random() % 512) -256;
if (is_idct) if (is_idct){
fdct(block1); fdct(block1);
for(i=0;i<64;i++)
block1[i]>>=3;
}
break; break;
case 1:{ case 1:{
case 2: case 2:
......
...@@ -103,6 +103,7 @@ short *block; ...@@ -103,6 +103,7 @@ short *block;
s += c[i][5] * tmp[8 * 5 + j]; s += c[i][5] * tmp[8 * 5 + j];
s += c[i][6] * tmp[8 * 6 + j]; s += c[i][6] * tmp[8 * 6 + j];
s += c[i][7] * tmp[8 * 7 + j]; s += c[i][7] * tmp[8 * 7 + j];
s*=8.0;
block[8 * i + j] = (short)floor(s + 0.499999); block[8 * i + j] = (short)floor(s + 0.499999);
/* /*
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
#define SHIFT_FRW_COL BITS_FRW_ACC #define SHIFT_FRW_COL BITS_FRW_ACC
#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17) #define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3)
//#define RND_FRW_ROW (262144 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_ROW-1) //#define RND_FRW_ROW (262144 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_ROW-1)
#define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1))
//#define RND_FRW_COL (2 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_COL-1) //#define RND_FRW_COL (2 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_COL-1)
......
...@@ -46,9 +46,9 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -46,9 +46,9 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if (s->mb_intra) { if (s->mb_intra) {
int dummy; int dummy;
if (n < 4) if (n < 4)
q = s->y_dc_scale; q = s->y_dc_scale<<3;
else else
q = s->c_dc_scale; q = s->c_dc_scale<<3;
/* note: block[0] is assumed to be positive */ /* note: block[0] is assumed to be positive */
if (!s->h263_aic) { if (!s->h263_aic) {
#if 1 #if 1
...@@ -70,7 +70,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -70,7 +70,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
#endif #endif
} else } else
/* For AIC we skip quant/dequant of INTRADC */ /* For AIC we skip quant/dequant of INTRADC */
level = block[0]; level = block[0]>>3;
block[0]=0; //avoid fake overflow block[0]=0; //avoid fake overflow
// temp_block[0] = (block[0] + (q >> 1)) / q; // temp_block[0] = (block[0] + (q >> 1)) / q;
......
...@@ -90,7 +90,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 ...@@ -90,7 +90,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
/* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT-3)) / qmat[qscale][j] = (int)((UINT64_C(1) << QMAT_SHIFT) /
(qscale * quant_matrix[j])); (qscale * quant_matrix[j]));
} }
} else if (s->fdct == fdct_ifast) { } else if (s->fdct == fdct_ifast) {
...@@ -101,7 +101,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 ...@@ -101,7 +101,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
/* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
(aanscales[i] * qscale * quant_matrix[j])); (aanscales[i] * qscale * quant_matrix[j]));
} }
} else { } else {
...@@ -115,7 +115,6 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16 ...@@ -115,7 +115,6 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]); qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);
if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1; if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]); qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
} }
} }
...@@ -2487,15 +2486,15 @@ static int dct_quantize_c(MpegEncContext *s, ...@@ -2487,15 +2486,15 @@ static int dct_quantize_c(MpegEncContext *s,
i = 1; i = 1;
last_non_zero = 0; last_non_zero = 0;
qmat = s->q_intra_matrix[qscale]; qmat = s->q_intra_matrix[qscale];
bias= s->intra_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT); bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
} else { } else {
i = 0; i = 0;
last_non_zero = -1; last_non_zero = -1;
qmat = s->q_inter_matrix[qscale]; qmat = s->q_inter_matrix[qscale];
bias= s->inter_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT); bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
} }
threshold1= (1<<(QMAT_SHIFT - 3)) - bias - 1; threshold1= (1<<QMAT_SHIFT) - bias - 1;
threshold2= threshold1<<1; threshold2= (threshold1<<1);
for(;i<64;i++) { for(;i<64;i++) {
j = zigzag_direct[i]; j = zigzag_direct[i];
...@@ -2506,10 +2505,10 @@ static int dct_quantize_c(MpegEncContext *s, ...@@ -2506,10 +2505,10 @@ static int dct_quantize_c(MpegEncContext *s,
// || bias-level >= (1<<(QMAT_SHIFT - 3))){ // || bias-level >= (1<<(QMAT_SHIFT - 3))){
if(((unsigned)(level+threshold1))>threshold2){ if(((unsigned)(level+threshold1))>threshold2){
if(level>0){ if(level>0){
level= (bias + level)>>(QMAT_SHIFT - 3); level= (bias + level)>>QMAT_SHIFT;
block[j]= level; block[j]= level;
}else{ }else{
level= (bias - level)>>(QMAT_SHIFT - 3); level= (bias - level)>>QMAT_SHIFT;
block[j]= -level; block[j]= -level;
} }
max |=level; max |=level;
......
...@@ -30,8 +30,8 @@ enum OutputFormat { ...@@ -30,8 +30,8 @@ enum OutputFormat {
#define MPEG_BUF_SIZE (16 * 1024) #define MPEG_BUF_SIZE (16 * 1024)
#define QMAT_SHIFT_MMX 19 #define QMAT_SHIFT_MMX 16
#define QMAT_SHIFT 25 #define QMAT_SHIFT 22
#define MAX_FCODE 7 #define MAX_FCODE 7
#define MAX_MV 2048 #define MAX_MV 2048
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment