Commit 13f21995 authored by mru

AAC: optimise bitstream reading in decode_spectrum_and_dequant()

Using the low-level macros directly avoids redundant open/update/close
cycles.

2-3% faster on ARM, PPC, and Core i7.

git-svn-id: file:///var/local/repositories/ffmpeg/trunk@21224 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent ede926ea
...@@ -993,6 +993,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], ...@@ -993,6 +993,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1]; const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table; VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
const int cb_size = ff_aac_spectral_sizes[cbt_m1]; const int cb_size = ff_aac_spectral_sizes[cbt_m1];
OPEN_READER(re, gb);
switch (cbt_m1 >> 1) { switch (cbt_m1 >> 1) {
case 0: case 0:
...@@ -1001,15 +1002,18 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], ...@@ -1001,15 +1002,18 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
int len = off_len; int len = off_len;
do { do {
const int index = get_vlc2(gb, vlc_tab, 8, 2); int code;
unsigned cb_idx; unsigned cb_idx;
if (index >= cb_size) { UPDATE_CACHE(re, gb);
err_idx = index; GET_VLC(code, re, gb, vlc_tab, 8, 2);
if (code >= cb_size) {
err_idx = code;
goto err_cb_overflow; goto err_cb_overflow;
} }
cb_idx = cb_vector_idx[index]; cb_idx = cb_vector_idx[code];
cf = VMUL4(cf, vq, cb_idx, sf + idx); cf = VMUL4(cf, vq, cb_idx, sf + idx);
} while (len -= 4); } while (len -= 4);
} }
...@@ -1021,19 +1025,26 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], ...@@ -1021,19 +1025,26 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
int len = off_len; int len = off_len;
do { do {
const int index = get_vlc2(gb, vlc_tab, 8, 2); int code;
unsigned nnz; unsigned nnz;
unsigned cb_idx; unsigned cb_idx;
uint32_t bits; uint32_t bits;
if (index >= cb_size) { UPDATE_CACHE(re, gb);
err_idx = index; GET_VLC(code, re, gb, vlc_tab, 8, 2);
if (code >= cb_size) {
err_idx = code;
goto err_cb_overflow; goto err_cb_overflow;
} }
cb_idx = cb_vector_idx[index]; #if MIN_CACHE_BITS < 20
UPDATE_CACHE(re, gb);
#endif
cb_idx = cb_vector_idx[code];
nnz = cb_idx >> 8 & 15; nnz = cb_idx >> 8 & 15;
bits = get_bits(gb, nnz) << (32-nnz); bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
LAST_SKIP_BITS(re, gb, nnz);
cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx); cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
} while (len -= 4); } while (len -= 4);
} }
...@@ -1045,15 +1056,18 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], ...@@ -1045,15 +1056,18 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
int len = off_len; int len = off_len;
do { do {
const int index = get_vlc2(gb, vlc_tab, 8, 2); int code;
unsigned cb_idx; unsigned cb_idx;
if (index >= cb_size) { UPDATE_CACHE(re, gb);
err_idx = index; GET_VLC(code, re, gb, vlc_tab, 8, 2);
if (code >= cb_size) {
err_idx = code;
goto err_cb_overflow; goto err_cb_overflow;
} }
cb_idx = cb_vector_idx[index]; cb_idx = cb_vector_idx[code];
cf = VMUL2(cf, vq, cb_idx, sf + idx); cf = VMUL2(cf, vq, cb_idx, sf + idx);
} while (len -= 2); } while (len -= 2);
} }
...@@ -1066,19 +1080,23 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], ...@@ -1066,19 +1080,23 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
int len = off_len; int len = off_len;
do { do {
const int index = get_vlc2(gb, vlc_tab, 8, 2); int code;
unsigned nnz; unsigned nnz;
unsigned cb_idx; unsigned cb_idx;
unsigned sign; unsigned sign;
if (index >= cb_size) { UPDATE_CACHE(re, gb);
err_idx = index; GET_VLC(code, re, gb, vlc_tab, 8, 2);
if (code >= cb_size) {
err_idx = code;
goto err_cb_overflow; goto err_cb_overflow;
} }
cb_idx = cb_vector_idx[index]; cb_idx = cb_vector_idx[code];
nnz = cb_idx >> 8 & 15; nnz = cb_idx >> 8 & 15;
sign = get_bits(gb, nnz) << (cb_idx >> 12); sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
LAST_SKIP_BITS(re, gb, nnz);
cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx); cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
} while (len -= 2); } while (len -= 2);
} }
...@@ -1091,39 +1109,56 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], ...@@ -1091,39 +1109,56 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
int len = off_len; int len = off_len;
do { do {
const int index = get_vlc2(gb, vlc_tab, 8, 2); int code;
unsigned nzt, nnz; unsigned nzt, nnz;
unsigned cb_idx; unsigned cb_idx;
uint32_t bits; uint32_t bits;
int j; int j;
if (!index) { UPDATE_CACHE(re, gb);
GET_VLC(code, re, gb, vlc_tab, 8, 2);
if (!code) {
*icf++ = 0; *icf++ = 0;
*icf++ = 0; *icf++ = 0;
continue; continue;
} }
if (index >= cb_size) { if (code >= cb_size) {
err_idx = index; err_idx = code;
goto err_cb_overflow; goto err_cb_overflow;
} }
cb_idx = cb_vector_idx[index]; cb_idx = cb_vector_idx[code];
nnz = cb_idx >> 12; nnz = cb_idx >> 12;
nzt = cb_idx >> 8; nzt = cb_idx >> 8;
bits = get_bits(gb, nnz) << (32-nnz); bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
LAST_SKIP_BITS(re, gb, nnz);
for (j = 0; j < 2; j++) { for (j = 0; j < 2; j++) {
if (nzt & 1<<j) { if (nzt & 1<<j) {
int n = 4; uint32_t b;
int n;
/* The total length of escape_sequence must be < 22 bits according /* The total length of escape_sequence must be < 22 bits according
to the specification (i.e. max is 111111110xxxxxxxxxxxx). */ to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
while (get_bits1(gb) && n < 13) n++; UPDATE_CACHE(re, gb);
if (n == 13) { b = GET_CACHE(re, gb);
b = 31 - av_log2(~b);
if (b > 8) {
av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n"); av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
return -1; return -1;
} }
n = (1 << n) + get_bits(gb, n);
#if MIN_CACHE_BITS < 21
LAST_SKIP_BITS(re, gb, b + 1);
UPDATE_CACHE(re, gb);
#else
SKIP_BITS(re, gb, b + 1);
#endif
b += 4;
n = (1 << b) + SHOW_UBITS(re, gb, b);
LAST_SKIP_BITS(re, gb, b);
*icf++ = cbrt_tab[n] | (bits & 1<<31); *icf++ = cbrt_tab[n] | (bits & 1<<31);
bits <<= 1; bits <<= 1;
} else { } else {
...@@ -1138,6 +1173,8 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], ...@@ -1138,6 +1173,8 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
} }
} }
CLOSE_READER(re, gb);
} }
} }
coef += g_len << 7; coef += g_len << 7;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment