Commit 4c0bd362 authored by banan

Optimize by building the mdct window and multiplying/adding at the same time.

Patch by Ian Braithwaite ian .. braithwaite . dk

[Ffmpeg-devel] WMA decoder speedup 2007-03-22 22:56


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@8526 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 0579355a
...@@ -302,7 +302,7 @@ int ff_wma_init(AVCodecContext * avctx, int flags2) ...@@ -302,7 +302,7 @@ int ff_wma_init(AVCodecContext * avctx, int flags2)
window = av_malloc(sizeof(float) * n); window = av_malloc(sizeof(float) * n);
alpha = M_PI / (2.0 * n); alpha = M_PI / (2.0 * n);
for(j=0;j<n;j++) { for(j=0;j<n;j++) {
window[n - j - 1] = sin((j + 0.5) * alpha); window[j] = sin((j + 0.5) * alpha);
} }
s->windows[i] = window; s->windows[i] = window;
} }
......
...@@ -112,7 +112,6 @@ typedef struct WMACodecContext { ...@@ -112,7 +112,6 @@ typedef struct WMACodecContext {
int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]); DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]);
DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]);
MDCTContext mdct_ctx[BLOCK_NB_SIZES]; MDCTContext mdct_ctx[BLOCK_NB_SIZES];
float *windows[BLOCK_NB_SIZES]; float *windows[BLOCK_NB_SIZES];
DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); ///< temporary storage for imdct DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); ///< temporary storage for imdct
......
...@@ -316,6 +316,61 @@ static int decode_exp_vlc(WMACodecContext *s, int ch) ...@@ -316,6 +316,61 @@ static int decode_exp_vlc(WMACodecContext *s, int ch)
return 0; return 0;
} }
/**
 * Window the block currently held in s->output and merge it into out.
 *
 * The block is handled as two halves of s->block_len samples each:
 *  - left half:  passed through dsp.vector_fmul_add_add() with out as both
 *                destination and addend, so it is combined with what out
 *                already contains;
 *  - right half: written to out via dsp.vector_fmul_reverse(), memcpy() and
 *                memset(), i.e. out is only used as a destination here.
 *
 * If the neighbouring block on a side is shorter than the current one, only
 * a centred span of the neighbour's length is windowed. The samples next to
 * the block centre are copied from s->output unchanged, the far right margin
 * is zeroed, and the far left margin of out is left untouched.
 *
 * The windows are chosen so that, where they overlap, their squared sum
 * is always 1 (MDCT reconstruction rule).
 *
 * NOTE(review): the exact arithmetic of vector_fmul_add_add() and
 * vector_fmul_reverse() is defined by the DSP context, not here -- confirm
 * against its declaration.
 *
 * @param s   decoder context; reads output, windows, block_len and the
 *            current/previous/next/frame block length bits
 * @param out destination; 2 * s->block_len floats are addressed
 */
static void wma_window(WMACodecContext *s, float *out)
{
    float *in = s->output;
    int span, win_idx, margin;

    /* ---- left half: shares samples with the previous block ---- */
    if (s->block_len_bits > s->prev_block_len_bits) {
        /* previous block was shorter: window only the centred span */
        float *dst, *src;

        span    = 1 << s->prev_block_len_bits;
        margin  = (s->block_len - span) / 2;
        win_idx = s->frame_len_bits - s->prev_block_len_bits;
        dst     = out + margin;
        src     = in  + margin;

        s->dsp.vector_fmul_add_add(dst, src, s->windows[win_idx],
                                   dst, 0, span, 1);
        /* samples between the windowed span and the block centre pass through */
        memcpy(dst + span, src + span, margin * sizeof(float));
    } else {
        /* previous block at least as long: window the whole half */
        span    = s->block_len;
        win_idx = s->frame_len_bits - s->block_len_bits;

        s->dsp.vector_fmul_add_add(out, in, s->windows[win_idx],
                                   out, 0, span, 1);
    }

    /* step over the left half */
    out += s->block_len;
    in  += s->block_len;

    /* ---- right half: will be overlapped by the next block ---- */
    if (s->block_len_bits > s->next_block_len_bits) {
        /* next block is shorter: pass-through, windowed span, then silence */
        float *dst, *src;

        span    = 1 << s->next_block_len_bits;
        margin  = (s->block_len - span) / 2;
        win_idx = s->frame_len_bits - s->next_block_len_bits;
        dst     = out + margin;
        src     = in  + margin;

        memcpy(out, in, margin * sizeof(float));
        s->dsp.vector_fmul_reverse(dst, src, s->windows[win_idx], span);
        memset(dst + span, 0, margin * sizeof(float));
    } else {
        /* next block at least as long: window the whole half */
        span    = s->block_len;
        win_idx = s->frame_len_bits - s->block_len_bits;

        s->dsp.vector_fmul_reverse(out, in, s->windows[win_idx], span);
    }
}
/** /**
* @return 0 if OK. 1 if last block of frame. return -1 if * @return 0 if OK. 1 if last block of frame. return -1 if
* unrecoverable error. * unrecoverable error.
...@@ -657,54 +712,8 @@ static int wma_decode_block(WMACodecContext *s) ...@@ -657,54 +712,8 @@ static int wma_decode_block(WMACodecContext *s)
} }
} }
/* build the window : we ensure that when the windows overlap
their squared sum is always 1 (MDCT reconstruction rule) */
/* XXX: merge with output */
{
int i, next_block_len, block_len, prev_block_len, n;
float *wptr;
block_len = s->block_len;
prev_block_len = 1 << s->prev_block_len_bits;
next_block_len = 1 << s->next_block_len_bits;
/* right part */
wptr = s->window + block_len;
if (block_len <= next_block_len) {
for(i=0;i<block_len;i++)
*wptr++ = s->windows[bsize][i];
} else {
/* overlap */
n = (block_len / 2) - (next_block_len / 2);
for(i=0;i<n;i++)
*wptr++ = 1.0;
for(i=0;i<next_block_len;i++)
*wptr++ = s->windows[s->frame_len_bits - s->next_block_len_bits][i];
for(i=0;i<n;i++)
*wptr++ = 0.0;
}
/* left part */
wptr = s->window + block_len;
if (block_len <= prev_block_len) {
for(i=0;i<block_len;i++)
*--wptr = s->windows[bsize][i];
} else {
/* overlap */
n = (block_len / 2) - (prev_block_len / 2);
for(i=0;i<n;i++)
*--wptr = 1.0;
for(i=0;i<prev_block_len;i++)
*--wptr = s->windows[s->frame_len_bits - s->prev_block_len_bits][i];
for(i=0;i<n;i++)
*--wptr = 0.0;
}
}
for(ch = 0; ch < s->nb_channels; ch++) { for(ch = 0; ch < s->nb_channels; ch++) {
if (s->channel_coded[ch]) { if (s->channel_coded[ch]) {
float *ptr;
int n4, index, n; int n4, index, n;
n = s->block_len; n = s->block_len;
...@@ -712,19 +721,14 @@ static int wma_decode_block(WMACodecContext *s) ...@@ -712,19 +721,14 @@ static int wma_decode_block(WMACodecContext *s)
s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize],
s->output, s->coefs[ch], s->mdct_tmp); s->output, s->coefs[ch], s->mdct_tmp);
/* XXX: optimize all that by build the window and
multipying/adding at the same time */
/* multiply by the window and add in the frame */ /* multiply by the window and add in the frame */
index = (s->frame_len / 2) + s->block_pos - n4; index = (s->frame_len / 2) + s->block_pos - n4;
ptr = &s->frame_out[ch][index]; wma_window(s, &s->frame_out[ch][index]);
s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
/* specific fast case for ms-stereo : add to second /* specific fast case for ms-stereo : add to second
channel if it is not coded */ channel if it is not coded */
if (s->ms_stereo && !s->channel_coded[1]) { if (s->ms_stereo && !s->channel_coded[1]) {
ptr = &s->frame_out[1][index]; wma_window(s, &s->frame_out[1][index]);
s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
} }
} }
} }
...@@ -779,9 +783,6 @@ static int wma_decode_frame(WMACodecContext *s, int16_t *samples) ...@@ -779,9 +783,6 @@ static int wma_decode_frame(WMACodecContext *s, int16_t *samples)
/* prepare for next block */ /* prepare for next block */
memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len], memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
s->frame_len * sizeof(float)); s->frame_len * sizeof(float));
/* XXX: suppress this */
memset(&s->frame_out[ch][s->frame_len], 0,
s->frame_len * sizeof(float));
} }
#ifdef TRACE #ifdef TRACE
......
...@@ -92,8 +92,8 @@ static void apply_window_and_mdct(AVCodecContext * avctx, signed short * audio, ...@@ -92,8 +92,8 @@ static void apply_window_and_mdct(AVCodecContext * avctx, signed short * audio,
memcpy(s->output, s->frame_out[channel], sizeof(float)*window_len); memcpy(s->output, s->frame_out[channel], sizeof(float)*window_len);
j = channel; j = channel;
for (i = 0; i < len; i++, j += avctx->channels){ for (i = 0; i < len; i++, j += avctx->channels){
s->output[i+window_len] = audio[j] / n * win[i]; s->output[i+window_len] = audio[j] / n * win[window_len - i - 1];
s->frame_out[channel][i] = audio[j] / n * win[window_len - i - 1]; s->frame_out[channel][i] = audio[j] / n * win[i];
} }
ff_mdct_calc(&s->mdct_ctx[window_index], s->coefs[channel], s->output, s->mdct_tmp); ff_mdct_calc(&s->mdct_ctx[window_index], s->coefs[channel], s->output, s->mdct_tmp);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment