Commit 4bc5f516 authored by vitor

Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@20884 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 874508af
...@@ -93,7 +93,102 @@ void ff_celp_lp_synthesis_filterf(float *out, ...@@ -93,7 +93,102 @@ void ff_celp_lp_synthesis_filterf(float *out,
{ {
int i,n; int i,n;
for (n = 0; n < buffer_length; n++) { float out0, out1, out2, out3;
float old_out0, old_out1, old_out2, old_out3;
float a,b,c;
a = filter_coeffs[0];
b = filter_coeffs[1];
c = filter_coeffs[2];
b -= filter_coeffs[0] * filter_coeffs[0];
c -= filter_coeffs[1] * filter_coeffs[0];
c -= filter_coeffs[0] * b;
old_out0 = out[-4];
old_out1 = out[-3];
old_out2 = out[-2];
old_out3 = out[-1];
for (n = 0; n <= buffer_length - 4; n+=4) {
float tmp0,tmp1,tmp2,tmp3;
float val;
out0 = in[0];
out1 = in[1];
out2 = in[2];
out3 = in[3];
out0 -= filter_coeffs[2] * old_out1;
out1 -= filter_coeffs[2] * old_out2;
out2 -= filter_coeffs[2] * old_out3;
out0 -= filter_coeffs[1] * old_out2;
out1 -= filter_coeffs[1] * old_out3;
out0 -= filter_coeffs[0] * old_out3;
val = filter_coeffs[3];
out0 -= val * old_out0;
out1 -= val * old_out1;
out2 -= val * old_out2;
out3 -= val * old_out3;
old_out3 = out[-5];
for (i = 5; i <= filter_length; i += 2) {
val = filter_coeffs[i-1];
out0 -= val * old_out3;
out1 -= val * old_out0;
out2 -= val * old_out1;
out3 -= val * old_out2;
old_out2 = out[-i-1];
val = filter_coeffs[i];
out0 -= val * old_out2;
out1 -= val * old_out3;
out2 -= val * old_out0;
out3 -= val * old_out1;
FFSWAP(float, old_out0, old_out2);
old_out1 = old_out3;
old_out3 = out[-i-2];
}
tmp0 = out0;
tmp1 = out1;
tmp2 = out2;
tmp3 = out3;
out3 -= a * tmp2;
out2 -= a * tmp1;
out1 -= a * tmp0;
out3 -= b * tmp1;
out2 -= b * tmp0;
out3 -= c * tmp0;
out[0] = out0;
out[1] = out1;
out[2] = out2;
out[3] = out3;
old_out0 = out0;
old_out1 = out1;
old_out2 = out2;
old_out3 = out3;
out += 4;
in += 4;
}
out -= n;
in -= n;
for (; n < buffer_length; n++) {
out[n] = in[n]; out[n] = in[n];
for (i = 1; i <= filter_length; i++) for (i = 1; i <= filter_length; i++)
out[n] -= filter_coeffs[i-1] * out[n-i]; out[n] -= filter_coeffs[i-1] * out[n-i];
......
...@@ -90,7 +90,8 @@ int ff_celp_lp_synthesis_filter(int16_t *out, ...@@ -90,7 +90,8 @@ int ff_celp_lp_synthesis_filter(int16_t *out,
* @param filter_coeffs filter coefficients. * @param filter_coeffs filter coefficients.
* @param in input signal * @param in input signal
* @param buffer_length amount of data to process * @param buffer_length amount of data to process
* @param filter_length filter length (10 for 10th order LP filter) * @param filter_length filter length (10 for 10th order LP filter). Must be
* greater than 4 and even.
* *
* @note Output buffer must contain filter_length samples of past * @note Output buffer must contain filter_length samples of past
* speech data before pointer. * speech data before pointer.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment