Commit 807563ba authored by Jean-Paul Saman's avatar Jean-Paul Saman

wma-fixed: Forward port of fixed integer WMA v1/2 decoder.

The fixed integer WMA v1/2 decoder originates from the ffmpeg project and has been changed into a fixed integer decoder by the RockBox project. It was then adapted for vlc in the 0.8.6-neuros branch.
parent a98b4976
...@@ -15,6 +15,7 @@ New Decoders: ...@@ -15,6 +15,7 @@ New Decoders:
* Blu-Ray Linear PCM * Blu-Ray Linear PCM
* QCELP (Qualcomm PureVoice) * QCELP (Qualcomm PureVoice)
* Real Video 3.0 & 4.0 * Real Video 3.0 & 4.0
* WMA v1/2 fixed point integer
Demuxers: Demuxers:
* Support for Dirac and RealVideo in Matroska files * Support for Dirac and RealVideo in Matroska files
......
...@@ -31,7 +31,7 @@ dnl automake behavior, you've likely never heard of maintainer mode, so we ...@@ -31,7 +31,7 @@ dnl automake behavior, you've likely never heard of maintainer mode, so we
dnl can't expect you to enable it manually. dnl can't expect you to enable it manually.
AS_IF([test "x${enable_maintainer_mode}" != "xno"], AS_IF([test "x${enable_maintainer_mode}" != "xno"],
[enable_maintainer_mode="yes"]) [enable_maintainer_mode="yes"])
AM_MAINTAINER_MODE AM_MAINTAINER_MODE
dnl dnl
...@@ -212,8 +212,7 @@ case "${host_os}" in ...@@ -212,8 +212,7 @@ case "${host_os}" in
VLC_ADD_LDFLAGS([libvlc],[-Wl,-framework,CoreFoundation]) VLC_ADD_LDFLAGS([libvlc],[-Wl,-framework,CoreFoundation])
VLC_ADD_LDFLAGS([motion],[-Wl,-framework,IOKit,-framework,CoreFoundation]) VLC_ADD_LDFLAGS([motion],[-Wl,-framework,IOKit,-framework,CoreFoundation])
AC_ARG_ENABLE(macosx-defaults, AC_ARG_ENABLE(macosx-defaults,
AS_HELP_STRING([--enable-macosx-defaults],[Build the default configuration AS_HELP_STRING([--enable-macosx-defaults],[Build the default configuration on Mac OS X (default enabled)]))
on Mac OS X (default enabled)]))
if test "x${enable_macosx_defaults}" != "xno" if test "x${enable_macosx_defaults}" != "xno"
then then
echo "" echo ""
...@@ -1086,6 +1085,11 @@ case "${host_cpu}" in ...@@ -1086,6 +1085,11 @@ case "${host_cpu}" in
"") "")
ARCH=unknown ARCH=unknown
;; ;;
arm*)
dnl use arm assembly
VLC_ADD_CFLAGS([wma_fixed],[-DCPU_ARM])
ARCH="${host_cpu}"
;;
*) *)
ARCH="${host_cpu}" ARCH="${host_cpu}"
;; ;;
...@@ -1471,7 +1475,7 @@ dnl - Others: test should fail ...@@ -1471,7 +1475,7 @@ dnl - Others: test should fail
AC_CHECK_HEADERS(altivec.h) AC_CHECK_HEADERS(altivec.h)
CPPFLAGS="${CPPFLAGS_save}" CPPFLAGS="${CPPFLAGS_save}"
AS_IF([test "${ac_cv_c_altivec}" != "no"], [ AS_IF([test "${ac_cv_c_altivec}" != "no"], [
AC_DEFINE(CAN_COMPILE_C_ALTIVEC, 1, AC_DEFINE(CAN_COMPILE_C_ALTIVEC, 1,
[Define to 1 if C AltiVec extensions are available.]) [Define to 1 if C AltiVec extensions are available.])
...@@ -2202,9 +2206,9 @@ then ...@@ -2202,9 +2206,9 @@ then
if test "${SYS}" = "mingw32" -o "${SYS}" = "cygwin" if test "${SYS}" = "mingw32" -o "${SYS}" = "cygwin"
then then
test -z "${with_opencv_tree}" && AC_MSG_ERROR([You have to specify --with-opencv-tree]) test -z "${with_opencv_tree}" && AC_MSG_ERROR([You have to specify --with-opencv-tree])
AC_MSG_CHECKING(for opencv in ${with_opencv_tree}) AC_MSG_CHECKING(for opencv in ${with_opencv_tree})
if test -f ${with_opencv_tree}/cv/include/cv.h -a -f ${with_opencv_tree}/cxcore/include/cxcore.h \ if test -f ${with_opencv_tree}/cv/include/cv.h -a -f ${with_opencv_tree}/cxcore/include/cxcore.h \
-a -f ${with_opencv_tree}/cvaux/include/cvaux.h -a -f ${with_opencv_tree}/otherlibs/highgui/highgui.h -a -f ${with_opencv_tree}/cvaux/include/cvaux.h -a -f ${with_opencv_tree}/otherlibs/highgui/highgui.h
then then
AC_MSG_RESULT(yes) AC_MSG_RESULT(yes)
VLC_ADD_PLUGIN([opencv_wrapper]) VLC_ADD_PLUGIN([opencv_wrapper])
...@@ -2420,7 +2424,6 @@ then ...@@ -2420,7 +2424,6 @@ then
then then
AC_DEFINE_UNQUOTED(VIDEODEV2_H_FILE, "${with_videodev2}", [Location of videodev2.h]) AC_DEFINE_UNQUOTED(VIDEODEV2_H_FILE, "${with_videodev2}", [Location of videodev2.h])
fi fi
AC_CACHE_CHECK([for new linux/videodev2.h], AC_CACHE_CHECK([for new linux/videodev2.h],
[ac_cv_new_linux_videodev2_h], [ac_cv_new_linux_videodev2_h],
...@@ -2889,6 +2892,16 @@ dnl ...@@ -2889,6 +2892,16 @@ dnl
AC_ARG_WITH(,[Codec plugins:]) AC_ARG_WITH(,[Codec plugins:])
dnl
dnl wmafixed plugin
dnl
AC_ARG_ENABLE(wma-fixed,
[ --enable-wma-fixed libwma-fixed module (default disabled)])
if test "${enable_wma_fixed}" = "yes"
then
VLC_ADD_PLUGIN([wma_fixed])
fi
dnl dnl
dnl mad plugin dnl mad plugin
dnl dnl
...@@ -5906,6 +5919,7 @@ AC_CONFIG_FILES([ ...@@ -5906,6 +5919,7 @@ AC_CONFIG_FILES([
modules/codec/dmo/Makefile modules/codec/dmo/Makefile
modules/codec/subtitles/Makefile modules/codec/subtitles/Makefile
modules/codec/spudec/Makefile modules/codec/spudec/Makefile
modules/codec/wmafixed/Makefile
modules/codec/xvmc/Makefile modules/codec/xvmc/Makefile
modules/control/Makefile modules/control/Makefile
modules/control/http/Makefile modules/control/http/Makefile
......
SUBDIRS = cmml dmo avcodec subtitles spudec xvmc SUBDIRS = cmml dmo avcodec subtitles spudec wmafixed xvmc
SOURCES_a52 = a52.c SOURCES_a52 = a52.c
SOURCES_dts = dts.c SOURCES_dts = dts.c
SOURCES_flac = flac.c SOURCES_flac = flac.c
......
SOURCES_wma_fixed = asf.h bswap.h fft.h mdct.h wma.c wmadeci.c bitstream.c \
wmadata.h wmafixed.c bitstream.h fft.c mdct.c \
wmadec.h wmafixed.h
/*****************************************************************************
* wma.c: wma decoder using integer decoder from Rockbox, based on FFmpeg
*****************************************************************************
* Copyright (C) 2008 M2X
*
* Authors: Rafaël Carré <rcarre@m2x.nl>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef _ASF_H
#define _ASF_H
#include <inttypes.h>
/* ASF codec IDs */
#define ASF_CODEC_ID_WMAV1 0x160
#define ASF_CODEC_ID_WMAV2 0x161
/*
 * Audio stream description handed to the decoder (wma_decode_init() takes a
 * pointer to this structure).
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; confirm them against the code that fills this structure.
 */
struct asf_waveformatex_s {
uint32_t packet_size;
int audiostream;
uint16_t codec_id; /* presumably one of the ASF_CODEC_ID_* values -- confirm */
uint16_t channels;
uint32_t rate;
uint32_t bitrate;
uint16_t blockalign;
uint16_t bitspersample;
uint16_t datalen; /* presumably the number of valid bytes in data[] -- confirm */
uint8_t data[6]; /* fixed-size storage: at most 6 bytes */
};
typedef struct asf_waveformatex_s asf_waveformatex_t;
#endif
/*
* Common bit i/o utils
* Copyright (c) 2000, 2001 Fabrice Bellard.
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
*/
/**
* @file bitstream.c
* bitstream api.
*/
#include "bitstream.h"
#include <stdio.h>
#define DEBUGF printf
/**
* Same as av_mallocz_static(), but does a realloc.
*
* @param[in] ptr The block of memory to reallocate.
* @param[in] size The requested size.
* @return Block of memory of requested size.
* @deprecated. Code which uses ff_realloc_static is broken/misdesigned
* and should correctly use static arrays
*/
attribute_deprecated void *ff_realloc_static(void *ptr, unsigned int size);
/* Lookup table: ff_sqrt_tab[i] is the integer part of sqrt(i) for i in 0..127. */
const uint8_t ff_sqrt_tab[128]={
0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11
};
/*
 * Lookup table: ff_log2_tab[i] is the position of the highest set bit of i
 * (the integer part of log2(i)) for i in 1..255; entry 0 is 0.
 */
const uint8_t ff_log2_tab[256]={
0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
/**
 * Emit zero-valued padding bits into the bit writer.
 *
 * The number of padding bits is the low three bits of the negated write
 * index (ALT_BITSTREAM_WRITER) or of the remaining-bit counter (default
 * writer), i.e. a value in 0..7.
 *
 * @param s bit writer to pad
 */
void align_put_bits(PutBitContext *s)
{
#ifdef ALT_BITSTREAM_WRITER
    const int pad_bits = (-s->index) & 7;
#else
    const int pad_bits = s->bit_left & 7;
#endif

    put_bits(s, pad_bits, 0);
}
/**
 * Write a NUL-terminated string to the bit writer, 8 bits per character.
 *
 * @param pbc      bit writer receiving the characters
 * @param s        string to write; the terminator itself is not copied
 * @param put_zero when non-zero, an extra all-zero byte is written after
 *                 the last character
 */
void ff_put_string(PutBitContext * pbc, char *s, int put_zero)
{
    char *cursor;

    for (cursor = s; *cursor; cursor++)
        put_bits(pbc, 8, *cursor);

    if (put_zero)
        put_bits(pbc, 8, 0);
}
/* VLC decoding */
/*
 * GET_DATA(v, table, i, wrap, size): load entry number i of a strided table.
 *
 * The entry is located at byte offset i * wrap from the start of 'table' and
 * is read as an 8-bit value when size is 1, a 16-bit value when size is 2 and
 * a 32-bit value for any other size; the result is stored in v.
 * The macro expands to a braced block (not do { } while (0)) and does not
 * parenthesize its arguments, so pass simple expressions only.
 */
#define GET_DATA(v, table, i, wrap, size) \
{\
const uint8_t *ptr = (const uint8_t *)table + i * wrap;\
switch(size) {\
case 1:\
v = *(const uint8_t *)ptr;\
break;\
case 2:\
v = *(const uint16_t *)ptr;\
break;\
default:\
v = *(const uint32_t *)ptr;\
break;\
}\
}
/**
 * Reserve 'size' consecutive entries at the end of the used part of
 * vlc->table.
 *
 * Nothing is allocated here: vlc->table is a buffer of vlc->table_allocated
 * entries that must already be in place. A request that does not fit is
 * therefore refused instead of being "granted" by merely increasing
 * table_allocated, which would let the caller write past the end of the
 * buffer.
 *
 * @param vlc  VLC whose table is being filled
 * @param size number of entries to reserve
 * @return index of the first reserved entry, or -1 when there is no table or
 *         not enough room left (vlc is left unchanged in that case)
 */
static int alloc_table(VLC *vlc, int size)
{
    const int index = vlc->table_size;
    const int needed = index + size;

    if (!vlc->table)
        return -1;

    if (needed > vlc->table_allocated) {
        /* report how many entries were needed versus how many exist */
        DEBUGF("Tried to allocate past the end of a Huffman table: %d/%d\n",
               needed, vlc->table_allocated);
        return -1;
    }

    vlc->table_size = needed;
    return index;
}
/*
 * Build one level of the multi-level VLC lookup table and, recursively, the
 * sub-tables hanging off it.
 *
 * A table of (1 << table_nb_bits) entries is reserved with alloc_table().
 * Each entry is a pair: [0] holds a code index (or, for entries that lead to
 * a sub-table, the index of that sub-table inside vlc->table) and [1] holds a
 * bit count (positive: length of the code within this table; negative:
 * minus the number of index bits of the sub-table; 0: unused entry).
 *
 * Only codes whose leading n_prefix bits equal code_prefix are considered.
 * bits/codes are strided arrays read with GET_DATA (see the *_wrap and
 * *_size parameters); entries with a length <= 0 are skipped.
 *
 * Returns the index of the table built, or -1 if no room could be reserved
 * or if two codes map to the same table entry.
 *
 * Note: 'flags' is a local that is always 0 here, so none of the
 * INIT_VLC_LE branches below is ever taken.
 */
static int build_table(VLC *vlc, int table_nb_bits,
int nb_codes,
const void *bits, int bits_wrap, int bits_size,
const void *codes, int codes_wrap, int codes_size,
uint32_t code_prefix, int n_prefix)
{
int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2;
uint32_t code;
int flags = 0;
VLC_TYPE (*table)[2];
table_size = 1 << table_nb_bits;
table_index = alloc_table(vlc, table_size);
#ifdef DEBUG_VLC
printf("new table index=%d size=%d code_prefix=%x n=%d\n",
table_index, table_size, code_prefix, n_prefix);
#endif
if (table_index < 0)
return -1;
table = &vlc->table[table_index];
/* mark every entry of the new table as unused */
for(i=0;i<table_size;i++) {
table[i][1] = 0; //bits
table[i][0] = -1; //codes
}
/* first pass: map codes and compute auxiliary table sizes */
for(i=0;i<nb_codes;i++) {
GET_DATA(n, bits, i, bits_wrap, bits_size);
GET_DATA(code, codes, i, codes_wrap, codes_size);
/* we accept tables with holes */
if (n <= 0)
continue;
#if defined(DEBUG_VLC) && 0
printf("i=%d n=%d code=0x%x\n", i, n, code);
#endif
/* if code matches the prefix, it is in the table */
n -= n_prefix;
/* NOTE(review): n may be <= 0 here (code shorter than the prefix); the
   shift below is then evaluated before the n > 0 test -- confirm this is
   harmless on the targeted compilers */
if(flags & INIT_VLC_LE)
code_prefix2= code & (n_prefix>=32 ? 0xffffffff : (uint32_t)(1 << n_prefix)-1);
else
code_prefix2= code >> n;
if (n > 0 && (int)code_prefix2 == (int)code_prefix) {
if (n <= table_nb_bits) {
/* no need to add another table */
/* the code fills every entry whose leading n bits match it */
j = (code << (table_nb_bits - n)) & (table_size - 1);
nb = 1 << (table_nb_bits - n);
for(k=0;k<nb;k++) {
if(flags & INIT_VLC_LE)
j = (code >> n_prefix) + (k<<n);
#ifdef DEBUG_VLC
av_log(NULL, 0, "%4x: code=%d n=%d\n",
j, i, n);
#endif
/* entry already taken: the code set is inconsistent */
if (table[j][1] /*bits*/ != 0) {
return -1;
}
table[j][1] = n; //bits
table[j][0] = i; //code
j++;
}
} else {
/* code is longer than this table: it needs a sub-table */
n -= table_nb_bits;
j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1);
#ifdef DEBUG_VLC
av_log(NULL, 0,"%4x: n=%d (subtable)\n",
j, n);
#endif
/* compute table size */
/* keep the largest remaining length seen, stored negated */
n1 = -table[j][1]; //bits
if (n > n1)
n1 = n;
table[j][1] = -n1; //bits
}
}
}
/* second pass : fill auxiliary tables recursively */
for(i=0;i<table_size;i++) {
n = table[i][1]; //bits
if (n < 0) {
n = -n;
/* a sub-table never uses more index bits than its parent */
if (n > table_nb_bits) {
n = table_nb_bits;
table[i][1] = -n; //bits
}
index = build_table(vlc, n, nb_codes,
bits, bits_wrap, bits_size,
codes, codes_wrap, codes_size,
(flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i),
n_prefix + table_nb_bits);
if (index < 0)
return -1;
/* re-derive the table pointer from vlc->table after the recursive call */
table = &vlc->table[table_index];
table[i][0] = index; //code
}
}
return table_index;
}
/* Build VLC decoding tables suitable for use with get_vlc().

   'nb_bits' sets the decoding table size (2^nb_bits entries). The
   bigger it is, the faster the decoding. But it should not be too
   big, to save memory and L1 cache. '9' is a good compromise.

   'nb_codes' : number of vlc codes
   'bits' : table which gives the size (in bits) of each vlc code.
   'codes' : table which gives the bit pattern of each vlc code.
   'xxx_wrap' : gives the number of bytes between each entry of the
   'bits' or 'codes' tables.
   'xxx_size' : gives the number of bytes of each entry of the 'bits'
   or 'codes' tables.
   'wrap' and 'size' allow any memory configuration and type
   (byte/word/long) to be used to store the 'bits' and 'codes' tables.
   'flags' : accepted only to keep the parameter list consistent with
   FFmpeg; it is ignored.

   vlc->table and vlc->table_allocated are not set by this function; they
   are read while the table is built, so the caller must set them first.

   Returns 0 on success and -1 if the table could not be built.
*/
int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
             const void *bits, int bits_wrap, int bits_size,
             const void *codes, int codes_wrap, int codes_size,
             int flags)
{
    /* Silence the unused-parameter warning without leaking 'flags' into
     * the return value: a status code must not depend on caller flags. */
    (void)flags;

    vlc->bits = nb_bits;
    vlc->table_size = 0;
#ifdef DEBUG_VLC
    printf("build table nb_codes=%d\n", nb_codes);
#endif
    if (build_table(vlc, nb_bits, nb_codes,
                    bits, bits_wrap, bits_size,
                    codes, codes_wrap, codes_size,
                    0, 0) < 0)
        return -1;

    return 0;
}
This diff is collapsed.
/*
* copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file bswap.h
* byte swap.
*/
#ifndef __BSWAP_H__
#define __BSWAP_H__
#ifdef HAVE_BYTESWAP_H
#include <byteswap.h>
#else
#undef ROCKBOX
#ifdef ROCKBOX
/* rockbox' optimised inline functions */
/* This branch is only compiled when ROCKBOX is defined; it maps the 16- and
 * 32-bit swaps onto swap16()/swap32(), which are not declared in this file. */
#define bswap_16(x) swap16(x)
#define bswap_32(x) swap32(x)
/*
 * 64-bit byte swap built from two 32-bit swaps: the swapped low half of x is
 * stored in the first 32-bit member of the union and the swapped high half
 * in the second one.
 * NOTE(review): whether that yields a full 64-bit byte reversal depends on
 * which member overlays the most significant half of 'll', i.e. on the
 * host byte order -- confirm before enabling this branch.
 */
static inline uint64_t ByteSwap64(uint64_t x)
{
union {
uint64_t ll;
struct {
uint32_t l,h;
} l;
} r;
r.l.l = bswap_32 (x);
r.l.h = bswap_32 (x>>32);
return r.ll;
}
#define bswap_64(x) ByteSwap64(x)
#elif defined(ARCH_X86)
/* Byte swap helpers for ARCH_X86, written with GCC-style inline assembly. */
/* Swap the two bytes of a 16-bit value by exchanging the low and high byte
 * halves of the register holding it. */
static inline unsigned short ByteSwap16(unsigned short x)
{
__asm("xchgb %b0,%h0" :
"=q" (x) :
"0" (x));
return x;
}
#define bswap_16(x) ByteSwap16(x)
/* Reverse the four bytes of a 32-bit value: a single bswap instruction when
 * __CPU__ > 386, otherwise byte exchange, rotate by 16 bits, byte exchange. */
static inline unsigned int ByteSwap32(unsigned int x)
{
#if __CPU__ > 386
__asm("bswap %0":
"=r" (x) :
#else
__asm("xchgb %b0,%h0\n"
" rorl $16,%0\n"
" xchgb %b0,%h0":
"=q" (x) :
#endif
"0" (x));
return x;
}
#define bswap_32(x) ByteSwap32(x)
/* Reverse the eight bytes of a 64-bit value: both 32-bit halves are
 * byte-swapped with bswap_32() and then exchanged with each other. */
static inline unsigned long long int ByteSwap64(unsigned long long int x)
{
register union { __extension__ uint64_t __ll;
uint32_t __l[2]; } __x;
asm("xchgl %0,%1":
"=r"(__x.__l[0]),"=r"(__x.__l[1]):
"0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32))));
return __x.__ll;
}
#define bswap_64(x) ByteSwap64(x)
#elif defined(ARCH_SH4)
/* Byte swap helpers for ARCH_SH4, written with GCC-style inline assembly. */
/* 16-bit swap: one swap.b instruction on the register holding x. */
static inline uint16_t ByteSwap16(uint16_t x) {
__asm__("swap.b %0,%0":"=r"(x):"0"(x));
return x;
}
/* 32-bit swap: the instruction sequence swap.b, swap.w, swap.b on the
 * register holding x. */
static inline uint32_t ByteSwap32(uint32_t x) {
__asm__(
"swap.b %0,%0\n"
"swap.w %0,%0\n"
"swap.b %0,%0\n"
:"=r"(x):"0"(x));
return x;
}
#define bswap_16(x) ByteSwap16(x)
#define bswap_32(x) ByteSwap32(x)
/*
 * 64-bit byte swap built from two 32-bit swaps: the swapped low half of x is
 * stored in the first 32-bit member of the union and the swapped high half
 * in the second one.
 * NOTE(review): whether that yields a full 64-bit byte reversal depends on
 * which member overlays the most significant half of 'll', i.e. on the
 * byte order the SH4 target runs in -- confirm.
 */
static inline uint64_t ByteSwap64(uint64_t x)
{
union {
uint64_t ll;
struct {
uint32_t l,h;
} l;
} r;
r.l.l = bswap_32 (x);
r.l.h = bswap_32 (x>>32);
return r.ll;
}
#define bswap_64(x) ByteSwap64(x)
#else
/* Portable fallbacks used when no architecture-specific version applies.
 * Both macros evaluate their argument more than once. */
#define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)

// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
#define bswap_32(x) \
((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
(((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))

/**
 * Reverse the byte order of a 64-bit value.
 *
 * Each 32-bit half is byte-swapped and the halves trade places; the
 * computation uses shifts only, so it does not depend on the host's
 * memory layout.
 */
static inline uint64_t ByteSwap64(uint64_t x)
{
    const uint32_t upper = (uint32_t)(x >> 32);
    const uint32_t lower = (uint32_t)x;
    const uint64_t new_upper = bswap_32(lower);
    const uint64_t new_lower = bswap_32(upper);

    return (new_upper << 32) | new_lower;
}
#define bswap_64(x) ByteSwap64(x)
#endif /* !ARCH_X86 */
#endif /* !HAVE_BYTESWAP_H */
// be2me_N(x): convert an N-bit big-endian value to the machine's byte order.
// le2me_N(x): convert an N-bit little-endian value to the machine's byte order.
// The family matching the machine order (selected by WORDS_BIGENDIAN) expands
// to x unchanged; the other family expands to the corresponding bswap_N(x).
#ifdef WORDS_BIGENDIAN
#define be2me_16(x) (x)
#define be2me_32(x) (x)
#define be2me_64(x) (x)
#define le2me_16(x) bswap_16(x)
#define le2me_32(x) bswap_32(x)
#define le2me_64(x) bswap_64(x)
#else
#define be2me_16(x) bswap_16(x)
#define be2me_32(x) bswap_32(x)
#define be2me_64(x) bswap_64(x)
#define le2me_16(x) (x)
#define le2me_32(x) (x)
#define le2me_64(x) (x)
#endif
#endif /* __BSWAP_H__ */
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <inttypes.h>
#include "fft.h"
#include "wmafixed.h"
#define IBSS_ATTR
#define ICONST_ATTR
#define ICODE_ATTR
FFTComplex exptab0[512] IBSS_ATTR;
/*
 * Butterfly operation.
 *
 * Given the complex inputs (pre1, pim1) and (qre1, qim1), stores their sum in
 * (pre, pim) and their difference (first minus second) in (qre, qim).
 * All four inputs are copied into temporaries before anything is written, so
 * the outputs may name the same objects as the inputs. Plain 32-bit additions
 * and subtractions are used; no scaling is applied.
 */
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
{ \
int32_t ax, ay, bx, by; \
bx=pre1; \
by=pim1; \
ax=qre1; \
ay=qim1; \
pre = (bx + ax); \
pim = (by + ay); \
qre = (bx - ax); \
qim = (by - ay); \
}
/*
 * In-place FFT over the (1 << s->nbits) complex values in z.
 *
 * The transform is computed as successive butterfly passes; s->inverse
 * selects which of the two variants of pass 1 is used. No reordering of z is
 * done in this function. Twiddle factors come from the global exptab0 table,
 * indexed with a stride of 1 << (10 - s->nbits), so the largest index used
 * stays below 512.
 *
 * NOTE(review): the do/while loops of passes 0 and 1 start from np/2 and np/4
 * iterations and test the counter after decrementing it, so np is assumed to
 * be at least 4; s->nbits is also assumed not to exceed 10 (tabshift would
 * become negative otherwise) -- confirm callers guarantee both.
 *
 * Always returns 0.
 */
int fft_calc_unscaled(FFTContext *s, FFTComplex *z)
{
int ln = s->nbits;
int j, np, np2;
int nblocks, nloops;
register FFTComplex *p, *q;
int l;
int32_t tmp_re, tmp_im;
int tabshift = 10-ln;
np = 1 << ln;
/* pass 0 */
/* butterflies on adjacent pairs of elements */
p=&z[0];
j=(np >> 1);
do
{
BF(p[0].re, p[0].im, p[1].re, p[1].im,
p[0].re, p[0].im, p[1].re, p[1].im);
p+=2;
}
while (--j != 0);
/* pass 1 */
/* groups of four elements; the second butterfly of each group takes its
   second input with re/im exchanged and one sign flipped, the flipped
   component depending on the transform direction */
p=&z[0];
j=np >> 2;
if (s->inverse)
{
do
{
BF(p[0].re, p[0].im, p[2].re, p[2].im,
p[0].re, p[0].im, p[2].re, p[2].im);
BF(p[1].re, p[1].im, p[3].re, p[3].im,
p[1].re, p[1].im, -p[3].im, p[3].re);
p+=4;
}
while (--j != 0);
}
else
{
do
{
BF(p[0].re, p[0].im, p[2].re, p[2].im,
p[0].re, p[0].im, p[2].re, p[2].im);
BF(p[1].re, p[1].im, p[3].re, p[3].im,
p[1].re, p[1].im, p[3].im, -p[3].re);
p+=4;
}
while (--j != 0);
}
/* pass 2 .. ln-1 */
/* each iteration halves the number of blocks and doubles their length */
nblocks = np >> 3;
nloops = 1 << 2;
np2 = np >> 1;
do
{
p = z;
q = z + nloops;
for (j = 0; j < nblocks; ++j)
{
/* first butterfly of the block: no multiplication */
BF(p->re, p->im, q->re, q->im,
p->re, p->im, q->re, q->im);
p++;
q++;
for(l = nblocks; l < np2; l += nblocks)
{
/* multiply q by the table entry before the butterfly */
CMUL(&tmp_re, &tmp_im, exptab0[(l<<tabshift)].re, exptab0[(l<<tabshift)].im, q->re, q->im);
//CMUL(&tmp_re, &tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
BF(p->re, p->im, q->re, q->im,
p->re, p->im, tmp_re, tmp_im);
p++;
q++;
}
p += nloops;
q += nloops;
}
nblocks = nblocks >> 1;
nloops = nloops << 1;
}
while (nblocks != 0);
return 0;
}
/**
 * Fill the global exptab0 table used by fft_calc_unscaled().
 *
 * For each i in 0..511 the fixed-point quotient i / 1024 is computed with
 * fixdiv32(), shifted left by 16 bits and passed to fsincos(); the cosine it
 * stores becomes the real part of entry i and the sine it returns becomes
 * the imaginary part.
 *
 * @return always 0
 */
int fft_init_global(void)
{
    const int table_bits = 10;
    const int n = 1 << table_bits;
    const int32_t n_fixed = itofix32(n);
    int i;

    for (i = 0; i < n / 2; ++i) {
        const int32_t ratio = fixdiv32(itofix32(i), n_fixed);
        int32_t cosine;
        const int32_t sine = fsincos(ratio << 16, &cosine);

        exptab0[i].re = cosine;
        exptab0[i].im = sine;
    }

    return 0;
}
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef WMA_FFT_H
#define WMA_FFT_H 1
#include <inttypes.h>
typedef int32_t FFTSample;
/* One complex sample: 32-bit integer real and imaginary parts. */
typedef struct FFTComplex
{
int32_t re, im;
}
FFTComplex;
/*
 * Parameters of one FFT.
 * fft_calc_unscaled() reads only nbits and inverse; revtab, exptab, exptab1
 * and fft_calc are not referenced by it.
 */
typedef struct FFTContext
{
int nbits; /* the transform works on (1 << nbits) complex values */
int inverse; /* non-zero selects the inverse variant of the transform */
uint16_t *revtab;
FFTComplex *exptab;
FFTComplex *exptab1; /* only used by SSE code */
int (*fft_calc)(struct FFTContext *s, FFTComplex *z);
}
FFTContext;
int fft_calc_unscaled(FFTContext *s, FFTComplex *z);
int fft_init_global(void);
#endif
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <string.h>
#include "wmafixed.h"
#include "mdct.h"
/*these are the sin and cos rotations used by the MDCT*/
/*accessed too infrequently to give much speedup in IRAM*/
/* tcosarray[k]/tsinarray[k] are pointed at tcosk/tsink by mdct_init_global();
   ff_mdct_init() selects the pair with index 12 - nbits and fills it */
int32_t *tcosarray[5], *tsinarray[5];
int32_t tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64];
int32_t tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64];
/* 10-bit bit-reversal permutation, filled by mdct_init_global() */
uint16_t revtab0[1024];
/**
 * init MDCT or IMDCT computation.
 *
 * Clears the context, selects the rotation tables for a transform of size
 * 2^nbits from the global tcosarray[]/tsinarray[] and fills them, then sets
 * up the embedded FFT for 2^(nbits-2) points.
 *
 * The global tables only exist for nbits in 8..12 and are installed by
 * mdct_init_global(); any other nbits, or a call made before
 * mdct_init_global(), is rejected instead of indexing outside the arrays or
 * writing through a NULL pointer.
 *
 * @param s       context to initialise (zeroed first, also on failure)
 * @param nbits   log2 of the transform size, 8 to 12 inclusive
 * @param inverse stored in the FFT context to select the inverse transform
 * @return 0 on success, -1 if nbits is out of range or the tables are not
 *         installed
 */
int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
{
    int n, n4, i;

    memset(s, 0, sizeof(*s));

    /* tcosarray[]/tsinarray[] have five slots: index 12 - nbits in 0..4 */
    if (nbits < 8 || nbits > 12)
        return -1;

    n = 1 << nbits;
    s->nbits = nbits;
    s->n = n;
    n4 = n >> 2;

    s->tcos = tcosarray[12-nbits];
    s->tsin = tsinarray[12-nbits];
    if (!s->tcos || !s->tsin)
        return -1;

    for(i=0;i<n4;i++)
    {
        int32_t ip = itofix32(i) + 0x2000;
        ip = ip >> nbits;

        /*I can't remember why this works, but it seems
          to agree for ~24 bits, maybe more!*/
        /* both table entries are stored negated */
        s->tsin[i] = - fsincos(ip<<16, &(s->tcos[i]));
        s->tcos[i] *=-1;
    }

    s->fft.nbits = nbits-2;
    s->fft.inverse = inverse;

    return 0;
}
/**
* Compute inverse MDCT of size N = 2^nbits
*
* The pre-rotated data is written into output (viewed as complex values),
* transformed in place by fft_calc_unscaled(), and the post-rotation result
* is then written back into input, which is therefore overwritten.
*
* @param s MDCT context providing nbits, the tcos/tsin tables and the FFT
* @param output N samples
* @param input N/2 samples; used as scratch space and clobbered
*/
void ff_imdct_calc(MDCTContext *s,
int32_t *output,
int32_t *input)
{
int k, n8, n4, n2, n, j,scale;
const int32_t *tcos = s->tcos;
const int32_t *tsin = s->tsin;
const int32_t *in1, *in2;
FFTComplex *z1 = (FFTComplex *)output;
FFTComplex *z2 = (FFTComplex *)input;
/* revtab0 holds a 10-bit reversal; the shift scales k to index it */
int revtabshift = 12 - s->nbits;
n = 1 << s->nbits;
n2 = n >> 1;
n4 = n >> 2;
n8 = n >> 3;
/* pre rotation */
/* in1 walks forward and in2 backward through input, two samples at a time;
   results are stored in bit-reversed order */
in1 = input;
in2 = input + n2 - 1;
for(k = 0; k < n4; k++)
{
j=revtab0[k<<revtabshift];
CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]);
in1 += 2;
in2 -= 2;
}
/* the return value is stored in scale but not used afterwards */
scale = fft_calc_unscaled(&s->fft, z1);
/* post rotation + reordering */
for(k = 0; k < n4; k++)
{
CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]);
}
/* spread the n4 complex values over the n output samples: each value is
   written twice, at mirrored positions */
for(k = 0; k < n8; k++)
{
int32_t r1,r2,r3,r4,r1n,r2n,r3n;
r1 = z2[n8 + k].im;
r1n = r1 * -1;
r2 = z2[n8-1-k].re;
r2n = r2 * -1;
r3 = z2[k+n8].re;
r3n = r3 * -1;
r4 = z2[n8-k-1].im;
/* first half of the output: each value and its negation */
output[2*k] = r1n;
output[n2-1-2*k] = r1;
output[2*k+1] = r2;
output[n2-1-2*k-1] = r2n;
/* second half of the output: each value with the same sign twice */
output[n2 + 2*k]= r3n;
output[n-1- 2*k]= r3n;
output[n2 + 2*k+1]= r4;
output[n-2 - 2 * k] = r4;
}
}
/**
 * One-time global MDCT setup.
 *
 * Installs the five cosine/sine rotation tables in tcosarray[]/tsinarray[],
 * fills revtab0 with the 10-bit bit-reversal of each index 0..1023 and calls
 * fft_init_global().
 *
 * @return always 0
 */
int mdct_init_global(void)
{
    unsigned int idx;

    /* The tables look redundant but, per the original authors, merging
       them costs a substantial amount of accuracy for very little memory. */
    tcosarray[0] = tcos0;
    tcosarray[1] = tcos1;
    tcosarray[2] = tcos2;
    tcosarray[3] = tcos3;
    tcosarray[4] = tcos4;

    tsinarray[0] = tsin0;
    tsinarray[1] = tsin1;
    tsinarray[2] = tsin2;
    tsinarray[3] = tsin3;
    tsinarray[4] = tsin4;

    /* The bit reverse table is built here rather than in fft_init; it is
       sized for the largest transform and smaller sizes reuse it. */
    for (idx = 0; idx < 1024; idx++) {
        unsigned int remaining = idx;
        unsigned int reversed = 0;
        int bit;

        /* shift the bits out of idx low end first and into the result */
        for (bit = 0; bit < 10; bit++) {
            reversed = (reversed << 1) | (remaining & 1);
            remaining >>= 1;
        }
        revtab0[idx] = reversed;
    }

    fft_init_global();

    return 0;
}
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef WMA_MDCT_H
#define WMA_MDCT_H 1
#include "fft.h"
/* State of one MDCT/IMDCT of a fixed size, set up by ff_mdct_init(). */
typedef struct MDCTContext
{
int n; /* size of MDCT (i.e. number of input data * 2) */
int nbits; /* n = 2^nbits */
/* pre/post rotation tables */
int32_t *tcos;
int32_t *tsin;
/* FFT used between the pre- and post-rotation steps */
FFTContext fft;
}
MDCTContext;
int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
void ff_imdct_calc(MDCTContext *s, int32_t *output, int32_t *input);
int mdct_init_global(void);
#endif
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _WMADEC_H
#define _WMADEC_H
#include <inttypes.h>
#include "asf.h"
#include "bitstream.h" /* For GetBitContext */
#include "mdct.h"
#undef TRACE
/* size of blocks */
#define BLOCK_MIN_BITS 7
#define BLOCK_MAX_BITS 11
#define BLOCK_MAX_SIZE (1 << BLOCK_MAX_BITS)
#define BLOCK_NB_SIZES (BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1)
/* XXX: find exact max size */
#define HIGH_BAND_MAX_SIZE 16
#define NB_LSP_COEFS 10
/* XXX: is it a suitable value ? */
#define MAX_CODED_SUPERFRAME_SIZE 16384
#define M_PI 3.14159265358979323846
#define M_PI_F 0x3243f // in fixed 32 format
#define TWO_M_PI_F 0x6487f //in fixed 32
#define MAX_CHANNELS 2
#define NOISE_TAB_SIZE 8192
#define LSP_POW_BITS 7
/*
 * Complete state of one WMA decoder instance. A pointer to it is the first
 * argument of wma_decode_init(), wma_decode_superframe_init() and
 * wma_decode_superframe_frame().
 * Array dimensions follow the limits defined in this header:
 * BLOCK_NB_SIZES block sizes, MAX_CHANNELS channels and BLOCK_MAX_SIZE
 * coefficients per block.
 */
typedef struct WMADecodeContext
{
GetBitContext gb;
int nb_block_sizes; /* number of block sizes */
int sample_rate;
int nb_channels;
int bit_rate;
int version; /* 1 = 0x160 (WMAV1), 2 = 0x161 (WMAV2) */
int block_align;
int use_bit_reservoir;
int use_variable_block_len;
int use_exp_vlc; /* exponent coding: 0 = lsp, 1 = vlc + delta */
int use_noise_coding; /* true if perceptual noise is added */
int byte_offset_bits;
VLC exp_vlc;
int exponent_sizes[BLOCK_NB_SIZES];
uint16_t exponent_bands[BLOCK_NB_SIZES][25];
int high_band_start[BLOCK_NB_SIZES]; /* index of first coef in high band */
int coefs_start; /* first coded coef */
int coefs_end[BLOCK_NB_SIZES]; /* max number of coded coefficients */
int exponent_high_sizes[BLOCK_NB_SIZES];
int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE];
VLC hgain_vlc;
/* coded values in high bands */
int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
/* there are two possible tables for spectral coefficients */
VLC coef_vlc[2];
uint16_t *run_table[2];
uint16_t *level_table[2];
/* frame info */
int frame_len; /* frame length in samples */
int frame_len_bits; /* frame_len = 1 << frame_len_bits */
/* block info */
int reset_block_lengths;
int block_len_bits; /* log2 of current block length */
int next_block_len_bits; /* log2 of next block length */
int prev_block_len_bits; /* log2 of prev block length */
int block_len; /* block length in samples */
int block_num; /* block number in current frame */
int block_pos; /* current position in frame */
uint8_t ms_stereo; /* true if mid/side stereo mode */
uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */
int exponents_bsize[MAX_CHANNELS]; // log2 ratio frame/exp. length
int32_t exponents[MAX_CHANNELS][BLOCK_MAX_SIZE];
int32_t max_exponent[MAX_CHANNELS];
int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
/* pointer to a coefficient buffer that is not part of this structure */
int32_t (*coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
/* one MDCT context and one window pointer per block size */
MDCTContext mdct_ctx[BLOCK_NB_SIZES];
int32_t *windows[BLOCK_NB_SIZES];
/* output buffer for one frame and the last for IMDCT windowing */
int32_t frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
/* last frame info */
uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
int last_bitoffset;
int last_superframe_len;
int32_t *noise_table;
int noise_index;
int32_t noise_mult; /* XXX: suppress that and integrate it in the noise array */
/* lsp_to_curve tables */
int32_t lsp_cos_table[BLOCK_MAX_SIZE];
int64_t lsp_pow_e_table[256];
int32_t lsp_pow_m_table1[(1 << LSP_POW_BITS)];
int32_t lsp_pow_m_table2[(1 << LSP_POW_BITS)];
/* State of current superframe decoding */
int bit_offset;
int nb_frames;
int current_frame;
#ifdef TRACE
int frame_count;
#endif
}
WMADecodeContext;
int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx);
int wma_decode_superframe_init(WMADecodeContext* s,
uint8_t *buf, int buf_size);
int wma_decode_superframe_frame(WMADecodeContext* s,
int32_t *samples,
uint8_t *buf, int buf_size);
#endif
This diff is collapsed.
/****************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
*
* Copyright (C) 2007 Michael Giacomelli
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "wmadec.h"
#include "wmafixed.h"
/**
 * Place the 32 bits of x in bits 16..47 of a 64-bit value.
 *
 * x is treated as an unsigned bit pattern (no sign extension), so the result
 * equals (uint32_t)x * 65536 and is never negative.
 *
 * The value is computed with a shift rather than by writing individual bytes,
 * so the result is the same on little- and big-endian hosts and does not
 * depend on ROCKBOX_BIG_ENDIAN being defined by the build.
 */
int64_t IntTo64(int x){
    return (int64_t)((uint64_t)(uint32_t)x << 16);
}
/**
 * Extract the integer part of a 64-bit fixed-point value (inverse of
 * IntTo64()).
 *
 * Returns bits 16..47 of x reinterpreted as a signed 32-bit integer; the 16
 * fractional bits and bits 48..63 are discarded.
 *
 * The value is computed with shifts on unsigned types.  This avoids the
 * dependency on the host byte order / ROCKBOX_BIG_ENDIAN, and the signed
 * overflow of "byte << 24" that occurred when the top byte was >= 0x80.
 *
 * @param x 64-bit fixed-point value
 * @return bits 16..47 of x as a signed integer
 */
int IntFrom64(int64_t x)
{
    const uint32_t bits = (uint32_t)((uint64_t)x >> 16);

    return (int)(int32_t)bits;
}
/**
 * Narrow a 64-bit fixed-point value to 32 bits by keeping only its low
 * 32 bits.  No saturation is performed.
 */
int32_t Fixed32From64(int64_t x)
{
    const int64_t low_word = x & 0xFFFFFFFF;

    return (int32_t)low_word;
}
/**
 * Widen a 32-bit fixed-point value to 64 bits.  The value (including its
 * sign) is preserved; no rescaling is applied.
 */
int64_t Fixed32To64(int32_t x)
{
    int64_t widened = x;

    return widened;
}
/*
* Not performance-sensitive code here
*/
/**
 * Multiply a 64-bit value by a 32-bit value.
 *
 * This is a plain 64-bit integer product: no shift is applied afterwards, so
 * any fixed-point rescaling is left to the caller.
 */
int64_t fixmul64byfixed(int64_t x, int32_t y)
{
    const int64_t factor = y;

    return x * factor;
}
/**
 * Divide two 32-bit fixed-point values with PRECISION fractional bits.
 *
 * The dividend is scaled by 2^PRECISION in 64-bit arithmetic before the
 * division so that the quotient keeps PRECISION fractional bits.
 *
 * @param x dividend
 * @param y divisor
 * @return 0 when x == 0 (even if y == 0), 0x7fffffff when y == 0, otherwise
 *         the scaled quotient truncated to 32 bits (no saturation).
 */
int32_t fixdiv32(int32_t x, int32_t y)
{
    int64_t scaled;

    if (x == 0)
        return 0;
    if (y == 0)
        return 0x7fffffff;

    /* Scale by multiplication rather than "<<": left-shifting a negative
     * value is undefined behaviour, and x may be negative.  A 32-bit value
     * times 2^PRECISION cannot overflow 64 bits. */
    scaled = (int64_t)x * ((int64_t)1 << PRECISION);
    return (int32_t)(scaled / y);
}
/**
 * Divide two 64-bit fixed-point values with PRECISION64 fractional bits.
 *
 * @param x dividend
 * @param y divisor
 * @return 0 when x == 0 (even if y == 0), 0x07ffffffffffffff when y == 0,
 *         otherwise (x * 2^PRECISION64) / y.  Bits shifted out of the top of
 *         the 64-bit dividend are lost; there is no saturation.
 */
int64_t fixdiv64(int64_t x, int64_t y)
{
    int64_t scaled;

    if (x == 0)
        return 0;
    if (y == 0)
        return 0x07ffffffffffffffLL;

    /* Shift the unsigned representation: left-shifting a negative signed
     * value is undefined behaviour, whereas the unsigned shift is well
     * defined and yields the same bit pattern. */
    scaled = (int64_t)((uint64_t)x << PRECISION64);
    return scaled / y;
}
/**
 * Square root of a 32-bit fixed-point value with PRECISION fractional bits.
 *
 * A bit-by-bit integer square root of the raw value is computed (16 result
 * bits, most significant first) and then shifted left by PRECISION / 2 to
 * bring it back to the input's fixed-point format.
 *
 * x is converted to unsigned long without a sign check, so negative inputs
 * are treated as large unsigned numbers.
 */
int32_t fixsqrt32(int32_t x)
{
    unsigned long root = 0;
    unsigned long remainder = (unsigned long)x;
    int k;

    for (k = 15; k >= 0; k--) {
        const unsigned long bit = 1UL << (k * 2);
        const unsigned long trial = root + bit;

        root >>= 1;
        if (trial <= remainder) {
            remainder -= trial;
            root |= bit;
        }
    }

    return (int32_t)(root << (PRECISION / 2));
}
/* Inverse gain of circular cordic rotation in s0.31 format.
 *
 * Note on the sign: the literal has bit 31 set, so once it is stored in a
 * signed 32-bit variable (as fsincos() does with its int32_t x) it reads as
 * roughly -0.60725 rather than +0.60725.  fsincos() negates it for phases in
 * the first and last quarter of the circle.
 * NOTE(review): where long is wider than 32 bits the constant itself is
 * positive and the narrowing to int32_t is implementation-defined -- confirm
 * this is acceptable on all target compilers.
 */
static const long cordic_circular_gain = 0xb2458939; /* 0.607252929 */
/* Table of values of atan(2^-i) in 0.32 format fractions of pi where pi = 0xffffffff / 2
 *
 * Entry i holds the rotation angle used by CORDIC iteration i; the trailing
 * comment on each entry gives the same angle in radians.  The table has 32
 * entries, of which fsincos() reads the first 31 (its loop runs i = 0..30).
 * The last two entries are zero in this representation.
 */
static const unsigned long atan_table[] = {
0x1fffffff, /* +0.785398163 (or pi/4) */
0x12e4051d, /* +0.463647609 */
0x09fb385b, /* +0.244978663 */
0x051111d4, /* +0.124354995 */
0x028b0d43, /* +0.062418810 */
0x0145d7e1, /* +0.031239833 */
0x00a2f61e, /* +0.015623729 */
0x00517c55, /* +0.007812341 */
0x0028be53, /* +0.003906230 */
0x00145f2e, /* +0.001953123 */
0x000a2f98, /* +0.000976562 */
0x000517cc, /* +0.000488281 */
0x00028be6, /* +0.000244141 */
0x000145f3, /* +0.000122070 */
0x0000a2f9, /* +0.000061035 */
0x0000517c, /* +0.000030518 */
0x000028be, /* +0.000015259 */
0x0000145f, /* +0.000007629 */
0x00000a2f, /* +0.000003815 */
0x00000517, /* +0.000001907 */
0x0000028b, /* +0.000000954 */
0x00000145, /* +0.000000477 */
0x000000a2, /* +0.000000238 */
0x00000051, /* +0.000000119 */
0x00000028, /* +0.000000060 */
0x00000014, /* +0.000000030 */
0x0000000a, /* +0.000000015 */
0x00000005, /* +0.000000007 */
0x00000002, /* +0.000000004 */
0x00000001, /* +0.000000002 */
0x00000000, /* +0.000000001 */
0x00000000, /* +0.000000000 */
};
/*
* Below here functions do not use standard fixed precision!
*/
/**
 * Compute sine and cosine of a phase with CORDIC rotations.
 *
 * @param phase angle in the range 0 .. 0xffffffff, where 0 stands for 0 and
 *              0xffffffff for 2*pi.
 * @param cos   if non-NULL, receives the cosine of phase.
 * @return the sine of phase.  Both results are accumulated in int32_t as
 *         signed fractions (the most negative value representing -1, the
 *         most positive +1); the sine is widened to long on return.
 *
 * The rotation runs 31 iterations, each consuming one atan_table entry.
 * The original authors quote at least 24 bits of precision, with the last
 * few bits probably inexact.
 */
long fsincos(unsigned long phase, int32_t *cos)
{
    /* A quarter of the full circle in the phase representation (pi/2). */
    const unsigned long quarter_turn = 0xffffffff / 4;
    int32_t re = cordic_circular_gain;   /* start vector, pre-scaled by the gain */
    int32_t im = 0;
    unsigned long angle = phase;
    int step;

    /* Fold the phase into the half-circle window the iteration works on,
     * flipping the sign of the start vector for the first and the last
     * quarter of the circle. */
    if (angle < quarter_turn) {
        /* first quarter: shift up by pi/2 */
        re = -re;
        angle += quarter_turn;
    } else if (angle < 3 * quarter_turn) {
        /* second and third quarter: shift down by pi/2 */
        angle -= quarter_turn;
    } else {
        /* last quarter: shift down by 3*pi/2 */
        re = -re;
        angle -= 3 * quarter_turn;
    }

    /* Each iteration adds roughly one bit of precision. */
    for (step = 0; step < 31; step++) {
        /* Both shifted terms are taken from the vector before this step. */
        const int32_t re_shifted = re >> step;
        const int32_t im_shifted = im >> step;
        const unsigned long delta = atan_table[step];

        /* Rotate towards the pivot angle pi/2. */
        if (angle >= quarter_turn) {
            re -= im_shifted;
            im += re_shifted;
            angle -= delta;
        } else {
            re += im_shifted;
            im -= re_shifted;
            angle += delta;
        }
    }

    if (cos)
        *cos = re;
    return im;
}
/****************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
*
* Copyright (C) 2007 Michael Giacomelli
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/* fixed precision code. We use a combination of Sign 15.16 and Sign.31
precision here.
The WMA decoder does not always follow this convention, and occasionally
renormalizes values to other formats in order to maximize precision.
However, only the two precisions above are provided in this file.
*/
#include <inttypes.h>
/* Number of fractional bits of the 32-bit fixed-point format. */
#define PRECISION 16
/* Number of fractional bits of the 64-bit fixed-point format. */
#define PRECISION64 16
/* 64-bit-format fixed point to float: divides by 2^PRECISION64. */
#define fixtof64(x) (float)((float)(x) / (float)(1 << PRECISION64)) //does not work on int64_t!
/* Float to 32-bit fixed point: scales by 2^PRECISION and adds +/-0.5 before
 * the truncating cast, i.e. rounds half away from zero.  The argument is
 * evaluated twice, so it must not have side effects. */
#define ftofix32(x) ((int32_t)((x) * (float)(1 << PRECISION) + ((x) < 0 ? -0.5 : 0.5)))
/* Integer to 64-bit fixed point, via IntTo64(). */
#define itofix64(x) (IntTo64(x))
/* Integer to 32-bit fixed point.  Uses a plain left shift, which is
 * undefined in C for negative x. */
#define itofix32(x) ((x) << PRECISION)
/* 32-bit fixed point to integer: drops the PRECISION fractional bits with a
 * right shift. */
#define fixtoi32(x) ((x) >> PRECISION)
/* 64-bit fixed point to integer, via IntFrom64(). */
#define fixtoi64(x) (IntFrom64(x))
/* Fixed-point helper functions (defined out of line). */
/* Places the 32 bits of x in bits 16..47 of the result. */
int64_t IntTo64(int x);
/* Returns bits 16..47 of x as an int (inverse of IntTo64). */
int IntFrom64(int64_t x);
/* Keeps the low 32 bits of x. */
int32_t Fixed32From64(int64_t x);
/* Widens x to 64 bits without rescaling. */
int64_t Fixed32To64(int32_t x);
/* Plain 64-bit product x * y; no rescaling shift is applied. */
int64_t fixmul64byfixed(int64_t x, int32_t y);
/* (x << PRECISION) / y in 64-bit arithmetic; returns 0 if x == 0 and
 * 0x7fffffff if y == 0. */
int32_t fixdiv32(int32_t x, int32_t y);
/* (x << PRECISION64) / y; returns 0 if x == 0 and 0x07ffffffffffffff if
 * y == 0. */
int64_t fixdiv64(int64_t x, int64_t y);
/* Integer square root of the raw value, shifted left by PRECISION / 2. */
int32_t fixsqrt32(int32_t x);
/* CORDIC sine/cosine: phase 0..0xffffffff maps to 0..2*pi; returns the sine
 * and stores the cosine through cos when cos is non-NULL. */
long fsincos(unsigned long phase, int32_t *cos);
#ifdef CPU_ARM
/*
 * fixmul32(x, y) -- ARM version, Sign-15.16 format.
 *
 * smull forms the signed 64-bit product of x and y in __hi:__lo.
 * "movs ... lsr" shifts the low word right by PRECISION and leaves the last
 * bit shifted out in the carry flag; "adc" then adds the high word shifted
 * left by (32 - PRECISION) plus that carry.  The result is therefore
 * (x * y) >> PRECISION with the discarded fraction rounded to nearest,
 * whereas the generic C version of fixmul32 truncates.
 *
 * Implemented as a GCC statement expression; x and y are each used once.
 */
#define fixmul32(x, y) \
({ int32_t __hi; \
uint32_t __lo; \
int32_t __result; \
asm ("smull %0, %1, %3, %4\n\t" \
"movs %0, %0, lsr %5\n\t" \
"adc %2, %0, %1, lsl %6" \
: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
: "%r" (x), "r" (y), \
"M" (PRECISION), "M" (32 - PRECISION) \
: "cc"); \
__result; \
})
/*
 * fixmul32b(x, y) -- ARM version.
 *
 * smull forms the signed 64-bit product in __hi:__lo; the result is the high
 * word shifted left by one bit.  This corresponds to (x * y) >> 31 with the
 * least significant result bit always zero (the low word is ignored), so it
 * can differ from the generic C version in that last bit.
 */
#define fixmul32b(x, y) \
({ int32_t __hi; \
uint32_t __lo; \
int32_t __result; \
asm ("smull %0, %1, %3, %4\n\t" \
"movs %2, %1, lsl #1" \
: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
: "%r" (x), "r" (y) \
: "cc"); \
__result; \
})
#elif defined(CPU_COLDFIRE)
/*
 * fixmul32 -- Coldfire version, valid only for PRECISION == 16 (a compile-time
 * warning is emitted otherwise).
 *
 * The product is accumulated in %acc0 and read back into t1, while mulu.l
 * leaves the low 32 bits of the product in x.  After t1 is shifted right by
 * one, its low 16 bits replace the low half of x and the two halves of x are
 * swapped: the result has the low 16 bits of t1 as its upper half and the
 * upper 16 bits of the low product word as its lower half.
 * NOTE(review): this presumably relies on the EMAC unit being configured so
 * that %acc0 yields the upper part of the product shifted left by one --
 * confirm against the platform setup code.
 */
static inline int32_t fixmul32(int32_t x, int32_t y)
{
#if PRECISION != 16
#warning Coldfire fixmul32() only works for PRECISION == 16
#endif
int32_t t1;
asm (
"mac.l %[x], %[y], %%acc0 \n" /* multiply */
"mulu.l %[y], %[x] \n" /* get lower half, avoid emac stall */
"movclr.l %%acc0, %[t1] \n" /* get higher half */
"lsr.l #1, %[t1] \n"
"move.w %[t1], %[x] \n"
"swap %[x] \n"
: [t1] "=&d" (t1), [x] "+d" (x)
: [y] "d" (y)
);
return x;
}
/*
 * fixmul32b -- Coldfire version.
 *
 * Multiplies x by y into %acc0 and returns the accumulator content, which
 * movclr.l also clears.
 * NOTE(review): equivalence with the generic (x * y) >> 31 presumably
 * depends on the EMAC operating mode -- confirm against the platform setup
 * code.
 */
static inline int32_t fixmul32b(int32_t x, int32_t y)
{
asm (
"mac.l %[x], %[y], %%acc0 \n" /* multiply */
"movclr.l %%acc0, %[x] \n" /* get higher half */
: [x] "+d" (x)
: [y] "d" (y)
);
return x;
}
#else
/*
 * fixmul32 -- portable version.
 *
 * Multiplies two fixed-point values with PRECISION fractional bits: the
 * 64-bit product is shifted right by PRECISION (truncating, no rounding) and
 * narrowed to 32 bits without saturation.
 */
static inline int32_t fixmul32(int32_t x, int32_t y)
{
    const int64_t product = (int64_t)x * y;

    return (int32_t)(product >> PRECISION);
}
/*
 * fixmul32b -- portable version.
 *
 * Returns (x * y) >> 31, computed in 64 bits and narrowed to 32 bits.
 * Multiplying by a factor with 31 fractional bits this way leaves the result
 * in the same fixed-point format as the other operand.
 */
static inline int32_t fixmul32b(int32_t x, int32_t y)
{
    const int64_t product = (int64_t)x * (int64_t)y;

    return (int32_t)(product >> 31);
}
#endif
#ifdef CPU_ARM
/*
 * CMUL -- ARM version: complex multiply (a + i*b) * (t + i*v).
 *
 * The asm accumulates two 64-bit sums and keeps their high words:
 *   y1 = high word of (b*t + a*v)
 *   x1 = high word of (a*t - b*v)   (b is negated in place by rsb first)
 * The results stored through x and y are those high words shifted left by
 * one bit, so the lowest result bit is always zero.
 *
 * @param x receives the real part
 * @param y receives the imaginary part
 */
static inline
void CMUL(int32_t *x, int32_t *y,
int32_t a, int32_t b,
int32_t t, int32_t v)
{
/* This version loses one bit of precision. Could be solved at the cost
* of 2 extra cycles if it becomes an issue. */
int x1, y1, l;
asm(
"smull %[l], %[y1], %[b], %[t] \n"
"smlal %[l], %[y1], %[a], %[v] \n"
"rsb %[b], %[b], #0 \n"
"smull %[l], %[x1], %[a], %[t] \n"
"smlal %[l], %[x1], %[b], %[v] \n"
: [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b)
: [a] "r" (a), [t] "r" (t), [v] "r" (v)
: "cc"
);
*x = x1 << 1;
*y = y1 << 1;
}
#elif defined CPU_COLDFIRE
/*
 * CMUL -- Coldfire version: complex multiply (a + i*b) * (t + i*v).
 *
 * %acc0 accumulates a*t - b*v and %acc1 accumulates b*t + a*v; each
 * accumulator is read (and cleared) into a and then stored through x and y
 * respectively from inside the asm, hence the "memory" clobber.
 * NOTE(review): the scaling of the stored values presumably depends on the
 * EMAC operating mode -- confirm against the platform setup code.
 *
 * @param x receives the real part
 * @param y receives the imaginary part
 */
static inline
void CMUL(int32_t *x, int32_t *y,
int32_t a, int32_t b,
int32_t t, int32_t v)
{
asm volatile ("mac.l %[a], %[t], %%acc0;"
"msac.l %[b], %[v], %%acc0;"
"mac.l %[b], %[t], %%acc1;"
"mac.l %[a], %[v], %%acc1;"
"movclr.l %%acc0, %[a];"
"move.l %[a], (%[x]);"
"movclr.l %%acc1, %[a];"
"move.l %[a], (%[y]);"
: [a] "+&r" (a)
: [x] "a" (x), [y] "a" (y),
[b] "r" (b), [t] "r" (t), [v] "r" (v)
: "cc", "memory");
}
#else
/*
 * CMUL -- portable version: complex multiply (are + i*aim) * (bre + i*bim).
 *
 * Each of the four partial products is formed with fixmul32b():
 *   *pre = bre*are - bim*aim
 *   *pim = bre*aim + bim*are
 *
 * @param pre receives the real part
 * @param pim receives the imaginary part
 */
static inline
void CMUL(int32_t *pre,
          int32_t *pim,
          int32_t are,
          int32_t aim,
          int32_t bre,
          int32_t bim)
{
    const int32_t re_re = fixmul32b(bre, are);
    const int32_t im_im = fixmul32b(bim, aim);
    const int32_t re_im = fixmul32b(bre, aim);
    const int32_t im_re = fixmul32b(bim, are);

    *pre = re_re - im_im;
    *pim = re_im + im_re;
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment