wma-fixed: Forward port of fixed integer WMA v1/2 decoder.

The fixed integer WMA v1/2 decoder originates from the ffmpeg project and has been changed into a fixed integer decoder by the RockBox project. It was then adapted for vlc in the 0.8.6-neuros branch.

wma-fixed: Forward port of fixed integer WMA v1/2 decoder.
The fixed integer WMA v1/2 decoder originates from the ffmpeg project and has been changed into a fixed integer decoder by the RockBox project. It was then adapted for vlc in the 0.8.6-neuros branch.
807563ba · Jean-Paul Saman · a98b4976 · 807563ba · 807563ba · 807563ba
Commit 807563ba authored Jan 30, 2009 by Jean-Paul Saman
18 changed files
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,7 @@ New Decoders:
 * Blu-Ray Linear PCM
 * QCELP (Qualcomm PureVoice)
 * Real Video 3.0 & 4.0
+ * WMA v1/2 fixed point integer

 Demuxers:
 * Support for Dirac and RealVideo in Matroska files

--- a/configure.ac
+++ b/configure.ac
@@ -212,8 +212,7 @@ case "${host_os}" in
    VLC_ADD_LDFLAGS([libvlc],[-Wl,-framework,CoreFoundation])
    VLC_ADD_LDFLAGS([motion],[-Wl,-framework,IOKit,-framework,CoreFoundation])
    AC_ARG_ENABLE(macosx-defaults,
-      AS_HELP_STRING([--enable-macosx-defaults],[Build the default configuration
-		     on Mac OS X (default enabled)]))
+      AS_HELP_STRING([--enable-macosx-defaults],[Build the default configuration on Mac OS X (default enabled)]))
    if test "x${enable_macosx_defaults}" != "xno"
    then
        echo ""
@@ -1086,6 +1085,11 @@ case "${host_cpu}" in
  "")
    ARCH=unknown
    ;;
+  arm*)
+    dnl use arm assembly
+    VLC_ADD_CFLAGS([wma_fixed],[-DCPU_ARM])
+    ARCH="${host_cpu}"
+    ;;
  *)
    ARCH="${host_cpu}"
    ;;
@@ -2421,7 +2425,6 @@ then
    AC_DEFINE_UNQUOTED(VIDEODEV2_H_FILE, "${with_videodev2}", [Location of videodev2.h])
  fi

-
  AC_CACHE_CHECK([for new linux/videodev2.h],
      [ac_cv_new_linux_videodev2_h],
      [AC_TRY_COMPILE([
@@ -2889,6 +2892,16 @@ dnl

 AC_ARG_WITH(,[Codec plugins:])

+dnl
+dnl wmafixed plugin
+dnl
+AC_ARG_ENABLE(wma-fixed,
+  [  --enable-wma-fixed      libwma-fixed module (default disabled)])
+if test "${enable_wma_fixed}" = "yes"
+then
+  VLC_ADD_PLUGIN([wma_fixed])
+fi
+
 dnl
 dnl  mad plugin
 dnl
@@ -5906,6 +5919,7 @@ AC_CONFIG_FILES([
  modules/codec/dmo/Makefile
  modules/codec/subtitles/Makefile
  modules/codec/spudec/Makefile
+  modules/codec/wmafixed/Makefile
  modules/codec/xvmc/Makefile
  modules/control/Makefile
  modules/control/http/Makefile

--- a/modules/codec/Modules.am
+++ b/modules/codec/Modules.am
-SUBDIRS = cmml dmo avcodec subtitles spudec xvmc
+SUBDIRS = cmml dmo avcodec subtitles spudec wmafixed xvmc
 SOURCES_a52 = a52.c
 SOURCES_dts = dts.c
 SOURCES_flac = flac.c

--- a/modules/codec/wmafixed/Modules.am
+++ b/modules/codec/wmafixed/Modules.am
+SOURCES_wma_fixed = asf.h bswap.h fft.h mdct.h wma.c wmadeci.c bitstream.c \
+  wmadata.h wmafixed.c bitstream.h fft.c mdct.c \
+  wmadec.h wmafixed.h
--- a/modules/codec/wmafixed/asf.h
+++ b/modules/codec/wmafixed/asf.h
+/*****************************************************************************
+ * asf.h: ASF stream format definitions for the wma decoder using integer decoder from Rockbox, based on FFmpeg
+ *****************************************************************************
+ * Copyright (C) 2008 M2X
+ *
+ * Authors: Rafaël Carré <rcarre@m2x.nl>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifndef _ASF_H
+#define _ASF_H
+
+#include <inttypes.h>
+
+/* ASF codec IDs */
+#define ASF_CODEC_ID_WMAV1 0x160
+#define ASF_CODEC_ID_WMAV2 0x161
+
+struct asf_waveformatex_s { /* Audio stream format record handed to the decoder.
+                             * NOTE(review): the field notes below are inferred
+                             * from the field names only - confirm against callers. */
+    uint32_t packet_size;
+    int audiostream;
+    uint16_t codec_id;      /* presumably ASF_CODEC_ID_WMAV1 or ASF_CODEC_ID_WMAV2 */
+    uint16_t channels;
+    uint32_t rate;
+    uint32_t bitrate;
+    uint16_t blockalign;
+    uint16_t bitspersample;
+    uint16_t datalen;       /* presumably the number of valid bytes in data[] - TODO confirm */
+    uint8_t data[6];
+};
+typedef struct asf_waveformatex_s asf_waveformatex_t;
+
+#endif
--- a/modules/codec/wmafixed/bitstream.c
+++ b/modules/codec/wmafixed/bitstream.c
+/*
+ * Common bit i/o utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
+ */
+
+/**
+ * @file bitstream.c
+ * bitstream api.
+ */
+
+#include "bitstream.h"
+
+#include <stdio.h>
+
+#define DEBUGF printf
+
+/**
+ * Same as av_mallocz_static(), but does a realloc.
+ *
+ * @param[in] ptr The block of memory to reallocate.
+ * @param[in] size The requested size.
+ * @return Block of memory of requested size.
+ * @deprecated Code which uses ff_realloc_static is broken/misdesigned
+ * and should use static arrays instead.
+ */
+attribute_deprecated void *ff_realloc_static(void *ptr, unsigned int size);
+
+const uint8_t ff_sqrt_tab[128]={ /* ff_sqrt_tab[i] == floor(sqrt(i)) for i in 0..127 */
+        0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
+        5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+        9, 9, 9, 9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11
+};
+
+const uint8_t ff_log2_tab[256]={ /* ff_log2_tab[i] == floor(log2(i)) for i in 1..255; entry 0 is 0 */
+        0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+        5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+        6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+        6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
+void align_put_bits(PutBitContext *s) /* Emits 0..7 zero-valued bits through put_bits():
+                                       * (-s->index) & 7 bits with ALT_BITSTREAM_WRITER,
+                                       * otherwise s->bit_left & 7 bits.
+                                       * NOTE(review): presumably this pads the stream to
+                                       * the next byte boundary - confirm against put_bits(). */
+{
+#ifdef ALT_BITSTREAM_WRITER
+    put_bits(s,(  - s->index) & 7,0);
+#else
+    put_bits(s,s->bit_left & 7,0);
+#endif
+}
+
+/* Emit each character of the NUL-terminated string s as an 8-bit field via
+ * put_bits(); when put_zero is non-zero, one extra zero byte is emitted
+ * after the last character. */
+void ff_put_string(PutBitContext * pbc, char *s, int put_zero)
+{
+    const char *cursor;
+
+    for (cursor = s; *cursor != '\0'; ++cursor)
+        put_bits(pbc, 8, *cursor);
+
+    if (put_zero != 0)
+        put_bits(pbc, 8, 0);
+}
+
+/* VLC decoding
+ *
+ * GET_DATA(v, table, i, wrap, size) stores into v the i-th entry of 'table',
+ * where consecutive entries are 'wrap' bytes apart and each entry is 'size'
+ * bytes wide: size 1 reads a uint8_t, size 2 a uint16_t, and any other
+ * value a uint32_t.
+ * NOTE(review): the 2- and 4-byte reads go through pointer casts, so entries
+ * are presumably expected to be suitably aligned - confirm for all tables.
+ */
+#define GET_DATA(v, table, i, wrap, size) \
+{\
+    const uint8_t *ptr = (const uint8_t *)table + i * wrap;\
+    switch(size) {\
+    case 1:\
+        v = *(const uint8_t *)ptr;\
+        break;\
+    case 2:\
+        v = *(const uint16_t *)ptr;\
+        break;\
+    default:\
+        v = *(const uint32_t *)ptr;\
+        break;\
+    }\
+}
+
+/*
+ * Reserve 'size' consecutive entries at the end of vlc->table.
+ *
+ * Nothing in this file allocates or grows vlc->table, so
+ * vlc->table_allocated is treated as the hard capacity of the table: a
+ * request that does not fit is an error instead of being logged and then
+ * written past the end of the storage.
+ *
+ * Returns the index of the first reserved entry, or -1 when no table is
+ * attached or the request does not fit. On failure vlc->table_size and
+ * vlc->table_allocated are left unchanged.
+ */
+static int alloc_table(VLC *vlc, int size)
+{
+    int index = vlc->table_size;
+
+    if (!vlc->table)
+        return -1;
+
+    /* compare against the remaining room to avoid overflowing index + size */
+    if (size < 0 || size > vlc->table_allocated - index) {
+        DEBUGF("Tried to allocate past the end of a Huffman table: %d/%d\n",
+            vlc->table_allocated, index + size);
+        return -1;
+    }
+
+    vlc->table_size = index + size;
+    return index;
+}
+
+static int build_table(VLC *vlc, int table_nb_bits,
+                       int nb_codes,
+                       const void *bits, int bits_wrap, int bits_size,
+                       const void *codes, int codes_wrap, int codes_size,
+                       uint32_t code_prefix, int n_prefix)
+{   /* Builds one lookup table of (1 << table_nb_bits) entries inside
+     * vlc->table for the codes whose leading n_prefix bits equal
+     * code_prefix, and recurses to build sub-tables for codes that are
+     * longer than table_nb_bits.
+     * Each entry holds [0] = code index (or the index of a sub-table) and
+     * [1] = bit count (negative when the entry links to a sub-table).
+     * Returns the index of this table inside vlc->table, or -1 on failure
+     * (alloc_table() failed, or two codes map to the same entry). */
+    int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2;
+    uint32_t code;
+    int flags = 0; /* always 0 here, so the INIT_VLC_LE branches below are never taken */
+    VLC_TYPE (*table)[2];
+
+    table_size = 1 << table_nb_bits;
+    table_index = alloc_table(vlc, table_size);
+#ifdef DEBUG_VLC
+    printf("new table index=%d size=%d code_prefix=%x n=%d\n",
+           table_index, table_size, code_prefix, n_prefix);
+#endif
+    if (table_index < 0)
+        return -1;
+    table = &vlc->table[table_index];
+
+    for(i=0;i<table_size;i++) {
+        table[i][1] = 0; //bits
+        table[i][0] = -1; //codes
+    }
+
+    /* first pass: map codes and compute auxiliary table sizes */
+    for(i=0;i<nb_codes;i++) {
+        GET_DATA(n, bits, i, bits_wrap, bits_size);
+        GET_DATA(code, codes, i, codes_wrap, codes_size);
+        /* we accept tables with holes */
+        if (n <= 0)
+            continue;
+#if defined(DEBUG_VLC) && 0
+        printf("i=%d n=%d code=0x%x\n", i, n, code);
+#endif
+        /* if code matches the prefix, it is in the table */
+        n -= n_prefix; /* n is now the number of code bits left after the prefix */
+        if(flags & INIT_VLC_LE)
+            code_prefix2= code & (n_prefix>=32 ? 0xffffffff : (uint32_t)(1 << n_prefix)-1);
+        else
+            code_prefix2= code >> n;
+        if (n > 0 && (int)code_prefix2 == (int)code_prefix) {
+            if (n <= table_nb_bits) {
+                /* no need to add another table */
+                j = (code << (table_nb_bits - n)) & (table_size - 1);
+                nb = 1 << (table_nb_bits - n); /* number of entries sharing this code */
+                for(k=0;k<nb;k++) {
+                    if(flags & INIT_VLC_LE)
+                        j = (code >> n_prefix) + (k<<n);
+#ifdef DEBUG_VLC
+                    av_log(NULL, 0, "%4x: code=%d n=%d\n",
+                           j, i, n);
+#endif
+                    if (table[j][1] /*bits*/ != 0) {
+                        return -1; /* entry already taken: the codes overlap */
+                    }
+                    table[j][1] = n; //bits
+                    table[j][0] = i; //code
+                    j++;
+                }
+            } else {
+                n -= table_nb_bits;
+                j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1);
+#ifdef DEBUG_VLC
+                av_log(NULL, 0,"%4x: n=%d (subtable)\n",
+                       j, n);
+#endif
+                /* compute table size */
+                n1 = -table[j][1]; //bits
+                if (n > n1)
+                    n1 = n;
+                table[j][1] = -n1; //bits
+            }
+        }
+    }
+
+    /* second pass : fill auxiliary tables recursively */
+    for(i=0;i<table_size;i++) {
+        n = table[i][1]; //bits
+        if (n < 0) {
+            n = -n;
+            if (n > table_nb_bits) {
+                n = table_nb_bits;
+                table[i][1] = -n; //bits
+            }
+            index = build_table(vlc, n, nb_codes,
+                                bits, bits_wrap, bits_size,
+                                codes, codes_wrap, codes_size,
+                                (flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i),
+                                n_prefix + table_nb_bits);
+            if (index < 0)
+                return -1;
+            /* note: realloc has been done, so reload tables */
+            table = &vlc->table[table_index];
+            table[i][0] = index; //code
+        }
+    }
+    return table_index;
+}
+
+/* Build VLC decoding tables suitable for use with get_vlc().
+
+   'nb_bits' sets the decoding table size (2^nb_bits entries). The
+   bigger it is, the faster is the decoding. But it should not be too
+   big to save memory and L1 cache. '9' is a good compromise.
+
+   'nb_codes' : number of vlc codes
+
+   'bits' : table which gives the size (in bits) of each vlc code.
+
+   'codes' : table which gives the bit pattern of each vlc code.
+
+   'xxx_wrap' : gives the number of bytes between each entry of the
+   'bits' or 'codes' tables.
+
+   'xxx_size' : gives the number of bytes of each entry of the 'bits'
+   or 'codes' tables.
+
+   'wrap' and 'size' allow the use of any memory configuration and type
+   (byte/word/long) to store the 'bits' and 'codes' tables.
+
+   'flags' is accepted only to keep the parameter list consistent with
+   FFmpeg's init_vlc(); it is not used, and build_table() always takes
+   its non-INIT_VLC_LE path.
+
+   This function does not set vlc->table or vlc->table_allocated;
+   alloc_table() reads both, so the caller has to provide them before
+   calling init_vlc().
+
+   Returns 0 on success and -1 if the table could not be built.
+*/
+int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size,
+             int flags)
+{
+    /* Silence the unused-parameter warning without leaking the caller's
+     * flags into the return value: success must always be reported as 0. */
+    (void)flags;
+
+    vlc->bits = nb_bits;
+    vlc->table_size = 0;
+
+#ifdef DEBUG_VLC
+    printf("build table nb_codes=%d\n", nb_codes);
+#endif
+
+    if (build_table(vlc, nb_bits, nb_codes,
+                    bits, bits_wrap, bits_size,
+                    codes, codes_wrap, codes_size,
+                    0, 0) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
--- a/modules/codec/wmafixed/bitstream.h
+++ b/modules/codec/wmafixed/bitstream.h
--- a/modules/codec/wmafixed/bswap.h
+++ b/modules/codec/wmafixed/bswap.h
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file bswap.h
+ * byte swap.
+ */
+
+#ifndef __BSWAP_H__
+#define __BSWAP_H__
+
+#ifdef HAVE_BYTESWAP_H
+#include <byteswap.h>
+#else
+
+#undef ROCKBOX
+#ifdef ROCKBOX
+
+/* rockbox' optimised inline functions */
+#define bswap_16(x) swap16(x)
+#define bswap_32(x) swap32(x)
+
+/* Reverse the byte order of a 64-bit value.
+ * Each 32-bit half is byte-swapped with bswap_32() and the halves are then
+ * exchanged using shifts, so the result does not depend on how the host
+ * lays out a 64-bit integer in memory. */
+static inline uint64_t ByteSwap64(uint64_t x)
+{
+    uint32_t lo = (uint32_t)x;
+    uint32_t hi = (uint32_t)(x >> 32);
+
+    /* the swapped low half becomes the new high half and vice versa */
+    return ((uint64_t)(uint32_t)bswap_32(lo) << 32) | (uint32_t)bswap_32(hi);
+}
+#define bswap_64(x) ByteSwap64(x)
+
+#elif defined(ARCH_X86)
+static inline unsigned short ByteSwap16(unsigned short x) /* x86 byte swap of a 16-bit value: xchgb exchanges the low and high byte registers of x */
+{
+  __asm("xchgb %b0,%h0"	:
+        "=q" (x)	:
+        "0" (x));
+    return x;
+}
+#define bswap_16(x) ByteSwap16(x)
+
+static inline unsigned int ByteSwap32(unsigned int x) /* x86 byte swap of a 32-bit value, done in place on x */
+{
+#if __CPU__ > 386 /* use the bswap instruction; otherwise fall back to xchgb / rorl / xchgb */
+ __asm("bswap	%0":
+      "=r" (x)     :
+#else
+ __asm("xchgb	%b0,%h0\n"
+      "	rorl	$16,%0\n"
+      "	xchgb	%b0,%h0":
+      "=q" (x)		:
+#endif
+      "0" (x)); /* shared input operand: x is tied to output operand 0 in both variants */
+  return x;
+}
+#define bswap_32(x) ByteSwap32(x)
+
+static inline unsigned long long int ByteSwap64(unsigned long long int x) /* x86 64-bit byte swap: each 32-bit half is byte-swapped with bswap_32(), then xchgl exchanges the two halves */
+{
+  register union { __extension__ uint64_t __ll;
+          uint32_t __l[2]; } __x;
+  asm("xchgl	%0,%1":
+      "=r"(__x.__l[0]),"=r"(__x.__l[1]):
+      "0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32))));
+  return __x.__ll;
+}
+#define bswap_64(x) ByteSwap64(x)
+
+#elif defined(ARCH_SH4)
+
+static inline uint16_t ByteSwap16(uint16_t x) { /* SH4 16-bit byte swap implemented with the swap.b instruction */
+	__asm__("swap.b %0,%0":"=r"(x):"0"(x));
+	return x;
+}
+
+static inline uint32_t ByteSwap32(uint32_t x) { /* SH4 32-bit byte swap: swap.b, swap.w, swap.b applied in place to x */
+	__asm__(
+	"swap.b %0,%0\n"
+	"swap.w %0,%0\n"
+	"swap.b %0,%0\n"
+	:"=r"(x):"0"(x));
+	return x;
+}
+
+#define bswap_16(x) ByteSwap16(x)
+#define bswap_32(x) ByteSwap32(x)
+
+/* Reverse the byte order of a 64-bit value.
+ * Each 32-bit half is byte-swapped with bswap_32() and the halves are then
+ * exchanged using shifts, so the result is correct on both little- and
+ * big-endian SH4 configurations. */
+static inline uint64_t ByteSwap64(uint64_t x)
+{
+    uint32_t lo = (uint32_t)x;
+    uint32_t hi = (uint32_t)(x >> 32);
+
+    /* the swapped low half becomes the new high half and vice versa */
+    return ((uint64_t)(uint32_t)bswap_32(lo) << 32) | (uint32_t)bswap_32(hi);
+}
+#define bswap_64(x) ByteSwap64(x)
+
+#else
+
+#define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8) /* generic fallback; evaluates x twice */
+
+// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.; generic fallback, evaluates x four times
+#define bswap_32(x) \
+     ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >>  8) | \
+      (((x) & 0x0000ff00) <<  8) | (((x) & 0x000000ff) << 24))
+
+/* Generic 64-bit byte reversal: the value is split into two 32-bit words
+ * through a union, each word is byte-swapped with bswap_32() and stored
+ * into the opposite slot of the result. */
+static inline uint64_t ByteSwap64(uint64_t x)
+{
+    typedef union {
+        uint64_t whole;
+        uint32_t half[2];
+    } split64;
+    split64 src, dst;
+
+    src.whole = x;
+    dst.half[0] = bswap_32 (src.half[1]);
+    dst.half[1] = bswap_32 (src.half[0]);
+    return dst.whole;
+}
+#define bswap_64(x) ByteSwap64(x)
+
+#endif	/* !ARCH_X86 */
+
+#endif	/* !HAVE_BYTESWAP_H */
+
+// be2me_N(x): convert an N-bit value from big-endian to machine (host) byte order
+// le2me_N(x): convert an N-bit value from little-endian to machine (host) byte order
+
+#ifdef WORDS_BIGENDIAN
+#define be2me_16(x) (x)
+#define be2me_32(x) (x)
+#define be2me_64(x) (x)
+#define le2me_16(x) bswap_16(x)
+#define le2me_32(x) bswap_32(x)
+#define le2me_64(x) bswap_64(x)
+#else
+#define be2me_16(x) bswap_16(x)
+#define be2me_32(x) bswap_32(x)
+#define be2me_64(x) bswap_64(x)
+#define le2me_16(x) (x)
+#define le2me_32(x) (x)
+#define le2me_64(x) (x)
+#endif
+
+#endif /* __BSWAP_H__ */
--- a/modules/codec/wmafixed/fft.c
+++ b/modules/codec/wmafixed/fft.c
+/*
+ * WMA compatible decoder
+ * Copyright (c) 2002 The FFmpeg Project.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <inttypes.h>
+#include "fft.h"
+#include "wmafixed.h"
+
+#define IBSS_ATTR
+#define ICONST_ATTR
+#define ICODE_ATTR
+
+FFTComplex  exptab0[512] IBSS_ATTR;
+
+/* butterfly op: (pre, pim) = (pre1 + qre1, pim1 + qim1) and
+ * (qre, qim) = (pre1 - qre1, pim1 - qim1).
+ * The four inputs are copied into locals before any output is written, so
+ * an output may name the same object as an input. */
+#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
+{ \
+  int32_t ax, ay, bx, by; \
+  bx=pre1; \
+  by=pim1; \
+  ax=qre1; \
+  ay=qim1; \
+  pre = (bx + ax); \
+  pim = (by + ay); \
+  qre = (bx - ax); \
+  qim = (by - ay); \
+}
+
+
+int fft_calc_unscaled(FFTContext *s, FFTComplex *z)
+{   /* In-place FFT over the (1 << s->nbits) complex points in z, without
+     * any scaling of the result; always returns 0.
+     * Twiddle factors are read from the global exptab0 with a stride of
+     * (1 << (10 - s->nbits)), so s->nbits must not exceed 10.
+     * NOTE(review): the do/while loops of pass 0 and pass 1 assume at
+     * least 4 points (s->nbits >= 2), and exptab0 is presumably filled by
+     * fft_init_global() before the first call - confirm against callers. */
+    int ln = s->nbits;
+    int j, np, np2;
+    int nblocks, nloops;
+    register FFTComplex *p, *q;
+    int l;
+    int32_t tmp_re, tmp_im;
+    int tabshift = 10-ln; /* scales an index of this FFT size to the 1024-point exptab0 */
+
+    np = 1 << ln;
+
+    /* pass 0 */
+    p=&z[0];
+    j=(np >> 1);
+    do
+    {
+        BF(p[0].re, p[0].im, p[1].re, p[1].im,
+           p[0].re, p[0].im, p[1].re, p[1].im);
+        p+=2;
+    }
+    while (--j != 0);
+
+    /* pass 1 */
+    p=&z[0];
+    j=np >> 2;
+    if (s->inverse)
+    {
+        do
+        {
+            BF(p[0].re, p[0].im, p[2].re, p[2].im,
+               p[0].re, p[0].im, p[2].re, p[2].im);
+            BF(p[1].re, p[1].im, p[3].re, p[3].im,
+               p[1].re, p[1].im, -p[3].im, p[3].re);
+            p+=4;
+        }
+        while (--j != 0);
+    }
+    else
+    {
+        do
+        {
+            BF(p[0].re, p[0].im, p[2].re, p[2].im,
+               p[0].re, p[0].im, p[2].re, p[2].im);
+            BF(p[1].re, p[1].im, p[3].re, p[3].im,
+               p[1].re, p[1].im, p[3].im, -p[3].re);
+            p+=4;
+        }
+        while (--j != 0);
+    }
+
+    /* pass 2 .. ln-1 */
+    nblocks = np >> 3;
+    nloops = 1 << 2;
+    np2 = np >> 1;
+    do
+    {
+        p = z;
+        q = z + nloops;
+        for (j = 0; j < nblocks; ++j)
+        {
+            /* first butterfly of the block needs no twiddle multiplication */
+            BF(p->re, p->im, q->re, q->im,
+               p->re, p->im, q->re, q->im);
+
+            p++;
+            q++;
+            for(l = nblocks; l < np2; l += nblocks)
+            {
+                CMUL(&tmp_re, &tmp_im, exptab0[(l<<tabshift)].re, exptab0[(l<<tabshift)].im, q->re, q->im);
+                //CMUL(&tmp_re, &tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
+                BF(p->re, p->im, q->re, q->im,
+                   p->re, p->im, tmp_re, tmp_im);
+                p++;
+                q++;
+            }
+
+            p += nloops;
+            q += nloops;
+        }
+        nblocks = nblocks >> 1;
+        nloops = nloops << 1;
+    }
+    while (nblocks != 0);
+
+    return 0;
+}
+
+/**
+ * Fill the global twiddle table exptab0 (512 entries, i.e. half of a
+ * 1024-point table).
+ * For entry i the phase fixdiv32(itofix32(i), itofix32(1024)) is shifted
+ * left by 16 and passed to fsincos(); the value written through its pointer
+ * argument goes to .re and its return value goes to .im
+ * (presumably cosine and sine respectively - confirm against fsincos()).
+ * Always returns 0.
+ */
+int fft_init_global(void)
+{
+    const int points = 1 << 10;
+    const int32_t sign = 1;
+    int idx = 0;
+
+    while (idx < points / 2)
+    {
+        int32_t cos_val;
+        int32_t sin_val;
+        int32_t idx_fix = itofix32(idx);
+        int32_t points_fix = itofix32(points);
+        int32_t phase = fixdiv32(idx_fix, points_fix);
+
+        sin_val = fsincos(phase<<16, &cos_val);
+
+        exptab0[idx].re = cos_val;
+        exptab0[idx].im = sin_val * sign;
+        ++idx;
+    }
+
+    return 0;
+}
--- a/modules/codec/wmafixed/fft.h
+++ b/modules/codec/wmafixed/fft.h
+/*
+ * WMA compatible decoder
+ * Copyright (c) 2002 The FFmpeg Project.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef WMA_FFT_H
+#define WMA_FFT_H 1
+
+#include <inttypes.h>
+
+typedef int32_t FFTSample;
+
+typedef struct FFTComplex
+{
+    int32_t re, im;
+}
+FFTComplex;
+
+typedef struct FFTContext
+{
+    int nbits;              /* log2 of the number of complex points; read by fft_calc_unscaled() */
+    int inverse;            /* non-zero selects the inverse variant of pass 1 in fft_calc_unscaled() */
+    uint16_t *revtab;
+    FFTComplex *exptab;
+    FFTComplex *exptab1; /* only used by SSE code */
+    int (*fft_calc)(struct FFTContext *s, FFTComplex *z);
+}
+FFTContext;
+
+int fft_calc_unscaled(FFTContext *s, FFTComplex *z);
+int fft_init_global(void);
+
+#endif
--- a/modules/codec/wmafixed/mdct.c
+++ b/modules/codec/wmafixed/mdct.c
+/*
+ * WMA compatible decoder
+ * Copyright (c) 2002 The FFmpeg Project.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <string.h>
+#include "wmafixed.h"
+#include "mdct.h"
+
+/*these are the sin and cos rotations used by the MDCT*/
+
+/*accessed too infrequently to give much speedup in IRAM*/
+
+int32_t *tcosarray[5], *tsinarray[5];
+int32_t tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64];
+int32_t tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64];
+
+uint16_t revtab0[1024];
+
+/**
+ * Initialise an MDCT/IMDCT context for a transform of size 2^nbits.
+ *
+ * The context is zeroed, bound to the global rotation tables selected by
+ * tcosarray[12 - nbits] / tsinarray[12 - nbits], and the first n/4 entries
+ * of those tables are (re)computed with fsincos(). The embedded FFT context
+ * is set up for nbits - 2 bits with the given direction.
+ *
+ * @param s       context to initialise
+ * @param nbits   log2 of the transform size (12 down to 8)
+ * @param inverse stored in the FFT context as its direction flag
+ * @return always 0
+ */
+int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
+{
+    int size, quarter, k;
+    int32_t *cos_tab, *sin_tab;
+
+    memset(s, 0, sizeof(*s));
+
+    size = 1 << nbits;         /* nbits ranges from 12 to 8 inclusive */
+    quarter = size >> 2;
+    cos_tab = tcosarray[12-nbits];
+    sin_tab = tsinarray[12-nbits];
+
+    s->n = size;
+    s->nbits = nbits;
+    s->tcos = cos_tab;
+    s->tsin = sin_tab;
+
+    for (k = 0; k < quarter; ++k)
+    {
+        /* phase of entry k: k plus an eighth (0x2000) in fixed point,
+           scaled down by the transform size */
+        int32_t phase = itofix32(k) + 0x2000;
+        phase = phase >> nbits;
+
+        /* both table entries are stored negated */
+        sin_tab[k] = - fsincos(phase<<16, &cos_tab[k]);
+        cos_tab[k] *= -1;
+    }
+
+    s->fft.nbits = nbits-2;
+    s->fft.inverse = inverse;
+
+    return 0;
+}
+
+/**
+ * Compute inverse MDCT of size N = 2^nbits
+ * @param s MDCT context providing nbits, the tcos/tsin rotation tables and
+ *          the FFT context
+ * @param output N samples; also used as the FFT work buffer
+ * @param input N/2 samples; overwritten by the post rotation, which uses
+ *              this buffer as scratch space (there is no separate tmp
+ *              buffer parameter)
+ */
+void ff_imdct_calc(MDCTContext *s,
+                   int32_t *output,
+                   int32_t *input)
+{
+    int k, n8, n4, n2, n, j,scale; /* scale receives the FFT return value and is not used afterwards */
+    const int32_t *tcos = s->tcos;
+    const int32_t *tsin = s->tsin;
+    const int32_t *in1, *in2;
+    FFTComplex *z1 = (FFTComplex *)output;
+    FFTComplex *z2 = (FFTComplex *)input;
+    int revtabshift = 12 - s->nbits; /* scales an index to the shared 1024-entry revtab0 */
+
+    n = 1 << s->nbits;
+
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    /* pre rotation */
+    in1 = input;
+    in2 = input + n2 - 1;
+
+    for(k = 0; k < n4; k++)
+    {
+        j=revtab0[k<<revtabshift]; /* results are stored in bit-reversed order for the FFT */
+        CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]);
+        in1 += 2;
+        in2 -= 2;
+    }
+
+    scale = fft_calc_unscaled(&s->fft, z1);
+
+    /* post rotation + reordering */
+    for(k = 0; k < n4; k++)
+    {
+        CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]);
+    }
+
+    for(k = 0; k < n8; k++)
+    {
+        int32_t r1,r2,r3,r4,r1n,r2n,r3n;
+
+        r1 = z2[n8 + k].im;
+        r1n = r1 * -1;
+        r2 = z2[n8-1-k].re;
+        r2n = r2 * -1;
+        r3 = z2[k+n8].re;
+        r3n = r3 * -1;
+        r4 = z2[n8-k-1].im;
+
+        /* first half of the output: mirrored values have opposite sign */
+        output[2*k] = r1n;
+        output[n2-1-2*k] = r1;
+
+        output[2*k+1] = r2;
+        output[n2-1-2*k-1] = r2n;
+
+        /* second half of the output: mirrored values have the same sign */
+        output[n2 + 2*k]= r3n;
+        output[n-1- 2*k]= r3n;
+
+        output[n2 + 2*k+1]= r4;
+        output[n-2 - 2 * k] = r4;
+    }
+}
+
+/*
+ * One-time global MDCT setup: publishes the per-size rotation tables
+ * through tcosarray/tsinarray, fills the 10-bit bit-reversal table revtab0
+ * and then initialises the FFT twiddle table. Always returns 0.
+ */
+
+int mdct_init_global(void)
+{
+    /* The tables look redundant, but they are kept separate: merging them
+       would increase the error substantially for a tiny memory saving. */
+    int32_t *const cos_tables[5] = { tcos0, tcos1, tcos2, tcos3, tcos4 };
+    int32_t *const sin_tables[5] = { tsin0, tsin1, tsin2, tsin3, tsin4 };
+    int t, value;
+
+    for (t = 0; t < 5; ++t)
+    {
+        tcosarray[t] = cos_tables[t];
+        tsinarray[t] = sin_tables[t];
+    }
+
+    /* The MDCT bit-reverse table is built here rather than in fft_init.
+       It is hard coded for the largest size (1024 entries, 10 bits);
+       smaller sizes can reuse the largest. */
+    for (value = 0; value < 1024; ++value)
+    {
+        int reversed = 0;
+        int rest = value;
+        int bit;
+
+        /* shift the bits of value out at the bottom and into reversed */
+        for (bit = 0; bit < 10; ++bit)
+        {
+            reversed = (reversed << 1) | (rest & 1);
+            rest >>= 1;
+        }
+
+        revtab0[value] = reversed;
+    }
+
+    fft_init_global();
+
+    return 0;
+}
--- a/modules/codec/wmafixed/mdct.h
+++ b/modules/codec/wmafixed/mdct.h
+/*
+ * WMA compatible decoder
+ * Copyright (c) 2002 The FFmpeg Project.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef WMA_MDCT_H
+#define WMA_MDCT_H 1
+
+#include "fft.h"
+
+typedef struct MDCTContext
+{
+    int n;     /* size of MDCT (i.e. number of input data * 2) */
+    int nbits; /* n = 2^nbits */
+    /* pre/post rotation tables */
+    int32_t *tcos;
+    int32_t *tsin;
+    FFTContext fft; /* FFT context; ff_mdct_init() sets its nbits to nbits-2 */
+}
+MDCTContext;
+
+int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
+void ff_imdct_calc(MDCTContext *s, int32_t *output, int32_t *input);
+int mdct_init_global(void);
+
+#endif
--- a/modules/codec/wmafixed/wma.c
+++ b/modules/codec/wmafixed/wma.c
--- a/modules/codec/wmafixed/wmadata.h
+++ b/modules/codec/wmafixed/wmadata.h
--- a/modules/codec/wmafixed/wmadec.h
+++ b/modules/codec/wmafixed/wmadec.h
+/*
+ * WMA compatible decoder
+ * Copyright (c) 2002 The FFmpeg Project.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _WMADEC_H
+#define _WMADEC_H
+
+#include <inttypes.h>
+
+#include "asf.h"
+#include "bitstream.h" /* For GetBitContext */
+#include "mdct.h"
+
+#undef TRACE
+
+/* size of blocks */
+#define BLOCK_MIN_BITS 7
+#define BLOCK_MAX_BITS 11
+#define BLOCK_MAX_SIZE (1 << BLOCK_MAX_BITS)
+
+#define BLOCK_NB_SIZES (BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1)
+
+/* XXX: find exact max size */
+#define HIGH_BAND_MAX_SIZE 16
+
+#define NB_LSP_COEFS 10
+
+/* XXX: is it a suitable value ? */
+#define MAX_CODED_SUPERFRAME_SIZE 16384
+
+#define M_PI    3.14159265358979323846
+
+#define M_PI_F  0x3243f // pi in 16.16 fixed point (0x3243f / 65536 ~= 3.14159)
+#define TWO_M_PI_F  0x6487f   // 2*pi in 16.16 fixed point (0x6487f / 65536 ~= 6.28319)
+
+#define MAX_CHANNELS 2
+
+#define NOISE_TAB_SIZE 8192
+
+#define LSP_POW_BITS 7
+
+/*
+ * Complete state of one WMA v1/v2 decoder instance: stream parameters,
+ * tables kept per block size, VLC tables, per-channel work buffers and the
+ * bookkeeping for the superframe being decoded.
+ *
+ * The structure embeds several large arrays (exponents, coefs1, frame_out,
+ * last_superframe, lsp tables), so an instance is big; the pointer members
+ * (run_table, level_table, coefs, windows, noise_table) refer to storage
+ * that is not part of this structure.
+ */
+typedef struct WMADecodeContext
+{
+    GetBitContext gb;    /* bit reader state, see bitstream.h */
+
+    int nb_block_sizes;  /* number of block sizes */
+
+    int sample_rate;
+    int nb_channels;
+    int bit_rate;
+    int version; /* 1 = 0x160 (WMAV1), 2 = 0x161 (WMAV2) */
+    int block_align;
+    int use_bit_reservoir;
+    int use_variable_block_len;
+    int use_exp_vlc;  /* exponent coding: 0 = lsp, 1 = vlc + delta */
+    int use_noise_coding; /* true if perceptual noise is added */
+    int byte_offset_bits;
+    VLC exp_vlc;
+    /* the following tables have one entry (or row) per block size */
+    int exponent_sizes[BLOCK_NB_SIZES];
+    uint16_t exponent_bands[BLOCK_NB_SIZES][25];
+    int high_band_start[BLOCK_NB_SIZES]; /* index of first coef in high band */
+    int coefs_start;               /* first coded coef */
+    int coefs_end[BLOCK_NB_SIZES]; /* max number of coded coefficients */
+    int exponent_high_sizes[BLOCK_NB_SIZES];
+    int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE];
+    VLC hgain_vlc;
+
+    /* coded values in high bands */
+    int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
+    int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
+
+    /* there are two possible tables for spectral coefficients */
+    VLC coef_vlc[2];
+    uint16_t *run_table[2];
+    uint16_t *level_table[2];
+    /* frame info */
+    int frame_len;       /* frame length in samples */
+    int frame_len_bits;  /* frame_len = 1 << frame_len_bits */
+
+    /* block info */
+    int reset_block_lengths;
+    int block_len_bits; /* log2 of current block length */
+    int next_block_len_bits; /* log2 of next block length */
+    int prev_block_len_bits; /* log2 of prev block length */
+    int block_len; /* block length in samples */
+    int block_num; /* block number in current frame */
+    int block_pos; /* current position in frame */
+    uint8_t ms_stereo; /* true if mid/side stereo mode */
+    uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */
+    int exponents_bsize[MAX_CHANNELS];      // log2 ratio frame/exp. length
+    int32_t exponents[MAX_CHANNELS][BLOCK_MAX_SIZE];
+    int32_t max_exponent[MAX_CHANNELS];
+    int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
+    /* pointer to a whole [MAX_CHANNELS][BLOCK_MAX_SIZE] array of int32_t */
+    int32_t (*coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
+    MDCTContext mdct_ctx[BLOCK_NB_SIZES];   /* one MDCT context per block size */
+    int32_t *windows[BLOCK_NB_SIZES];       /* one window pointer per block size */
+    /* output buffer for one frame and the last for IMDCT windowing */
+    int32_t frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
+    /* last frame info */
+    uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
+    int last_bitoffset;
+    int last_superframe_len;
+    int32_t *noise_table;
+    int noise_index;
+    int32_t noise_mult; /* XXX: suppress that and integrate it in the noise array */
+    /* lsp_to_curve tables */
+    int32_t lsp_cos_table[BLOCK_MAX_SIZE];
+    int64_t lsp_pow_e_table[256];
+    int32_t lsp_pow_m_table1[(1 << LSP_POW_BITS)];
+    int32_t lsp_pow_m_table2[(1 << LSP_POW_BITS)];
+
+    /* State of current superframe decoding */
+    int bit_offset;
+    int nb_frames;
+    int current_frame;
+
+    /* TRACE is #undef'd near the top of this header, so this member is
+     * compiled out unless that line is changed */
+#ifdef TRACE
+
+    int frame_count;
+#endif
+}
+WMADecodeContext;
+
+/*
+ * Decoder entry points; their definitions are not part of this header.
+ * NOTE(review): return-value conventions and ownership of buf/samples are
+ * not visible here - check the implementation before relying on them.
+ */
+
+/* Set up the decoder context s from the ASF wave format description wfx. */
+int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx);
+/* Presumably starts decoding of the superframe held in buf[0..buf_size) and
+ * fills the "current superframe" members of s - confirm. */
+int wma_decode_superframe_init(WMADecodeContext* s,
+                               uint8_t *buf, int buf_size);
+/* Presumably decodes the next frame of that superframe into samples -
+ * confirm the sample layout and required buffer size. */
+int wma_decode_superframe_frame(WMADecodeContext* s,
+                                int32_t *samples,
+                                uint8_t *buf, int buf_size);
+#endif
--- a/modules/codec/wmafixed/wmadeci.c
+++ b/modules/codec/wmafixed/wmadeci.c
--- a/modules/codec/wmafixed/wmafixed.c
+++ b/modules/codec/wmafixed/wmafixed.c
+/****************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ *
+ * Copyright (C) 2007 Michael Giacomelli
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "wmadec.h"
+#include "wmafixed.h"
+
+/**
+ * Convert an integer to the 64-bit fixed-point format (16 fractional bits).
+ *
+ * The 32 bits of x end up in bits 16..47 of the result and every other bit
+ * is zero.  x is NOT sign-extended: a negative x yields a positive 64-bit
+ * value whose bits 16..47 hold the two's complement pattern of x.  This is
+ * the value the previous byte-wise implementation produced when its
+ * endianness switch matched the host.
+ *
+ * The value is computed arithmetically so that it no longer depends on the
+ * host byte order or on ROCKBOX_BIG_ENDIAN being defined by the build.
+ *
+ * @param x integer value to convert
+ * @return the bits of x shifted into bits 16..47 of an int64_t
+ */
+int64_t IntTo64(int x){
+    /* 16 bits == the two-byte offset of the byte layout this replaces. */
+    return (int64_t)((uint64_t)(uint32_t)x << 16);
+}
+
+/**
+ * Extract the integer part of a 64-bit fixed-point value (16 fractional
+ * bits).
+ *
+ * Returns bits 16..47 of x reinterpreted as a signed 32-bit integer; the
+ * fractional bits 0..15 and bits 48..63 are ignored.
+ *
+ * The value is computed arithmetically so that it does not depend on the
+ * host byte order or on ROCKBOX_BIG_ENDIAN, and so that no byte is shifted
+ * into the sign bit of an int (which the byte-wise version did for values
+ * with bit 47 set).
+ *
+ * @param x 64-bit fixed-point value
+ * @return bits 16..47 of x as a signed integer
+ */
+int IntFrom64(int64_t x)
+{
+    /* Shift as unsigned, keep the low 32 bits, then reinterpret as signed. */
+    return (int)(int32_t)(uint32_t)((uint64_t)x >> 16);
+}
+
+/**
+ * Narrow a 64-bit fixed-point value to 32 bits by keeping its low 32 bits.
+ * No rescaling or saturation is applied; the upper bits are discarded.
+ */
+int32_t Fixed32From64(int64_t x)
+{
+  const int64_t low_word = x & 0xFFFFFFFF;
+
+  return low_word;
+}
+
+/**
+ * Widen a 32-bit fixed-point value to 64 bits.
+ * The value is sign-extended and keeps the same number of fractional bits.
+ */
+int64_t Fixed32To64(int32_t x)
+{
+  int64_t widened = x;
+
+  return widened;
+}
+
+/*
+ * Not performance sensitive code here
+ */
+
+/**
+ * Multiply a 64-bit value by a 32-bit value.
+ *
+ * Returns the plain integer product: no fixed-point rescaling (right shift)
+ * is applied to the result.
+ *
+ * NOTE(review): the original author left an alternative,
+ * (int64_t) fixmul32(Fixed32From64(x), y), commented out here; which scaling
+ * the callers expect should be confirmed before changing this.
+ */
+int64_t fixmul64byfixed(int64_t x, int32_t y)
+{
+    const int64_t product = x * y;
+
+    return product;
+}
+
+/**
+ * Divide two 32-bit fixed-point values (PRECISION fractional bits).
+ *
+ * @param x dividend
+ * @param y divisor
+ * @return (x << PRECISION) / y narrowed to 32 bits; 0 when x is 0 (even if
+ *         y is also 0) and 0x7fffffff when y is 0 and x is not, whatever
+ *         the sign of x.  The quotient is not saturated.
+ */
+int32_t fixdiv32(int32_t x, int32_t y)
+{
+    int64_t temp;
+
+    if(x == 0)
+        return 0;
+    if(y == 0)
+        return 0x7fffffff;
+    /* Scale by multiplying: left-shifting a negative value is undefined in
+     * C, while this 32-bit by 2^PRECISION product always fits in 64 bits. */
+    temp = (int64_t)x * ((int64_t)1 << PRECISION);
+    return (int32_t)(temp / y);
+}
+
+/**
+ * Divide two 64-bit fixed-point values (PRECISION64 fractional bits).
+ *
+ * @param x dividend
+ * @param y divisor
+ * @return (x << PRECISION64) / y; 0 when x is 0 (even if y is also 0) and
+ *         the sentinel 0x07ffffffffffffff when y is 0 and x is not.  Bits
+ *         of x shifted out of the top of the 64-bit word are lost.
+ */
+int64_t fixdiv64(int64_t x, int64_t y)
+{
+    int64_t temp;
+
+    if(x == 0)
+        return 0;
+    if(y == 0)
+        return 0x07ffffffffffffffLL;
+    /* Shift as unsigned: left-shifting a negative or overflowing signed
+     * value is undefined in C.  The resulting bit pattern is the one the
+     * signed shift produced on two's complement targets. */
+    temp = (int64_t)((uint64_t)x << PRECISION64);
+    return (int64_t)(temp / y);
+}
+
+/**
+ * Square root of a 32-bit fixed-point value (PRECISION fractional bits).
+ *
+ * The integer square root of the raw bits of x is found one result bit at a
+ * time, from bit 15 down to bit 0, and then shifted left by PRECISION / 2
+ * to restore the fixed-point scaling.
+ *
+ * @param x value to take the root of; it is converted to unsigned long, so
+ *          negative inputs are not handled specially
+ * @return the square root in the same fixed-point format
+ */
+int32_t fixsqrt32(int32_t x)
+{
+    unsigned long root = 0;
+    unsigned long rem = (unsigned long)x;
+    int k;
+
+    for (k = 15; k >= 0; k--) {
+        const unsigned long bit = 1 << k * 2;
+        const unsigned long trial = root + bit;
+
+        root >>= 1;
+        if (trial <= rem) {
+            /* this result bit is set: consume it from the remainder */
+            rem -= trial;
+            root |= bit;
+        }
+    }
+
+    return (int32_t)(root << (PRECISION / 2));
+}
+
+/*
+ * Inverse gain of circular cordic rotation in s0.31 format.
+ *
+ * fsincos() copies this into an int32_t.  0xb2458939 has bit 31 set, so as
+ * a 32-bit two's complement value it is -0x4dba76c7, i.e. about -0.6072529:
+ * the magnitude is the figure quoted below, but the stored sign is negative.
+ * fsincos() negates its start vector in the first and fourth quadrants and
+ * uses it as stored otherwise.
+ */
+static const long cordic_circular_gain = 0xb2458939; /* 0.607252929 */
+
+/*
+ * Table of values of atan(2^-i) in 0.32 format fractions of pi where
+ * pi = 0xffffffff / 2.  Entry i belongs to CORDIC iteration i; the decimal
+ * figure next to each entry is the same angle in radians.  The table has 32
+ * entries, of which fsincos() uses the first 31.
+ */
+static const unsigned long atan_table[] = {
+    0x1fffffff, /* +0.785398163 (or pi/4) */
+    0x12e4051d, /* +0.463647609 */
+    0x09fb385b, /* +0.244978663 */
+    0x051111d4, /* +0.124354995 */
+    0x028b0d43, /* +0.062418810 */
+    0x0145d7e1, /* +0.031239833 */
+    0x00a2f61e, /* +0.015623729 */
+    0x00517c55, /* +0.007812341 */
+    0x0028be53, /* +0.003906230 */
+    0x00145f2e, /* +0.001953123 */
+    0x000a2f98, /* +0.000976562 */
+    0x000517cc, /* +0.000488281 */
+    0x00028be6, /* +0.000244141 */
+    0x000145f3, /* +0.000122070 */
+    0x0000a2f9, /* +0.000061035 */
+    0x0000517c, /* +0.000030518 */
+    0x000028be, /* +0.000015259 */
+    0x0000145f, /* +0.000007629 */
+    0x00000a2f, /* +0.000003815 */
+    0x00000517, /* +0.000001907 */
+    0x0000028b, /* +0.000000954 */
+    0x00000145, /* +0.000000477 */
+    0x000000a2, /* +0.000000238 */
+    0x00000051, /* +0.000000119 */
+    0x00000028, /* +0.000000060 */
+    0x00000014, /* +0.000000030 */
+    0x0000000a, /* +0.000000015 */
+    0x00000005, /* +0.000000007 */
+    0x00000002, /* +0.000000004 */
+    0x00000001, /* +0.000000002 */
+    0x00000000, /* +0.000000001 */
+    0x00000000, /* +0.000000000 */
+};
+
+/*
+ *   Below here functions do not use standard fixed precision!
+ */
+
+/**
+ * Compute the sine and cosine of an angle with CORDIC rotations.
+ *
+ * @param phase angle, where 0 .. 0xffffffff maps onto 0 .. 2*pi.
+ * @param cos   if not NULL, receives the cosine of phase.
+ * @return sine of phase.  Sine and cosine are both accumulated in int32_t,
+ *         so they span the int32_t range, whose extremes stand for -1 and 1
+ *         (also where long is wider than 32 bits).
+ *
+ *        Gives at least 24 bits precision (last 2-8 bits or so are probably off)
+ */
+long fsincos(unsigned long phase, int32_t *cos)
+{
+    const unsigned long quarter = 0xffffffff / 4;   /* pi/2 in phase units */
+    int32_t vx = cordic_circular_gain;              /* start vector (gain, 0) */
+    int32_t vy = 0;
+    unsigned long angle = phase;
+    int step;
+
+    /* Bring the working angle into 0..pi; the loop below pivots on pi/2. */
+    if (angle < quarter) {
+        /* first quadrant: flip the start vector and add pi/2 */
+        vx = -vx;
+        angle += quarter;
+    } else if (angle < 3 * quarter) {
+        /* second or third quadrant: subtract pi/2 */
+        angle -= quarter;
+    } else {
+        /* fourth quadrant: flip the start vector and subtract 3pi/2 */
+        vx = -vx;
+        angle -= 3 * quarter;
+    }
+
+    /* Every pass contributes roughly one more bit of precision. */
+    for (step = 0; step < 31; step++) {
+        const int32_t dx = vy >> step;
+        const int32_t dy = vx >> step;
+
+        /* Rotate towards the pivot at pi/2 by atan(2^-step). */
+        if (angle >= quarter) {
+            vx -= dx;
+            vy += dy;
+            angle -= atan_table[step];
+        } else {
+            vx += dx;
+            vy -= dy;
+            angle += atan_table[step];
+        }
+    }
+
+    if (cos)
+        *cos = vx;
+
+    return vy;
+}
--- a/modules/codec/wmafixed/wmafixed.h
+++ b/modules/codec/wmafixed/wmafixed.h
+/****************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ *
+ * Copyright (C) 2007 Michael Giacomelli
+ *
+ * All files in this archive are subject to the GNU General Public License.
+ * See the file COPYING in the source tree root for full license agreement.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+/*  fixed precision code.  We use a combination of Sign 15.16 and Sign.31
+    precision here.
+
+    The WMA decoder does not always follow this convention, and occasionally
+    renormalizes values to other formats in order to maximize precision.
+    However, only the two precisions above are provided in this file.
+
+*/
+
+#include <inttypes.h>
+
+/* Number of fractional bits of the 32-bit (PRECISION) and of the 64-bit
+ * (PRECISION64) fixed-point formats. */
+#define PRECISION       16
+#define PRECISION64     16
+
+/* Conversion helpers.  All of them are plain macros: fixtof64 and ftofix32
+ * perform their arithmetic in floating point, and ftofix32 evaluates its
+ * argument twice. */
+
+/* fixed -> float: divide by 2^PRECISION64 */
+#define fixtof64(x)       (float)((float)(x) / (float)(1 << PRECISION64))        //does not work on int64_t!
+/* float -> 32-bit fixed: scale by 2^PRECISION, add 0.5 away from zero, then
+ * truncate through the cast */
+#define ftofix32(x)       ((int32_t)((x) * (float)(1 << PRECISION) + ((x) < 0 ? -0.5 : 0.5)))
+/* integer -> 64-bit fixed, through IntTo64() */
+#define itofix64(x)       (IntTo64(x))
+/* integer -> 32-bit fixed: shift left by PRECISION */
+#define itofix32(x)       ((x) << PRECISION)
+/* 32-bit fixed -> integer: shift right by PRECISION (fraction is dropped) */
+#define fixtoi32(x)       ((x) >> PRECISION)
+/* 64-bit fixed -> integer, through IntFrom64() */
+#define fixtoi64(x)       (IntFrom64(x))
+
+/*fixed functions*/
+
+/* integer <-> 64-bit fixed point (back ends of itofix64 / fixtoi64) */
+int64_t IntTo64(int x);
+int IntFrom64(int64_t x);
+/* keep the low 32 bits of a 64-bit value / widen a 32-bit value to 64 bits */
+int32_t Fixed32From64(int64_t x);
+int64_t Fixed32To64(int32_t x);
+/* product of a 64-bit and a 32-bit value */
+int64_t fixmul64byfixed(int64_t x, int32_t y);
+/* fixed-point division; both return 0 for x == 0 and a large positive
+ * sentinel when only y is 0 */
+int32_t fixdiv32(int32_t x, int32_t y);
+int64_t fixdiv64(int64_t x, int64_t y);
+/* fixed-point square root */
+int32_t fixsqrt32(int32_t x);
+/* CORDIC sine (return value) and cosine (through cos, which may be NULL);
+ * phase 0 .. 0xffffffff stands for 0 .. 2*pi */
+long fsincos(unsigned long phase, int32_t *cos);
+
+#ifdef CPU_ARM
+
+/*Sign-15.16 format */
+
+/*
+ * ARM version of fixmul32(), written as a GCC statement expression.
+ * smull forms the 64-bit product of x and y.  The low word is then shifted
+ * right by PRECISION, leaving the last bit shifted out in the carry flag,
+ * and adc adds the high word shifted left by (32 - PRECISION) plus that
+ * carry.  The result is therefore (x * y) >> PRECISION with the discarded
+ * half rounded up, whereas the portable C fixmul32() further down in this
+ * header truncates.
+ */
+#define fixmul32(x, y)  \
+    ({ int32_t __hi;  \
+       uint32_t __lo;  \
+       int32_t __result;  \
+       asm ("smull   %0, %1, %3, %4\n\t"  \
+            "movs    %0, %0, lsr %5\n\t"  \
+            "adc    %2, %0, %1, lsl %6"  \
+            : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
+            : "%r" (x), "r" (y),  \
+              "M" (PRECISION), "M" (32 - PRECISION)  \
+            : "cc");  \
+       __result;  \
+    })
+
+/*
+ * ARM version of fixmul32b(), written as a GCC statement expression.
+ * Only the high word of the 64-bit product is used, shifted left by one, so
+ * the result is ((x * y) >> 32) << 1: the same scaling as the shift by 31
+ * done by the portable C fixmul32b(), but with the lowest bit always zero.
+ */
+#define fixmul32b(x, y)  \
+    ({ int32_t __hi;  \
+       uint32_t __lo;  \
+       int32_t __result;  \
+       asm ("smull   %0, %1, %3, %4\n\t"  \
+            "movs    %2, %1, lsl #1"  \
+            : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
+            : "%r" (x), "r" (y)  \
+            : "cc");  \
+       __result;  \
+    })
+
+#elif defined(CPU_COLDFIRE)
+
+/*
+ * ColdFire version of fixmul32().
+ * The product is formed twice: once in EMAC accumulator %acc0 and once with
+ * mulu.l, which leaves its low 32 bits in x.  The accumulator value is
+ * shifted right by one, its low 16 bits are moved into the low half of x,
+ * and swap exchanges the two halves of x to assemble the result.
+ * NOTE(review): presumably relies on the EMAC unit being in fractional mode,
+ * so that %acc0 reads back as the product shifted right by 31 - confirm.
+ */
+static inline int32_t fixmul32(int32_t x, int32_t y)
+{
+#if PRECISION != 16
+#warning Coldfire fixmul32() only works for PRECISION == 16
+#endif
+    int32_t t1;
+    asm (
+        "mac.l   %[x], %[y], %%acc0  \n" /* multiply */
+        "mulu.l  %[y], %[x]      \n"     /* get lower half, avoid emac stall */
+        "movclr.l %%acc0, %[t1]  \n"     /* get higher half */
+        "lsr.l   #1, %[t1]       \n"
+        "move.w  %[t1], %[x]     \n"
+        "swap    %[x]            \n"
+        : [t1] "=&d" (t1), [x] "+d" (x)
+        : [y] "d"  (y)
+    );
+    return x;
+}
+
+/*
+ * ColdFire version of fixmul32b(): multiplies x by y in EMAC accumulator
+ * %acc0 and returns the value read back (and cleared) from it.
+ * NOTE(review): presumably relies on the EMAC unit being in fractional mode,
+ * so that the value read is the product shifted right by 31 - confirm.
+ */
+static inline int32_t fixmul32b(int32_t x, int32_t y)
+{
+    asm (
+        "mac.l   %[x], %[y], %%acc0  \n" /* multiply */
+        "movclr.l %%acc0, %[x]  \n"     /* get higher half */
+        : [x] "+d" (x)
+        : [y] "d"  (y)
+    );
+    return x;
+}
+
+#else
+
+/*
+ * Portable fixmul32(): multiply two 32-bit fixed-point values.
+ * The product is formed in 64 bits, shifted right by PRECISION (the
+ * discarded bits are dropped, not rounded) and narrowed back to 32 bits.
+ */
+static inline int32_t fixmul32(int32_t x, int32_t y)
+{
+    const int64_t product = (int64_t)x * y;
+
+    return (int32_t)(product >> PRECISION);
+}
+
+/*
+ * Portable fixmul32b(): multiply two 32-bit values and shift the 64-bit
+ * product right by 31 before narrowing it back to 32 bits.
+ */
+static inline int32_t fixmul32b(int32_t x, int32_t y)
+{
+    const int64_t product = (int64_t)x * y;
+
+    /* 16+31-16 = 31 bits */
+    return (int32_t)(product >> 31);
+}
+
+#endif
+
+#ifdef CPU_ARM
+/*
+ * ARM complex multiply: (*x + i * *y) = (a + i*b) * (t + i*v).
+ * y1 accumulates b*t + a*v and x1 accumulates a*t - b*v (b is negated in
+ * place before the second pair of multiplies).  Only the high 32 bits of
+ * each 64-bit sum are kept, and they are shifted left by one when stored.
+ */
+static inline
+void CMUL(int32_t *x, int32_t *y,
+          int32_t  a, int32_t  b,
+          int32_t  t, int32_t  v)
+{
+    /* This version loses one bit of precision. Could be solved at the cost
+     * of 2 extra cycles if it becomes an issue. */
+    int x1, y1, l;
+    asm(
+        "smull    %[l], %[y1], %[b], %[t] \n"
+        "smlal    %[l], %[y1], %[a], %[v] \n"
+        "rsb      %[b], %[b], #0          \n"
+        "smull    %[l], %[x1], %[a], %[t] \n"
+        "smlal    %[l], %[x1], %[b], %[v] \n"
+        : [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b)
+        : [a] "r" (a),   [t] "r" (t),    [v] "r" (v)
+        : "cc"
+    );
+    *x = x1 << 1;
+    *y = y1 << 1;
+}
+#elif defined CPU_COLDFIRE
+/*
+ * ColdFire complex multiply: (*x + i * *y) = (a + i*b) * (t + i*v).
+ * %acc0 accumulates a*t - b*v and %acc1 accumulates b*t + a*v; each
+ * accumulator is read back (and cleared) through a and stored to *x and *y
+ * by the asm itself, hence the "memory" clobber.
+ * NOTE(review): the scaling of the stored values depends on the EMAC mode,
+ * which is not set here - presumably fractional mode; confirm.
+ */
+static inline
+void CMUL(int32_t *x, int32_t *y,
+          int32_t  a, int32_t  b,
+          int32_t  t, int32_t  v)
+{
+  asm volatile ("mac.l %[a], %[t], %%acc0;"
+                "msac.l %[b], %[v], %%acc0;"
+                "mac.l %[b], %[t], %%acc1;"
+                "mac.l %[a], %[v], %%acc1;"
+                "movclr.l %%acc0, %[a];"
+                "move.l %[a], (%[x]);"
+                "movclr.l %%acc1, %[a];"
+                "move.l %[a], (%[y]);"
+                : [a] "+&r" (a)
+                : [x] "a" (x), [y] "a" (y),
+                  [b] "r" (b), [t] "r" (t), [v] "r" (v)
+                : "cc", "memory");
+}
+#else
+/*
+ * Portable complex multiply:
+ *   (*pre + i * *pim) = (are + i*aim) * (bre + i*bim)
+ * Each of the four partial products goes through fixmul32b().  Both outputs
+ * are written only after all four products have been computed.
+ */
+static inline
+void CMUL(int32_t *pre,
+          int32_t *pim,
+          int32_t are,
+          int32_t aim,
+          int32_t bre,
+          int32_t bim)
+{
+    const int32_t re_re = fixmul32b(bre, are);
+    const int32_t im_im = fixmul32b(bim, aim);
+    const int32_t re_im = fixmul32b(bre, aim);
+    const int32_t im_re = fixmul32b(bim, are);
+
+    *pre = re_re - im_im;
+    *pim = re_im + im_re;
+}
+#endif