Commit 807563ba authored by Jean-Paul Saman's avatar Jean-Paul Saman

wma-fixed: Forward port of fixed integer WMA v1/2 decoder.

The fixed integer WMA v1/2 decoder originates from the ffmpeg project and has been changed into a fixed integer decoder by the RockBox project. It was then adapted for vlc in the 0.8.6-neuros branch.
parent a98b4976
......@@ -15,6 +15,7 @@ New Decoders:
* Blu-Ray Linear PCM
* QCELP (Qualcomm PureVoice)
* Real Video 3.0 & 4.0
* WMA v1/2 fixed point integer
Demuxers:
* Support for Dirac and RealVideo in Matroska files
......
......@@ -31,7 +31,7 @@ dnl automake behavior, you've likely never heard of maintainer mode, so we
dnl can't expect you to enable it manually.
AS_IF([test "x${enable_maintainer_mode}" != "xno"],
[enable_maintainer_mode="yes"])
[enable_maintainer_mode="yes"])
AM_MAINTAINER_MODE
dnl
......@@ -212,8 +212,7 @@ case "${host_os}" in
VLC_ADD_LDFLAGS([libvlc],[-Wl,-framework,CoreFoundation])
VLC_ADD_LDFLAGS([motion],[-Wl,-framework,IOKit,-framework,CoreFoundation])
AC_ARG_ENABLE(macosx-defaults,
AS_HELP_STRING([--enable-macosx-defaults],[Build the default configuration
on Mac OS X (default enabled)]))
AS_HELP_STRING([--enable-macosx-defaults],[Build the default configuration on Mac OS X (default enabled)]))
if test "x${enable_macosx_defaults}" != "xno"
then
echo ""
......@@ -1086,6 +1085,11 @@ case "${host_cpu}" in
"")
ARCH=unknown
;;
arm*)
dnl use arm assembly
VLC_ADD_CFLAGS([wma_fixed],[-DCPU_ARM])
ARCH="${host_cpu}"
;;
*)
ARCH="${host_cpu}"
;;
......@@ -1471,7 +1475,7 @@ dnl - Others: test should fail
AC_CHECK_HEADERS(altivec.h)
CPPFLAGS="${CPPFLAGS_save}"
AS_IF([test "${ac_cv_c_altivec}" != "no"], [
AC_DEFINE(CAN_COMPILE_C_ALTIVEC, 1,
[Define to 1 if C AltiVec extensions are available.])
......@@ -2202,9 +2206,9 @@ then
if test "${SYS}" = "mingw32" -o "${SYS}" = "cygwin"
then
test -z "${with_opencv_tree}" && AC_MSG_ERROR([You have to specify --with-opencv-tree])
AC_MSG_CHECKING(for opencv in ${with_opencv_tree})
if test -f ${with_opencv_tree}/cv/include/cv.h -a -f ${with_opencv_tree}/cxcore/include/cxcore.h \
-a -f ${with_opencv_tree}/cvaux/include/cvaux.h -a -f ${with_opencv_tree}/otherlibs/highgui/highgui.h
AC_MSG_CHECKING(for opencv in ${with_opencv_tree})
if test -f ${with_opencv_tree}/cv/include/cv.h -a -f ${with_opencv_tree}/cxcore/include/cxcore.h \
-a -f ${with_opencv_tree}/cvaux/include/cvaux.h -a -f ${with_opencv_tree}/otherlibs/highgui/highgui.h
then
AC_MSG_RESULT(yes)
VLC_ADD_PLUGIN([opencv_wrapper])
......@@ -2420,7 +2424,6 @@ then
then
AC_DEFINE_UNQUOTED(VIDEODEV2_H_FILE, "${with_videodev2}", [Location of videodev2.h])
fi
AC_CACHE_CHECK([for new linux/videodev2.h],
[ac_cv_new_linux_videodev2_h],
......@@ -2889,6 +2892,16 @@ dnl
AC_ARG_WITH(,[Codec plugins:])
dnl
dnl wmafixed plugin
dnl
AC_ARG_ENABLE(wma-fixed,
[ --enable-wma-fixed libwma-fixed module (default disabled)])
if test "${enable_wma_fixed}" = "yes"
then
VLC_ADD_PLUGIN([wma_fixed])
fi
dnl
dnl mad plugin
dnl
......@@ -5906,6 +5919,7 @@ AC_CONFIG_FILES([
modules/codec/dmo/Makefile
modules/codec/subtitles/Makefile
modules/codec/spudec/Makefile
modules/codec/wmafixed/Makefile
modules/codec/xvmc/Makefile
modules/control/Makefile
modules/control/http/Makefile
......
SUBDIRS = cmml dmo avcodec subtitles spudec xvmc
SUBDIRS = cmml dmo avcodec subtitles spudec wmafixed xvmc
SOURCES_a52 = a52.c
SOURCES_dts = dts.c
SOURCES_flac = flac.c
......
SOURCES_wma_fixed = asf.h bswap.h fft.h mdct.h wma.c wmadeci.c bitstream.c \
wmadata.h wmafixed.c bitstream.h fft.c mdct.c \
wmadec.h wmafixed.h
/*****************************************************************************
* wma.c: wma decoder using integer decoder from Rockbox, based on FFmpeg
*****************************************************************************
* Copyright (C) 2008 M2X
*
* Authors: Rafaël Carré <rcarre@m2x.nl>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef _ASF_H
#define _ASF_H
#include <inttypes.h>
/* ASF codec IDs */
#define ASF_CODEC_ID_WMAV1 0x160
#define ASF_CODEC_ID_WMAV2 0x161
/*
 * Audio stream description passed around by the WMA decoder glue.
 * NOTE(review): the field names mirror the ASF stream properties /
 * WAVEFORMATEX audio header; the code that fills this struct is not part of
 * this file, so the per-field notes below are assumptions to be confirmed.
 */
struct asf_waveformatex_s {
uint32_t packet_size;
int audiostream;
/* presumably one of the ASF_CODEC_ID_* values defined above -- confirm */
uint16_t codec_id;
uint16_t channels;
uint32_t rate;
uint32_t bitrate;
uint16_t blockalign;
uint16_t bitspersample;
/* presumably the number of valid bytes in data[] -- confirm */
uint16_t datalen;
/* codec-specific extra data; at most 6 bytes fit here */
uint8_t data[6];
};
typedef struct asf_waveformatex_s asf_waveformatex_t;
#endif
/*
* Common bit i/o utils
* Copyright (c) 2000, 2001 Fabrice Bellard.
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
*/
/**
* @file bitstream.c
* bitstream api.
*/
#include "bitstream.h"
#include <stdio.h>
#define DEBUGF printf
/**
 * Same as av_mallocz_static(), but does a realloc.
 *
 * @param[in] ptr The block of memory to reallocate.
 * @param[in] size The requested size.
 * @return Block of memory of requested size.
 * @deprecated Code which uses ff_realloc_static is broken/misdesigned
 * and should use static arrays instead.
 *
 * NOTE(review): only this prototype is present; no definition or caller of
 * ff_realloc_static() is visible in this file -- confirm it is still needed.
 */
attribute_deprecated void *ff_realloc_static(void *ptr, unsigned int size);
/* Integer square root lookup: ff_sqrt_tab[i] holds floor(sqrt(i)) for i in 0..127. */
const uint8_t ff_sqrt_tab[128]={
0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11
};
/*
 * Base-2 logarithm lookup for one byte: ff_log2_tab[i] holds floor(log2(i))
 * for i in 1..255; entry 0 is 0.  av_log2() in bitstream.h indexes this table
 * after reducing its argument to 8 bits.
 */
const uint8_t ff_log2_tab[256]={
0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
/**
 * Pads the output with zero bits up to the next byte boundary.
 *
 * The number of padding bits is taken from the low three bits of the writer
 * position (negated bit index for the ALT writer, remaining accumulator bits
 * otherwise) and written through put_bits().
 *
 * @param s bit writer to pad
 */
void align_put_bits(PutBitContext *s)
{
    int pad_bits;

#ifdef ALT_BITSTREAM_WRITER
    pad_bits = ( - s->index) & 7;
#else
    pad_bits = s->bit_left & 7;
#endif
    put_bits(s, pad_bits, 0);
}
/**
 * Writes a NUL-terminated string to the bit writer, 8 bits per character.
 *
 * @param pbc      bit writer receiving the characters
 * @param s        NUL-terminated string; the terminator itself is not
 *                 written unless put_zero is set
 * @param put_zero when non-zero, a trailing zero byte is appended
 */
void ff_put_string(PutBitContext * pbc, char *s, int put_zero)
{
    for (; *s; s++) {
        /* Go through uint8_t: where plain char is signed, a character with
         * the high bit set would otherwise sign-extend to 0xFFFFFFxx and
         * put_bits() would OR those stray bits into the stream. */
        put_bits(pbc, 8, (uint8_t)*s);
    }
    if (put_zero)
        put_bits(pbc, 8, 0);
}
/* VLC decoding */
/*
 * GET_DATA(v, table, i, wrap, size)
 * Loads entry number 'i' from a strided table into 'v'.
 *   table : base address of the table
 *   wrap  : distance in bytes between consecutive entries
 *   size  : width in bytes of one entry; 1 and 2 read uint8_t / uint16_t,
 *           any other value reads a uint32_t
 * Arguments are parenthesised so that expressions such as 'a + b' can be
 * passed safely, and the body is a single statement (use it with a trailing
 * semicolon).  The entry is read through a cast pointer, so the table must be
 * suitably aligned for 'size'.
 */
#define GET_DATA(v, table, i, wrap, size) \
do {\
    const uint8_t *get_data_ptr = (const uint8_t *)(table) + (i) * (wrap);\
    switch (size) {\
    case 1:\
        (v) = *(const uint8_t *)get_data_ptr;\
        break;\
    case 2:\
        (v) = *(const uint16_t *)get_data_ptr;\
        break;\
    default:\
        (v) = *(const uint32_t *)get_data_ptr;\
        break;\
    }\
} while (0)
/**
 * Reserves 'size' consecutive entries at the end of vlc->table.
 *
 * No memory is (re)allocated here: the function only advances
 * vlc->table_size.  When the request exceeds vlc->table_allocated a
 * diagnostic is printed, table_allocated is bumped by (1 << vlc->bits) and
 * the call still succeeds as long as vlc->table is non-NULL.
 * NOTE(review): in that case the caller may write past the real end of the
 * table storage -- confirm the tables supplied by the decoder are big enough.
 *
 * @param vlc  table descriptor to extend
 * @param size number of entries to reserve
 * @return index of the first reserved entry, or -1 if the table was
 *         exhausted and vlc->table is NULL
 */
static int alloc_table(VLC *vlc, int size)
{
    const int first_entry = vlc->table_size;

    vlc->table_size = first_entry + size;
    if (vlc->table_size <= vlc->table_allocated)
        return first_entry;

    DEBUGF("Tried to allocate past the end of a Huffman table: %d/%d\n",
           vlc->table_allocated, vlc->table_allocated+(1 << vlc->bits));
    vlc->table_allocated += (1 << vlc->bits);

    return vlc->table ? first_entry : -1;
}
/*
 * Builds one level of the VLC lookup table and, recursively, its sub-tables.
 *
 * vlc            table descriptor; entries are reserved with alloc_table()
 * table_nb_bits  this level is indexed by table_nb_bits bits (2^n entries)
 * nb_codes       number of entries in the 'bits' / 'codes' arrays
 * bits, codes    code lengths and code bit patterns, read with GET_DATA()
 *                using the given *_wrap (stride) and *_size (entry width)
 * code_prefix    bits already consumed by the parent levels
 * n_prefix       number of bits in code_prefix
 *
 * Each table entry is a pair: [0] = code, [1] = bits.  A positive 'bits'
 * means a complete code of that length; a negative 'bits' marks a link to a
 * sub-table whose index is stored in [0] and which is indexed by -bits bits.
 *
 * Returns the index of this level's first entry in vlc->table, or -1 when
 * alloc_table() fails, two codes collide, or a sub-table fails to build.
 *
 * Note: the local 'flags' is fixed to 0, so every INIT_VLC_LE branch below
 * is never taken in this version.
 */
static int build_table(VLC *vlc, int table_nb_bits,
int nb_codes,
const void *bits, int bits_wrap, int bits_size,
const void *codes, int codes_wrap, int codes_size,
uint32_t code_prefix, int n_prefix)
{
int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2;
uint32_t code;
int flags = 0;
VLC_TYPE (*table)[2];
table_size = 1 << table_nb_bits;
table_index = alloc_table(vlc, table_size);
#ifdef DEBUG_VLC
printf("new table index=%d size=%d code_prefix=%x n=%d\n",
table_index, table_size, code_prefix, n_prefix);
#endif
if (table_index < 0)
return -1;
table = &vlc->table[table_index];
/* mark every entry of this level as empty */
for(i=0;i<table_size;i++) {
table[i][1] = 0; //bits
table[i][0] = -1; //codes
}
/* first pass: map codes and compute auxiliary table sizes */
for(i=0;i<nb_codes;i++) {
GET_DATA(n, bits, i, bits_wrap, bits_size);
GET_DATA(code, codes, i, codes_wrap, codes_size);
/* we accept tables with holes */
if (n <= 0)
continue;
#if defined(DEBUG_VLC) && 0
printf("i=%d n=%d code=0x%x\n", i, n, code);
#endif
/* if code matches the prefix, it is in the table */
n -= n_prefix;
if(flags & INIT_VLC_LE)
code_prefix2= code & (n_prefix>=32 ? 0xffffffff : (uint32_t)(1 << n_prefix)-1);
else
code_prefix2= code >> n;
if (n > 0 && (int)code_prefix2 == (int)code_prefix) {
if (n <= table_nb_bits) {
/* no need to add another table */
/* the code fills every entry whose leading n index bits equal it */
j = (code << (table_nb_bits - n)) & (table_size - 1);
nb = 1 << (table_nb_bits - n);
for(k=0;k<nb;k++) {
if(flags & INIT_VLC_LE)
j = (code >> n_prefix) + (k<<n);
#ifdef DEBUG_VLC
av_log(NULL, 0, "%4x: code=%d n=%d\n",
j, i, n);
#endif
/* an occupied entry means two codes overlap: reject the table */
if (table[j][1] /*bits*/ != 0) {
return -1;
}
table[j][1] = n; //bits
table[j][0] = i; //code
j++;
}
} else {
/* code is longer than this level: remember the largest remaining
   length for its slot as a negative 'bits' value */
n -= table_nb_bits;
j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1);
#ifdef DEBUG_VLC
av_log(NULL, 0,"%4x: n=%d (subtable)\n",
j, n);
#endif
/* compute table size */
n1 = -table[j][1]; //bits
if (n > n1)
n1 = n;
table[j][1] = -n1; //bits
}
}
}
/* second pass : fill auxiliary tables recursively */
for(i=0;i<table_size;i++) {
n = table[i][1]; //bits
if (n < 0) {
n = -n;
/* a sub-table is never indexed by more bits than its parent */
if (n > table_nb_bits) {
n = table_nb_bits;
table[i][1] = -n; //bits
}
index = build_table(vlc, n, nb_codes,
bits, bits_wrap, bits_size,
codes, codes_wrap, codes_size,
(flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i),
n_prefix + table_nb_bits);
if (index < 0)
return -1;
/* note: realloc has been done, so reload tables */
table = &vlc->table[table_index];
table[i][0] = index; //code
}
}
return table_index;
}
/**
 * Builds the VLC decoding tables used by get_vlc2().
 *
 * @param vlc        descriptor to fill; vlc->table and vlc->table_allocated
 *                   are used as found (they are not set up here)
 * @param nb_bits    the first-level table has 2^nb_bits entries.  Bigger
 *                   tables decode faster but cost memory and L1 cache;
 *                   9 is a good compromise.
 * @param nb_codes   number of VLC codes
 * @param bits       table giving the size (in bits) of each code
 * @param bits_wrap  number of bytes between two entries of 'bits'
 * @param bits_size  number of bytes of one entry of 'bits'
 * @param codes      table giving the bit pattern of each code
 * @param codes_wrap number of bytes between two entries of 'codes'
 * @param codes_size number of bytes of one entry of 'codes'
 * @param flags      not used for building; handed back on success
 *
 * The wrap/size pairs allow any memory layout and entry type
 * (byte/word/long) for the 'bits' and 'codes' tables.
 *
 * @return -1 if the tables could not be built, otherwise 'flags' unchanged.
 *         The parameter only exists to match FFmpeg's signature; returning
 *         it keeps gcc from warning about an unused argument.
 */
int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
             const void *bits, int bits_wrap, int bits_size,
             const void *codes, int codes_wrap, int codes_size,
             int flags)
{
    int root_index;

    vlc->table_size = 0;
    vlc->bits       = nb_bits;
#ifdef DEBUG_VLC
    printf("build table nb_codes=%d\n", nb_codes);
#endif

    root_index = build_table(vlc, nb_bits, nb_codes,
                             bits, bits_wrap, bits_size,
                             codes, codes_wrap, codes_size,
                             0, 0);
    if (root_index < 0) {
        //av_free(vlc->table);
        return -1;
    }

    return flags;
}
/*
* copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file bitstream.h
* bitstream api header.
*/
#ifndef BITSTREAM_H
#define BITSTREAM_H
#define av_always_inline inline
#define attribute_deprecated
#include <inttypes.h>
#include <stdlib.h>
#ifdef CPU_ARM
#define CONFIG_ALIGN 1
#endif
#ifdef ROCKBOX_BIG_ENDIAN
#define WORDS_BIGENDIAN
#endif
#include "bswap.h"
extern const uint8_t ff_log2_tab[256];
/*misc utility functions added to make it compile */
/**
 * Integer base-2 logarithm.
 *
 * The argument is reduced to 8 bits in two steps (upper half-word, then
 * upper byte), counting the bits shifted out, and the rest is looked up in
 * ff_log2_tab.
 *
 * @param v value to take the logarithm of
 * @return number of bits shifted out plus ff_log2_tab[remaining byte]
 */
static inline int av_log2(unsigned int v)
{
    int shifted = 0;

    if (v & 0xffff0000) {
        shifted = 16;
        v >>= 16;
    }
    if (v & 0xff00) {
        shifted += 8;
        v >>= 8;
    }
    return shifted + ff_log2_tab[v];
}
#if defined(ALT_BITSTREAM_READER_LE) && !defined(ALT_BITSTREAM_READER)
#define ALT_BITSTREAM_READER
#endif
//#define ALT_BITSTREAM_WRITER
//#define ALIGNED_BITSTREAM_WRITER
#if !defined(LIBMPEG2_BITSTREAM_READER) && !defined(A32_BITSTREAM_READER) && !defined(ALT_BITSTREAM_READER)
# ifdef ARCH_ARMV4L
# define A32_BITSTREAM_READER
# else
#define ALT_BITSTREAM_READER
//#define LIBMPEG2_BITSTREAM_READER
//#define A32_BITSTREAM_READER
# endif
#endif
#define LIBMPEG2_BITSTREAM_READER_HACK //add BERO
extern const uint8_t ff_reverse[256];
/*
 * NEG_SSR32(a, s) / NEG_USR32(a, s): shift the 32-bit value 'a' right by
 * (32 - s) bits, arithmetically (signed) or logically (unsigned).  The
 * bitstream readers use them to keep the top 's' bits of their cache.
 *
 * The x86 variants pass -s as the shift count instead of computing 32 - s;
 * NOTE(review): this relies on the x86 shift instructions only using the low
 * five bits of the count -- see the Intel SDM entry for SAR/SHR.
 *
 * In the generic variants s == 0 yields a shift by 32, which C leaves
 * undefined for 32-bit operands, so callers must pass s >= 1.
 */
#if defined(ARCH_X86)
// avoid +32 for shift optimization (gcc should do that ...)
static inline int32_t NEG_SSR32( int32_t a, int8_t s){
asm ("sarl %1, %0\n\t"
: "+r" (a)
: "ic" ((uint8_t)(-s))
);
return a;
}
static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
asm ("shrl %1, %0\n\t"
: "+r" (a)
: "ic" ((uint8_t)(-s))
);
return a;
}
#else
# define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
# define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
#endif
/* bit output */
/* buf and buf_end must be present and used by every alternative writer. */
/*
 * State of a bit writer.
 * ALT writer:     buf/buf_end delimit the output buffer and index is the
 *                 current position in bits.
 * Default writer: bit_buf accumulates bits not yet stored, bit_left is the
 *                 number of free bits left in bit_buf (32 when empty) and
 *                 buf_ptr is the next byte to be written inside buf..buf_end.
 */
typedef struct PutBitContext {
#ifdef ALT_BITSTREAM_WRITER
uint8_t *buf, *buf_end;
int index;
#else
uint32_t bit_buf;
int bit_left;
uint8_t *buf, *buf_ptr, *buf_end;
#endif
} PutBitContext;
/**
 * Prepares a bit writer for output into 'buffer'.
 *
 * @param s           writer to initialise
 * @param buffer      destination buffer
 * @param buffer_size size of 'buffer' in bytes; a negative size is treated
 *                    as "no buffer" (NULL, size 0)
 */
static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
{
    if (buffer_size < 0) {
        buffer      = NULL;
        buffer_size = 0;
    }

    s->buf     = buffer;
    s->buf_end = s->buf + buffer_size;
#ifdef ALT_BITSTREAM_WRITER
    /* the ALT writer ORs bits into the buffer, so its first word starts cleared */
    ((uint32_t*)(s->buf))[0]=0;
    s->index = 0;
// memset(buffer, 0, buffer_size);
#else
    /* empty accumulator: all 32 bits are free */
    s->bit_buf  = 0;
    s->bit_left = 32;
    s->buf_ptr  = s->buf;
#endif
}
/**
 * Returns the number of bits written so far.
 *
 * For the default writer this is the bytes already stored plus the bits
 * still pending in the accumulator.
 */
static inline int put_bits_count(PutBitContext *s)
{
#ifdef ALT_BITSTREAM_WRITER
    return s->index;
#else
    const int pending_bits = 32 - s->bit_left;

    return (s->buf_ptr - s->buf) * 8 + pending_bits;
#endif
}
/**
 * Pads the end of the output stream with zeros and writes out all pending
 * bits.
 *
 * Default writer: the accumulator is left-aligned, emitted one byte at a
 * time through buf_ptr and then reset to empty (bit_left = 32, bit_buf = 0).
 * ALT writer: the stream is padded to a byte boundary with align_put_bits().
 *
 * @param s bit writer to flush
 */
static inline void flush_put_bits(PutBitContext *s)
{
#ifdef ALT_BITSTREAM_WRITER
    /* The file-scope prototype only appears further down in this header. */
    void align_put_bits(PutBitContext *s);

    align_put_bits(s);
#else
    /* bit_left == 32 means nothing is pending; shifting a 32-bit value by
     * 32 is undefined in C, so skip the alignment shift in that case. */
    if (s->bit_left < 32)
        s->bit_buf <<= s->bit_left;
    while (s->bit_left < 32) {
        /* XXX: should test end of buffer */
        *s->buf_ptr++ = s->bit_buf >> 24;
        s->bit_buf  <<= 8;
        s->bit_left  += 8;
    }
    s->bit_left = 32;
    s->bit_buf  = 0;
#endif
}
/* Defined in bitstream.c: pads the stream with zero bits up to the next byte boundary. */
void align_put_bits(PutBitContext *s);
/* Defined in bitstream.c: writes the characters of 's' as 8 bits each and,
 * when put_zero is non-zero, a terminating zero byte. */
void ff_put_string(PutBitContext * pbc, char *s, int put_zero);
/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
/*
 * State of a bit reader.  buffer/buffer_end delimit the input and
 * size_in_bits is its length in bits (all three set by init_get_bits()).
 * The remaining fields depend on the reader back end selected at compile
 * time:
 *   ALT:      index is the current read position in bits.
 *   LIBMPEG2: buffer_ptr / cache / bit_count hold a 16-bit-refill cache.
 *   A32:      buffer_ptr / cache0 / cache1 / bit_count hold a
 *             32-bit-refill, two-word cache.
 */
typedef struct GetBitContext {
const uint8_t *buffer, *buffer_end;
#ifdef ALT_BITSTREAM_READER
int index;
#elif defined LIBMPEG2_BITSTREAM_READER
uint8_t *buffer_ptr;
uint32_t cache;
int bit_count;
#elif defined A32_BITSTREAM_READER
uint32_t *buffer_ptr;
uint32_t cache0;
uint32_t cache1;
int bit_count;
#endif
int size_in_bits;
} GetBitContext;
/* Element type of the VLC lookup tables. */
#define VLC_TYPE int16_t
/*
 * Variable-length-code lookup table descriptor.
 *   bits            number of bits used to index the first-level table
 *   table           entries as [code, bits] pairs; a negative 'bits' marks a
 *                   link to a sub-table (see build_table() in bitstream.c)
 *   table_size      number of entries currently in use
 *   table_allocated number of entries considered available in 'table'
 */
typedef struct VLC {
int bits;
VLC_TYPE (*table)[2]; /* code, bits */
int table_size, table_allocated;
} VLC;
/*
 * Table element read by GET_RL_VLC(): 'level' and 'run' are the decoded
 * values and 'len' the code length in bits.  A negative 'len' makes
 * GET_RL_VLC() continue in a sub-table, using 'level' as its offset.
 */
typedef struct RL_VLC_ELEM {
int16_t level;
int8_t len;
uint8_t run;
} RL_VLC_ELEM;
/* Architectures on which put_bits() stores its output bytewise whenever the
 * destination is not 4-byte aligned. */
#if defined(ARCH_SPARC) || defined(ARCH_ARMV4L) || defined(ARCH_MIPS) || defined(ARCH_BFIN)
#define UNALIGNED_STORES_ARE_BAD
#endif
/*
 * unaligned16/32/64(ptr): load a 16/32/64-bit value from a possibly
 * misaligned address; used to avoid misaligned-access exceptions on some
 * archs (alpha, ...).
 *   x86 / ColdFire: plain pointer dereference.
 *   GCC:            load through a packed struct.
 *   DEC C:          load through an __unaligned pointer.
 *   otherwise:      plain pointer dereference inside an inline function.
 * The helper macro unaligned(x) only generates the three functions and is
 * undefined again afterwards.
 */
#if defined(ARCH_X86) || defined(CPU_COLDFIRE)
# define unaligned16(a) (*(const uint16_t*)(a))
# define unaligned32(a) (*(const uint32_t*)(a))
# define unaligned64(a) (*(const uint64_t*)(a))
#else
# ifdef __GNUC__
# define unaligned(x) \
static inline uint##x##_t unaligned##x(const void *v) { \
struct Unaligned { \
uint##x##_t i; \
} __attribute__((packed)); \
\
return ((const struct Unaligned *) v)->i; \
}
# elif defined(__DECC)
# define unaligned(x) \
static inline uint##x##_t unaligned##x(const void *v) { \
return *(const __unaligned uint##x##_t *) v; \
}
# else
# define unaligned(x) \
static inline uint##x##_t unaligned##x(const void *v) { \
return *(const uint##x##_t *) v; \
}
# endif
unaligned(16)
unaligned(32)
unaligned(64)
#undef unaligned
#endif /* defined(ARCH_X86) */
#ifndef ALT_BITSTREAM_WRITER
/*
 * Default bit writer: appends the low 'n' bits of 'value' to the stream.
 *
 * Bits are collected in the 32-bit accumulator s->bit_buf.  While the new
 * bits fit (n < bit_left) they are only shifted in.  Otherwise the
 * accumulator is completed with the upper part of 'value', stored as one
 * big-endian 32-bit word at s->buf_ptr (bytewise on UNALIGNED_STORES_ARE_BAD
 * targets when buf_ptr is not 4-byte aligned), buf_ptr advances by 4 and the
 * accumulator restarts with 'value'.
 *
 * 'value' is expected to fit in n bits (see the commented-out assert); no
 * masking is done here.  No check against s->buf_end is made.
 */
static inline void put_bits(PutBitContext *s, int n, unsigned int value)
{
unsigned int bit_buf;
int bit_left;
// printf("put_bits=%d %x\n", n, value);
// assert(n == 32 || value < (1U << n));
bit_buf = s->bit_buf;
bit_left = s->bit_left;
// printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf);
/* XXX: optimize */
if (n < bit_left) {
bit_buf = (bit_buf<<n) | value;
bit_left-=n;
} else {
bit_buf<<=bit_left;
bit_buf |= value >> (n - bit_left);
#ifdef UNALIGNED_STORES_ARE_BAD
if (3 & (intptr_t) s->buf_ptr) {
s->buf_ptr[0] = bit_buf >> 24;
s->buf_ptr[1] = bit_buf >> 16;
s->buf_ptr[2] = bit_buf >> 8;
s->buf_ptr[3] = bit_buf ;
} else
#endif
*(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
//printf("bitbuf = %08x\n", bit_buf);
s->buf_ptr+=4;
/* the accumulator now only holds the part of 'value' that did not fit */
bit_left+=32 - n;
bit_buf = value;
}
s->bit_buf = bit_buf;
s->bit_left = bit_left;
}
#endif
#ifdef ALT_BITSTREAM_WRITER
/*
 * ALT bit writer: appends the low 'n' bits of 'value' at bit position
 * s->index and advances s->index by n.
 *
 * Two variants, each with an x86 inline-asm version and a C fallback:
 *   ALIGNED_BITSTREAM_WRITER: works on the aligned 32-bit word containing
 *     the position (index>>5); the value is ORed into that word and the
 *     bits that spill over are stored into the following word.
 *   otherwise: works on the (possibly unaligned) 32-bit word starting at
 *     byte index>>3; the value is ORed in and the following word is cleared.
 * Both variants touch memory past the current position, so the buffer needs
 * slack after the last written bit.
 * NOTE(review): the C fallbacks shift by amounts derived from n and index
 * without range checks -- confirm the callers' limits on n.
 */
static inline void put_bits(PutBitContext *s, int n, unsigned int value)
{
# ifdef ALIGNED_BITSTREAM_WRITER
# if defined(ARCH_X86)
asm volatile(
"movl %0, %%ecx \n\t"
"xorl %%eax, %%eax \n\t"
"shrdl %%cl, %1, %%eax \n\t"
"shrl %%cl, %1 \n\t"
"movl %0, %%ecx \n\t"
"shrl $3, %%ecx \n\t"
"andl $0xFFFFFFFC, %%ecx \n\t"
"bswapl %1 \n\t"
"orl %1, (%2, %%ecx) \n\t"
"bswapl %%eax \n\t"
"addl %3, %0 \n\t"
"movl %%eax, 4(%2, %%ecx) \n\t"
: "=&r" (s->index), "=&r" (value)
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
: "%eax", "%ecx"
);
# else
int index= s->index;
uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5);
value<<= 32-n;
ptr[0] |= be2me_32(value>>(index&31));
ptr[1] = be2me_32(value<<(32-(index&31)));
//if(n>24) printf("%d %d\n", n, value);
index+= n;
s->index= index;
# endif
# else //ALIGNED_BITSTREAM_WRITER
# if defined(ARCH_X86)
asm volatile(
"movl $7, %%ecx \n\t"
"andl %0, %%ecx \n\t"
"addl %3, %%ecx \n\t"
"negl %%ecx \n\t"
"shll %%cl, %1 \n\t"
"bswapl %1 \n\t"
"movl %0, %%ecx \n\t"
"shrl $3, %%ecx \n\t"
"orl %1, (%%ecx, %2) \n\t"
"addl %3, %0 \n\t"
"movl $0, 4(%%ecx, %2) \n\t"
: "=&r" (s->index), "=&r" (value)
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
: "%ecx"
);
# else
int index= s->index;
uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3));
ptr[0] |= be2me_32(value<<(32-n-(index&7) ));
ptr[1] = 0;
//if(n>24) printf("%d %d\n", n, value);
index+= n;
s->index= index;
# endif
# endif //!ALIGNED_BITSTREAM_WRITER
}
#endif
/**
 * Returns the address of the byte the writer is currently positioned at:
 * the byte containing bit 'index' for the ALT writer, buf_ptr otherwise
 * (bits still held in the accumulator are not counted).
 */
static inline uint8_t* pbBufPtr(PutBitContext *s)
{
    uint8_t *current_byte;

#ifdef ALT_BITSTREAM_WRITER
    current_byte = s->buf + (s->index>>3);
#else
    current_byte = s->buf_ptr;
#endif
    return current_byte;
}
/**
 * Advances the write position by n bytes without writing anything.
 *
 * PutBitContext must be flushed & aligned to a byte boundary before calling this.
 *
 * @param s bit writer
 * @param n number of bytes to skip
 */
static inline void skip_put_bytes(PutBitContext *s, int n){
// assert((put_bits_count(s)&7)==0);
#ifdef ALT_BITSTREAM_WRITER
    /* FIXME may need some cleaning of the buffer */
    s->index += n<<3;
#else
// assert(s->bit_left==32);
    s->buf_ptr += n;
#endif
}
/**
 * Skips the given number of bits.
 * Must only be used if the actual values in the bitstream do not matter.
 *
 * @param s bit writer
 * @param n number of bits to skip
 */
static inline void skip_put_bits(PutBitContext *s, int n){
#ifdef ALT_BITSTREAM_WRITER
    s->index += n;
#else
    /* free accumulator bits after the skip; whole 32-bit units move buf_ptr */
    const int remaining = s->bit_left - n;

    s->buf_ptr -= remaining>>5;
    s->bit_left = remaining & 31;
#endif
}
/**
 * Changes the end of the buffer: buf_end becomes buf + size.
 *
 * @param s    bit writer
 * @param size new buffer size in bytes, counted from s->buf
 */
static inline void set_put_bits_buffer_size(PutBitContext *s, int size){
    uint8_t *const start = s->buf;

    s->buf_end = start + size;
}
/* Bitstream reader API docs:
name
arbitrary name which is used as prefix for the internal variables
gb
getbitcontext
OPEN_READER(name, gb)
loads gb into local variables
CLOSE_READER(name, gb)
stores local vars in gb
UPDATE_CACHE(name, gb)
refills the internal cache from the bitstream
after this call at least MIN_CACHE_BITS will be available,
GET_CACHE(name, gb)
will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit)
SHOW_UBITS(name, gb, num)
will return the next num bits
SHOW_SBITS(name, gb, num)
will return the next num bits and do sign extension
SKIP_BITS(name, gb, num)
will skip over the next num bits
note, this is equivalent to SKIP_CACHE; SKIP_COUNTER
SKIP_CACHE(name, gb, num)
will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER)
SKIP_COUNTER(name, gb, num)
will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS)
LAST_SKIP_CACHE(name, gb, num)
will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing
LAST_SKIP_BITS(name, gb, num)
is equivalent to LAST_SKIP_CACHE; SKIP_COUNTER
for examples see get_bits, show_bits, skip_bits, get_vlc
*/
/**
 * Reads a 32-bit big-endian value from a possibly unaligned address.
 *
 * With CONFIG_ALIGN the value is assembled from four single-byte loads;
 * otherwise it is loaded with unaligned32() and converted with be2me_32().
 *
 * @param v address of the first (most significant) byte
 * @return the 32-bit value, returned as int
 */
static inline int unaligned32_be(const void *v)
{
#ifdef CONFIG_ALIGN
    const uint8_t *p=v;
    /* Combine as uint32_t: shifting the int-promoted top byte into bit 31
     * would overflow a signed int. */
    return (int)(((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
                 ((uint32_t)p[2] <<  8) |  (uint32_t)p[3]);
#else
    return be2me_32( unaligned32(v)); //original
#endif
}
/**
 * Reads a 32-bit little-endian value from a possibly unaligned address.
 *
 * With CONFIG_ALIGN the value is assembled from four single-byte loads;
 * otherwise it is loaded with unaligned32() and converted with le2me_32().
 *
 * @param v address of the first (least significant) byte
 * @return the 32-bit value, returned as int
 */
static inline int unaligned32_le(const void *v)
{
#ifdef CONFIG_ALIGN
    const uint8_t *p=v;
    /* Combine as uint32_t: shifting the int-promoted top byte into bit 31
     * would overflow a signed int. */
    return (int)(((uint32_t)p[3] << 24) | ((uint32_t)p[2] << 16) |
                 ((uint32_t)p[1] <<  8) |  (uint32_t)p[0]);
#else
    return le2me_32( unaligned32(v)); //original
#endif
}
/*
 * Bitstream reader back ends.  Exactly one of the three variants below is
 * compiled (ALT, LIBMPEG2 or A32); each provides the macro set described in
 * the "Bitstream reader API docs" comment plus get_bits_count() and
 * skip_bits_long().
 *
 * Every multi-line macro ends on its last body line, without a trailing
 * backslash.  A dangling continuation would splice whatever line follows
 * (the next #define, #ifdef, #else or #endif) into the macro body as soon as
 * the separating blank line is lost.
 */
#ifdef ALT_BITSTREAM_READER
/* ALT reader: only a bit index is kept; the cache is reloaded from memory
 * on every UPDATE_CACHE. */
# define MIN_CACHE_BITS 25

# define OPEN_READER(name, gb)\
        int name##_index= (gb)->index;\
        int name##_cache= 0;

# define CLOSE_READER(name, gb)\
        (gb)->index= name##_index;

# ifdef ALT_BITSTREAM_READER_LE
# define UPDATE_CACHE(name, gb)\
        name##_cache= unaligned32_le( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) >> (name##_index&0x07);

# define SKIP_CACHE(name, gb, num)\
        name##_cache >>= (num);
# else
# define UPDATE_CACHE(name, gb)\
        name##_cache= unaligned32_be( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);

# define SKIP_CACHE(name, gb, num)\
        name##_cache <<= (num);
# endif

// FIXME name?
# define SKIP_COUNTER(name, gb, num)\
        name##_index += (num);

# define SKIP_BITS(name, gb, num)\
        {\
            SKIP_CACHE(name, gb, num)\
            SKIP_COUNTER(name, gb, num)\
        }

# define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
# define LAST_SKIP_CACHE(name, gb, num) ;

# ifdef ALT_BITSTREAM_READER_LE
# define SHOW_UBITS(name, gb, num)\
        ((name##_cache) & (NEG_USR32(0xffffffff,num)))

# define SHOW_SBITS(name, gb, num)\
        NEG_SSR32((name##_cache)<<(32-(num)), num)
# else
# define SHOW_UBITS(name, gb, num)\
        NEG_USR32(name##_cache, num)

# define SHOW_SBITS(name, gb, num)\
        NEG_SSR32(name##_cache, num)
# endif

# define GET_CACHE(name, gb)\
        ((uint32_t)name##_cache)

/* number of bits consumed so far */
static inline int get_bits_count(GetBitContext *s){
    return s->index;
}

/* advances the read position by n bits */
static inline void skip_bits_long(GetBitContext *s, int n){
    s->index += n;
}

#elif defined LIBMPEG2_BITSTREAM_READER
//libmpeg2 like reader
/* The cache is refilled 16 bits at a time from buffer_ptr. */

# define MIN_CACHE_BITS 17

# define OPEN_READER(name, gb)\
        int name##_bit_count=(gb)->bit_count;\
        int name##_cache= (gb)->cache;\
        uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;

# define CLOSE_READER(name, gb)\
        (gb)->bit_count= name##_bit_count;\
        (gb)->cache= name##_cache;\
        (gb)->buffer_ptr= name##_buffer_ptr;

#ifdef LIBMPEG2_BITSTREAM_READER_HACK

# define UPDATE_CACHE(name, gb)\
    if(name##_bit_count >= 0){\
        name##_cache+= (int)be2me_16(*(uint16_t*)name##_buffer_ptr) << name##_bit_count;\
        name##_buffer_ptr += 2;\
        name##_bit_count-= 16;\
    }

#else

# define UPDATE_CACHE(name, gb)\
    if(name##_bit_count >= 0){\
        name##_cache+= ((name##_buffer_ptr[0]<<8) + name##_buffer_ptr[1]) << name##_bit_count;\
        name##_buffer_ptr+=2;\
        name##_bit_count-= 16;\
    }

#endif

# define SKIP_CACHE(name, gb, num)\
        name##_cache <<= (num);

# define SKIP_COUNTER(name, gb, num)\
        name##_bit_count += (num);

# define SKIP_BITS(name, gb, num)\
        {\
            SKIP_CACHE(name, gb, num)\
            SKIP_COUNTER(name, gb, num)\
        }

# define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
# define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)

# define SHOW_UBITS(name, gb, num)\
        NEG_USR32(name##_cache, num)

# define SHOW_SBITS(name, gb, num)\
        NEG_SSR32(name##_cache, num)

# define GET_CACHE(name, gb)\
        ((uint32_t)name##_cache)

/* number of bits consumed so far */
static inline int get_bits_count(GetBitContext *s){
    return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count;
}

/* advances the read position by n bits and rebuilds the cache from the
 * bytes just before the new buffer_ptr */
static inline void skip_bits_long(GetBitContext *s, int n){
    OPEN_READER(re, s)
    re_bit_count += n;
    re_buffer_ptr += 2*(re_bit_count>>4);
    re_bit_count &= 15;
    re_cache = ((re_buffer_ptr[-2]<<8) + re_buffer_ptr[-1]) << (16+re_bit_count);
    UPDATE_CACHE(re, s)
    CLOSE_READER(re, s)
}

#elif defined A32_BITSTREAM_READER
/* A32 reader: two 32-bit cache words, refilled one aligned 32-bit word at a
 * time from buffer_ptr. */

# define MIN_CACHE_BITS 32

# define OPEN_READER(name, gb)\
        int name##_bit_count=(gb)->bit_count;\
        uint32_t name##_cache0= (gb)->cache0;\
        uint32_t name##_cache1= (gb)->cache1;\
        uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;

# define CLOSE_READER(name, gb)\
        (gb)->bit_count= name##_bit_count;\
        (gb)->cache0= name##_cache0;\
        (gb)->cache1= name##_cache1;\
        (gb)->buffer_ptr= name##_buffer_ptr;

# define UPDATE_CACHE(name, gb)\
    if(name##_bit_count > 0){\
        const uint32_t next= be2me_32( *name##_buffer_ptr );\
        name##_cache0 |= NEG_USR32(next,name##_bit_count);\
        name##_cache1 |= next<<name##_bit_count;\
        name##_buffer_ptr++;\
        name##_bit_count-= 32;\
    }

#if defined(ARCH_X86)
# define SKIP_CACHE(name, gb, num)\
        asm(\
            "shldl %2, %1, %0 \n\t"\
            "shll %2, %1 \n\t"\
            : "+r" (name##_cache0), "+r" (name##_cache1)\
            : "Ic" ((uint8_t)(num))\
           );
#else
# define SKIP_CACHE(name, gb, num)\
        name##_cache0 <<= (num);\
        name##_cache0 |= NEG_USR32(name##_cache1,num);\
        name##_cache1 <<= (num);
#endif

# define SKIP_COUNTER(name, gb, num)\
        name##_bit_count += (num);

# define SKIP_BITS(name, gb, num)\
        {\
            SKIP_CACHE(name, gb, num)\
            SKIP_COUNTER(name, gb, num)\
        }

# define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
# define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)

# define SHOW_UBITS(name, gb, num)\
        NEG_USR32(name##_cache0, num)

# define SHOW_SBITS(name, gb, num)\
        NEG_SSR32(name##_cache0, num)

# define GET_CACHE(name, gb)\
        (name##_cache0)

/* number of bits consumed so far */
static inline int get_bits_count(GetBitContext *s){
    return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
}

/* advances the read position by n bits and rebuilds both cache words */
static inline void skip_bits_long(GetBitContext *s, int n){
    OPEN_READER(re, s)
    re_bit_count += n;
    re_buffer_ptr += re_bit_count>>5;
    re_bit_count &= 31;
    re_cache0 = be2me_32( re_buffer_ptr[-1] ) << re_bit_count;
    re_cache1 = 0;
    UPDATE_CACHE(re, s)
    CLOSE_READER(re, s)
}

#endif
/**
 * Reads an MPEG-1 DC style VLC (sign bit + mantissa with no MSB).
 * If the MSB of the read bits is not set, the value is negative.
 *
 * @param s bit reader
 * @param n length in bits
 * @return the decoded signed value
 * @author BERO
 */
static inline int get_xbits(GetBitContext *s, int n){
    int32_t window;
    int negate;

    OPEN_READER(re, s)
    UPDATE_CACHE(re, s)
    window = GET_CACHE(re,s);
    /* mask derived from the inverted top cache bit via a right shift by 31 */
    negate = (~window)>>31;
    LAST_SKIP_BITS(re, s, n)
    CLOSE_READER(re, s)

    return (NEG_USR32(negate ^ window, n) ^ negate) - negate;
}
/**
 * Reads the next n bits as a sign-extended value and advances the reader.
 *
 * @param s bit reader
 * @param n number of bits to read
 * @return the n bits, sign-extended (SHOW_SBITS)
 */
static inline int get_sbits(GetBitContext *s, int n){
    int value;

    OPEN_READER(re, s)
    UPDATE_CACHE(re, s)
    value = SHOW_SBITS(re, s, n);
    LAST_SKIP_BITS(re, s, n)
    CLOSE_READER(re, s)

    return value;
}
/**
 * Reads 1-17 bits as an unsigned value and advances the reader.
 * Note: the alt bitstream reader can read up to 25 bits, but the libmpeg2
 * reader can't.
 *
 * @param s bit reader
 * @param n number of bits to read
 * @return the n bits, zero-extended (SHOW_UBITS)
 */
static inline unsigned int get_bits(GetBitContext *s, int n){
    int value;

    OPEN_READER(re, s)
    UPDATE_CACHE(re, s)
    value = SHOW_UBITS(re, s, n);
    LAST_SKIP_BITS(re, s, n)
    CLOSE_READER(re, s)

    return value;
}
/**
 * Shows (peeks at) 1-17 bits without advancing the reader.
 * Note: the alt bitstream reader can read up to 25 bits, but the libmpeg2
 * reader can't.
 *
 * @param s bit reader
 * @param n number of bits to look at
 * @return the next n bits, zero-extended (SHOW_UBITS)
 */
static inline unsigned int show_bits(GetBitContext *s, int n){
    int peeked;

    OPEN_READER(re, s)
    UPDATE_CACHE(re, s)
    peeked = SHOW_UBITS(re, s, n);
// CLOSE_READER(re, s)
    /* the reader state is deliberately not written back */
    return peeked;
}
/* Advances the reader by n bits: the cache is refreshed, n bits are skipped
 * with LAST_SKIP_BITS and the reader state is written back. */
static inline void skip_bits(GetBitContext *s, int n){
/* Note: gcc seems to optimize this to s->index+=n for the ALT_READER :)) */
OPEN_READER(re, s)
UPDATE_CACHE(re, s)
LAST_SKIP_BITS(re, s, n)
CLOSE_READER(re, s)
}
/**
 * Reads a single bit and advances the reader by one.
 *
 * The ALT reader fetches the bit straight from the byte containing it
 * (LSB-first with ALT_BITSTREAM_READER_LE, MSB-first otherwise); the other
 * readers go through get_bits().
 *
 * @param s bit reader
 * @return 0 or 1
 */
static inline unsigned int get_bits1(GetBitContext *s){
#ifdef ALT_BITSTREAM_READER
    const int pos = s->index;
    const unsigned int byte = s->buffer[ pos>>3 ];

    s->index = pos + 1;
#ifdef ALT_BITSTREAM_READER_LE
    return (byte >> (pos&0x07)) & 1;
#else
    return (byte >> (7 - (pos&0x07))) & 1;
#endif
#else
    return get_bits(s, 1);
#endif
}
/* Returns the next bit without advancing the reader (show_bits() with n = 1). */
static inline unsigned int show_bits1(GetBitContext *s){
return show_bits(s, 1);
}
/* Advances the reader by one bit (skip_bits() with n = 1). */
static inline void skip_bits1(GetBitContext *s){
skip_bits(s, 1);
}
/**
 * Reads 0-32 bits.
 *
 * Up to 17 bits are read with a single get_bits() call.  Longer reads are
 * split into a 16-bit part and an (n-16)-bit part, combined according to the
 * reader's bit order.
 *
 * @param s bit reader
 * @param n number of bits to read
 * @return the n bits as an unsigned value
 */
static inline unsigned int get_bits_long(GetBitContext *s, int n){
    int ret;

    if (n <= 17)
        return get_bits(s, n);

#ifdef ALT_BITSTREAM_READER_LE
    /* little-endian bit order: the first 16 bits are the low half */
    ret = get_bits(s, 16);
    return ret | (get_bits(s, n-16) << 16);
#else
    /* the first 16 bits are the high part */
    ret = get_bits(s, 16) << (n-16);
    return ret | get_bits(s, n-16);
#endif
}
/**
 * Shows 0-32 bits without advancing the reader.
 *
 * Reads longer than 17 bits are done with get_bits_long() on the live
 * context, which is then restored from a saved copy.
 *
 * @param s bit reader
 * @param n number of bits to look at
 * @return the next n bits as an unsigned value
 */
static inline unsigned int show_bits_long(GetBitContext *s, int n){
    if (n > 17) {
        const GetBitContext saved = *s;
        const int value = get_bits_long(s, n);

        *s = saved;
        return value;
    }
    return show_bits(s, n);
}
/*
static inline int check_marker(GetBitContext *s, const char *msg)
{
int bit= get_bits1(s);
if(!bit)
av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
return bit;
}
*/
/**
 * Initialises a GetBitContext.
 *
 * @param s        reader to initialise
 * @param buffer   bitstream buffer; must be FF_INPUT_BUFFER_PADDING_SIZE
 *                 bytes larger than the actual read bits, because some
 *                 optimized bitstream readers read 32 or 64 bits at once and
 *                 could read over the end
 * @param bit_size the size of the buffer in bits; if it, or the byte count
 *                 derived from it, is negative the reader is set up with a
 *                 NULL buffer of size 0
 */
static inline void init_get_bits(GetBitContext *s,
                                 const uint8_t *buffer, int bit_size)
{
    int byte_count = (bit_size+7)>>3;

    if (bit_size < 0 || byte_count < 0) {
        bit_size   = 0;
        byte_count = 0;
        buffer     = NULL;
    }

    s->buffer       = buffer;
    s->buffer_end   = buffer + byte_count;
    s->size_in_bits = bit_size;
#ifdef ALT_BITSTREAM_READER
    s->index=0;
#elif defined LIBMPEG2_BITSTREAM_READER
    /* start from the 2-byte aligned address at or below 'buffer' */
    s->buffer_ptr = (uint8_t*)((intptr_t)buffer&(~1));
    s->bit_count = 16 + 8*((intptr_t)buffer&1);
    skip_bits_long(s, 0);
#elif defined A32_BITSTREAM_READER
    /* start from the 4-byte aligned address at or below 'buffer' */
    s->buffer_ptr = (uint32_t*)((intptr_t)buffer&(~3));
    s->bit_count = 32 + 8*((intptr_t)buffer&3);
    skip_bits_long(s, 0);
#endif
}
/**
 * Advances the reader to the next byte boundary; does nothing when it is
 * already aligned.
 *
 * @param s bit reader
 */
static inline void align_get_bits(GetBitContext *s)
{
    const int pad = (-get_bits_count(s)) & 7;

    if (pad != 0)
        skip_bits(s, pad);
}
/* Builds the lookup tables of 'vlc' from the code lengths ('bits') and code
 * patterns ('codes'); defined in bitstream.c.  Returns -1 on failure and the
 * 'flags' argument otherwise. */
int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
const void *bits, int bits_wrap, int bits_size,
const void *codes, int codes_wrap, int codes_size,
int flags);
/* Flag bits for the 'flags' argument of init_vlc(). */
#define INIT_VLC_USE_STATIC 1
#define INIT_VLC_LE 2
/* NOTE(review): no definition of free_vlc() appears in the bitstream.c that
 * accompanies this header -- confirm before calling it. */
void free_vlc(VLC *vlc);
/**
 * Decodes one VLC code from an open reader (see OPEN_READER).
 *
 * code      receives the decoded table value
 * name, gb  reader prefix and context, as for the other reader macros
 * table     VLC table of [code, bits] pairs built by init_vlc()
 * bits      number of bits indexing the first-level table
 * max_depth maximum number of table levels to follow (1 to 3)
 *
 * A negative 'bits' field in the looked-up entry means the code continues in
 * a sub-table: the entry's code is the sub-table offset and -bits the number
 * of index bits to read next.  The bits of the final level are consumed with
 * SKIP_BITS.
 *
 * If the vlc code is invalid and max_depth=1 then no bits will be removed.
 * If the vlc code is invalid and max_depth>1 then the number of bits removed
 * is undefined.
 */
#define GET_VLC(code, name, gb, table, bits, max_depth)\
{\
int n, index, nb_bits;\
\
index= SHOW_UBITS(name, gb, bits);\
code = table[index][0];\
n = table[index][1];\
\
if(max_depth > 1 && n < 0){\
LAST_SKIP_BITS(name, gb, bits)\
UPDATE_CACHE(name, gb)\
\
nb_bits = -n;\
\
index= SHOW_UBITS(name, gb, nb_bits) + code;\
code = table[index][0];\
n = table[index][1];\
if(max_depth > 2 && n < 0){\
LAST_SKIP_BITS(name, gb, nb_bits)\
UPDATE_CACHE(name, gb)\
\
nb_bits = -n;\
\
index= SHOW_UBITS(name, gb, nb_bits) + code;\
code = table[index][0];\
n = table[index][1];\
}\
}\
SKIP_BITS(name, gb, n)\
}
/**
* Decode one run/level VLC symbol using an already opened reader `name`
* on `gb`.
*
* The next `bits` bits index `table`; the entry supplies `level` and a
* length n. A negative n (only honoured when max_depth > 1) marks a
* sub-table: the `bits` bits are skipped, the cache is refreshed when
* need_update is non-zero, and the next -n bits, offset by the current
* level, index the table again. `run` is taken from the final entry and
* n bits are skipped. At most two lookups are performed.
*/
#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\
{\
int n, index, nb_bits;\
\
index= SHOW_UBITS(name, gb, bits);\
level = table[index].level;\
n = table[index].len;\
\
if(max_depth > 1 && n < 0){\
SKIP_BITS(name, gb, bits)\
if(need_update){\
UPDATE_CACHE(name, gb)\
}\
\
nb_bits = -n;\
\
index= SHOW_UBITS(name, gb, nb_bits) + level;\
level = table[index].level;\
n = table[index].len;\
}\
run= table[index].run;\
SKIP_BITS(name, gb, n)\
}
/**
* parses a vlc code, faster than get_vlc()
* @param s the bitstream reader to consume bits from
* @param table the VLC lookup table (pairs of code and length)
* @param bits is the number of bits which will be read at once, must be
* identical to nb_bits in init_vlc()
* @param max_depth is the number of times `bits` bits must be read to completely
* read the longest vlc code
* = (max_vlc_length + bits - 1) / bits
* @return the decoded code as produced by GET_VLC()
*/
static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
int bits, int max_depth)
{
int code;
/* open a local reader `re` on s, refill it, decode, then write the
 * reader state back to s */
OPEN_READER(re, s)
UPDATE_CACHE(re, s)
GET_VLC(code, re, s, table, bits, max_depth)
CLOSE_READER(re, s)
return code;
}
#ifdef TRACE
/**
 * Log the lowest n bits of `bits` as binary digits, most significant
 * first, then pad with spaces up to a width of 24 characters.
 */
static inline void print_bin(int bits, int n)
{
    int pos = n - 1;
    int col;

    while (pos >= 0) {
        av_log(NULL, AV_LOG_DEBUG, "%d", (bits >> pos) & 1);
        pos--;
    }

    for (col = n; col < 24; col++)
        av_log(NULL, AV_LOG_DEBUG, " ");
}
/**
 * Tracing wrapper around get_bits(): reads n bits, logs them in binary
 * together with the position they were read from and the call site, and
 * returns the value read.
 */
static inline int get_bits_trace(GetBitContext *s, int n, char *file,
                                 const char *func, int line)
{
    const int value = get_bits(s, n);

    print_bin(value, n);
    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d bit @%5d in %s %s:%d\n",
           value, n, value, get_bits_count(s) - n, file, func, line);

    return value;
}
/**
 * Tracing wrapper around get_vlc2(): decodes one VLC symbol, logs the raw
 * bits that were consumed, their count, the decoded value, the starting
 * bit position and the call site, and returns the decoded value.
 */
static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2],
                                int bits, int max_depth, char *file,
                                const char *func, int line)
{
    const int peeked   = show_bits(s, 24);      /* bits before decoding */
    const int start    = get_bits_count(s);
    const int symbol   = get_vlc2(s, table, bits, max_depth);
    const int consumed = get_bits_count(s) - start;
    const int raw      = peeked >> (24 - consumed);

    print_bin(raw, consumed);
    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d vlc @%5d in %s %s:%d\n",
           raw, consumed, symbol, start, file, func, line);

    return symbol;
}
/**
 * Tracing wrapper around get_xbits(): logs the n raw bits, the value
 * get_xbits() derived from them, the bit position and the call site, and
 * returns that value.
 */
static inline int get_xbits_trace(GetBitContext *s, int n, char *file,
                                  const char *func, int line)
{
    const int raw   = show_bits(s, n);          /* bits before decoding */
    const int value = get_xbits(s, n);

    print_bin(raw, n);
    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d xbt @%5d in %s %s:%d\n",
           raw, n, value, get_bits_count(s) - n, file, func, line);

    return value;
}
/* With TRACE defined, every reader call made after this point is redirected
 * to its *_trace wrapper, which also logs the call site. get_vlc() always
 * decodes with a maximum depth of 3. */
#define get_bits(s, n) get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
#define get_bits1(s) get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__)
#define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
#define get_vlc(s, vlc) get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__)
#define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__)
/* trace printf: forwards to av_log() at debug level */
#define tprintf(p, ...) av_log(p, AV_LOG_DEBUG, __VA_ARGS__)
#else //TRACE
/* without TRACE, tprintf() expands to an empty block and its arguments are
 * discarded */
#define tprintf(p, ...) {}
#endif
/**
 * Read a value in the range 0..2: a first bit of 0 yields 0, otherwise a
 * second bit is read and 1 plus that bit is returned.
 */
static inline int decode012(GetBitContext *gb)
{
    if (!get_bits1(gb))
        return 0;

    return 1 + get_bits1(gb);
}
#endif /* BITSTREAM_H */
/*
* copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file bswap.h
* byte swap.
*/
#ifndef __BSWAP_H__
#define __BSWAP_H__
#ifdef HAVE_BYTESWAP_H
#include <byteswap.h>
#else
#undef ROCKBOX
#ifdef ROCKBOX
/* rockbox' optimised inline functions */
#define bswap_16(x) swap16(x)
#define bswap_32(x) swap32(x)
/**
 * Reverse the byte order of a 64-bit value.
 *
 * Each 32-bit half is byte-swapped with bswap_32() and the two halves are
 * then exchanged. The result is assembled with shifts rather than through
 * a union, so it does not depend on the byte order of the host: the former
 * union-based code never exchanged the halves on little-endian machines.
 */
static inline uint64_t ByteSwap64(uint64_t x)
{
    const uint32_t lo = (uint32_t)x;
    const uint32_t hi = (uint32_t)(x >> 32);

    /* swapped low half becomes the new high half and vice versa */
    return ((uint64_t)(uint32_t)bswap_32(lo) << 32) | (uint32_t)bswap_32(hi);
}
#define bswap_64(x) ByteSwap64(x)
#elif defined(ARCH_X86)
/* x86: swap the two bytes of a 16-bit value by exchanging the low and high
 * byte registers (%b0 / %h0) of the register holding x. The "q" constraint
 * requests a register that has such byte sub-registers. */
static inline unsigned short ByteSwap16(unsigned short x)
{
__asm("xchgb %b0,%h0" :
"=q" (x) :
"0" (x));
return x;
}
#define bswap_16(x) ByteSwap16(x)
/* x86: reverse the four bytes of a 32-bit value.
 * When __CPU__ is greater than 386 the single `bswap` instruction is used;
 * otherwise the same result is built from a byte exchange, a 16-bit rotate
 * and a second byte exchange. Both variants share the closing input operand
 * line after the #endif. */
static inline unsigned int ByteSwap32(unsigned int x)
{
#if __CPU__ > 386
__asm("bswap %0":
"=r" (x) :
#else
__asm("xchgb %b0,%h0\n"
" rorl $16,%0\n"
" xchgb %b0,%h0":
"=q" (x) :
#endif
"0" (x));
return x;
}
#define bswap_32(x) ByteSwap32(x)
/* x86: reverse the eight bytes of a 64-bit value.
 * Each 32-bit half is byte-swapped with bswap_32() and passed in as an input
 * operand; the `xchgl` then exchanges the two halves, which are read back
 * through the union as one 64-bit value. */
static inline unsigned long long int ByteSwap64(unsigned long long int x)
{
register union { __extension__ uint64_t __ll;
uint32_t __l[2]; } __x;
asm("xchgl %0,%1":
"=r"(__x.__l[0]),"=r"(__x.__l[1]):
"0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32))));
return __x.__ll;
}
#define bswap_64(x) ByteSwap64(x)
#elif defined(ARCH_SH4)
/* SH4: swap the two bytes of a 16-bit value with a single `swap.b`. */
static inline uint16_t ByteSwap16(uint16_t x) {
__asm__("swap.b %0,%0":"=r"(x):"0"(x));
return x;
}
/* SH4: reverse the four bytes of a 32-bit value using the sequence
 * swap.b (bytes of the low half), swap.w (exchange the 16-bit halves),
 * swap.b (bytes of the new low half). */
static inline uint32_t ByteSwap32(uint32_t x) {
__asm__(
"swap.b %0,%0\n"
"swap.w %0,%0\n"
"swap.b %0,%0\n"
:"=r"(x):"0"(x));
return x;
}
#define bswap_16(x) ByteSwap16(x)
#define bswap_32(x) ByteSwap32(x)
/**
 * Reverse the byte order of a 64-bit value.
 *
 * Each 32-bit half is byte-swapped with bswap_32() and the two halves are
 * then exchanged. The result is assembled with shifts rather than through
 * a union, so it does not depend on the byte order of the host: the former
 * union-based code never exchanged the halves on little-endian machines.
 */
static inline uint64_t ByteSwap64(uint64_t x)
{
    const uint32_t lo = (uint32_t)x;
    const uint32_t hi = (uint32_t)(x >> 32);

    /* swapped low half becomes the new high half and vice versa */
    return ((uint64_t)bswap_32(lo) << 32) | bswap_32(hi);
}
#define bswap_64(x) ByteSwap64(x)
#else
/* Portable fallbacks used when no architecture specific version applies.
 * The macros evaluate their argument more than once. */
#define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)
// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
#define bswap_32(x) \
((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
(((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
/**
 * Reverse the byte order of a 64-bit value: byte-swap both 32-bit halves
 * and exchange them.
 */
static inline uint64_t ByteSwap64(uint64_t x)
{
    const uint32_t low_half  = (uint32_t)x;
    const uint32_t high_half = (uint32_t)(x >> 32);
    const uint32_t new_high  = bswap_32(low_half);
    const uint32_t new_low   = bswap_32(high_half);

    return ((uint64_t)new_high << 32) | new_low;
}
#define bswap_64(x) ByteSwap64(x)
#endif /* !ARCH_X86 */
#endif /* !HAVE_BYTESWAP_H */
// be2me ... BigEndian to MachineEndian
// le2me ... LittleEndian to MachineEndian
// On a big-endian host (WORDS_BIGENDIAN defined) the be2me_* macros are the
// identity and the le2me_* macros byte-swap; on any other host the roles are
// reversed.
#ifdef WORDS_BIGENDIAN
#define be2me_16(x) (x)
#define be2me_32(x) (x)
#define be2me_64(x) (x)
#define le2me_16(x) bswap_16(x)
#define le2me_32(x) bswap_32(x)
#define le2me_64(x) bswap_64(x)
#else
#define be2me_16(x) bswap_16(x)
#define be2me_32(x) bswap_32(x)
#define be2me_64(x) bswap_64(x)
#define le2me_16(x) (x)
#define le2me_32(x) (x)
#define le2me_64(x) (x)
#endif
#endif /* __BSWAP_H__ */
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <inttypes.h>
#include "fft.h"
#include "wmafixed.h"
/* Placement attributes, defined empty here so the annotated declarations
 * compile unchanged (presumably memory-section hints in the Rockbox
 * original -- confirm). */
#define IBSS_ATTR
#define ICONST_ATTR
#define ICODE_ATTR
/* Twiddle factor table shared by every FFT size; filled by
 * fft_init_global() and indexed by fft_calc_unscaled() with a
 * size-dependent stride. */
FFTComplex exptab0[512] IBSS_ATTR;
/* butter fly op
 * Computes the sum and difference of two complex values:
 *   (pre, pim) = (pre1, pim1) + (qre1, qim1)
 *   (qre, qim) = (pre1, pim1) - (qre1, qim1)
 * All four inputs are copied into locals before any output is written, so
 * the outputs may name the same objects as the inputs. No scaling is done. */
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
{ \
int32_t ax, ay, bx, by; \
bx=pre1; \
by=pim1; \
ax=qre1; \
ay=qim1; \
pre = (bx + ax); \
pim = (by + ay); \
qre = (bx - ax); \
qim = (by - ay); \
}
/**
 * In-place FFT of 2^s->nbits complex points.
 *
 * The butterflies only add and subtract, so no scaling is applied to the
 * result. Twiddle factors are read from the global table exptab0 with a
 * stride of 1 << (10 - nbits).
 *
 * @param s supplies nbits and inverse; no other field is read here
 * @param z the data, transformed in place
 * @return always 0
 *
 * NOTE(review): the do/while loops run at least once, so this appears to
 * require nbits >= 2, and the table stride requires nbits <= 10 -- confirm
 * that all callers respect this. The element order expected in z is not
 * checked here; the caller visible in this file stores its input through a
 * bit-reversal table first.
 */
int fft_calc_unscaled(FFTContext *s, FFTComplex *z)
{
int ln = s->nbits;
int j, np, np2;
int nblocks, nloops;
register FFTComplex *p, *q;
int l;
int32_t tmp_re, tmp_im;
/* stride into exptab0 for this transform size */
int tabshift = 10-ln;
np = 1 << ln;
/* pass 0 */
/* butterflies on adjacent pairs, no twiddle factor needed */
p=&z[0];
j=(np >> 1);
do
{
BF(p[0].re, p[0].im, p[1].re, p[1].im,
p[0].re, p[0].im, p[1].re, p[1].im);
p+=2;
}
while (--j != 0);
/* pass 1 */
/* groups of four: the second butterfly uses p[3] with its components
 * exchanged and one of them negated; which one depends on s->inverse */
p=&z[0];
j=np >> 2;
if (s->inverse)
{
do
{
BF(p[0].re, p[0].im, p[2].re, p[2].im,
p[0].re, p[0].im, p[2].re, p[2].im);
BF(p[1].re, p[1].im, p[3].re, p[3].im,
p[1].re, p[1].im, -p[3].im, p[3].re);
p+=4;
}
while (--j != 0);
}
else
{
do
{
BF(p[0].re, p[0].im, p[2].re, p[2].im,
p[0].re, p[0].im, p[2].re, p[2].im);
BF(p[1].re, p[1].im, p[3].re, p[3].im,
p[1].re, p[1].im, p[3].im, -p[3].re);
p+=4;
}
while (--j != 0);
}
/* pass 2 .. ln-1 */
/* each pass halves the number of blocks and doubles the butterfly span */
nblocks = np >> 3;
nloops = 1 << 2;
np2 = np >> 1;
do
{
p = z;
q = z + nloops;
for (j = 0; j < nblocks; ++j)
{
/* first butterfly of the block is done without a twiddle multiply */
BF(p->re, p->im, q->re, q->im,
p->re, p->im, q->re, q->im);
p++;
q++;
for(l = nblocks; l < np2; l += nblocks)
{
/* multiply q by the twiddle factor, then butterfly with p */
CMUL(&tmp_re, &tmp_im, exptab0[(l<<tabshift)].re, exptab0[(l<<tabshift)].im, q->re, q->im);
//CMUL(&tmp_re, &tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
BF(p->re, p->im, q->re, q->im,
p->re, p->im, tmp_re, tmp_im);
p++;
q++;
}
/* jump over the half that was just processed through q */
p += nloops;
q += nloops;
}
nblocks = nblocks >> 1;
nloops = nloops << 1;
}
while (nblocks != 0);
return 0;
}
/**
 * Fill the shared twiddle table exptab0.
 *
 * For each i in [0, 512) the fixed-point ratio i/1024 is passed (shifted
 * left by 16) to fsincos(); the cosine it reports goes to .re and the sine
 * it returns goes to .im.
 *
 * @return always 0
 */
int fft_init_global(void)
{
    const int points = 1 << 10;
    const int32_t sign = 1;     /* the sine is stored with a positive sign */
    const int32_t points_fix = itofix32(points);
    int idx = 0;

    while (idx < points / 2)
    {
        const int32_t idx_fix = itofix32(idx);
        const int32_t ratio = fixdiv32(idx_fix, points_fix);
        int32_t cosine;
        const int32_t sine = fsincos(ratio << 16, &cosine);

        exptab0[idx].re = cosine;
        exptab0[idx].im = sine * sign;
        idx++;
    }

    return 0;
}
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef WMA_FFT_H
#define WMA_FFT_H 1
#include <inttypes.h>
/* scalar sample type of the fixed-point FFT */
typedef int32_t FFTSample;
/* one complex sample: real and imaginary part as 32-bit integers */
typedef struct FFTComplex
{
int32_t re, im;
}
FFTComplex;
/* Parameters of one FFT instance.
 * fft_calc_unscaled() reads only nbits and inverse; the remaining members
 * are not referenced by the FFT code in this file (presumably kept from
 * the original FFmpeg layout -- confirm before relying on them). */
typedef struct FFTContext
{
int nbits; /* log2 of the number of complex points */
int inverse; /* non-zero selects the inverse transform */
uint16_t *revtab;
FFTComplex *exptab;
FFTComplex *exptab1; /* only used by SSE code */
int (*fft_calc)(struct FFTContext *s, FFTComplex *z);
}
FFTContext;
/* in-place FFT of 1 << s->nbits points in z; always returns 0 */
int fft_calc_unscaled(FFTContext *s, FFTComplex *z);
/* fills the global twiddle table; must run before fft_calc_unscaled() is
 * used; always returns 0 */
int fft_init_global(void);
#endif
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <string.h>
#include "wmafixed.h"
#include "mdct.h"
/*these are the sin and cos rotations used by the MDCT*/
/*accessed too infrequently to give much speedup in IRAM*/
/* tcosarray[k] / tsinarray[k] are set by mdct_init_global() to point at
 * tcos<k> / tsin<k>; ff_mdct_init() selects entry 12 - nbits and fills it. */
int32_t *tcosarray[5], *tsinarray[5];
int32_t tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64];
int32_t tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64];
/* 10-bit bit-reversal table, filled by mdct_init_global() and read by
 * ff_imdct_calc() with a size-dependent stride */
uint16_t revtab0[1024];
/**
 * Prepare an MDCT/IMDCT context of size 2^nbits.
 *
 * The context is cleared, bound to the global rotation tables selected by
 * 12 - nbits (so mdct_init_global() must have run before), and those
 * tables are filled with n/4 sine/cosine pairs. The embedded FFT is set up
 * for 2^(nbits-2) points.
 *
 * @param s       context to initialise
 * @param nbits   log2 of the transform size, expected in 8..12
 * @param inverse stored in the embedded FFT context
 * @return always 0
 */
int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
{
    const int table = 12 - nbits;   /* nbits ranges from 12 to 8 inclusive */
    const int size = 1 << nbits;
    const int quarter = size >> 2;
    int k;

    memset(s, 0, sizeof(*s));
    s->n = size;
    s->nbits = nbits;
    s->tcos = tcosarray[table];
    s->tsin = tsinarray[table];

    for (k = 0; k < quarter; k++)
    {
        int32_t angle = itofix32(k) + 0x2000;

        angle = angle >> nbits;
        /* original author's note: it is not remembered why this works,
           but it seems to agree for ~24 bits, maybe more */
        s->tsin[k] = - fsincos(angle << 16, &(s->tcos[k]));
        s->tcos[k] *= -1;
    }

    s->fft.nbits = nbits - 2;
    s->fft.inverse = inverse;

    return 0;
}
/**
* Compute inverse MDCT of size N = 2^nbits
* @param s context prepared by ff_mdct_init()
* @param output N samples; also used as FFT work area
* @param input N/2 samples; NOTE: overwritten, the post rotation stores
* its result back into this buffer
*/
void ff_imdct_calc(MDCTContext *s,
int32_t *output,
int32_t *input)
{
int k, n8, n4, n2, n, j,scale;
const int32_t *tcos = s->tcos;
const int32_t *tsin = s->tsin;
const int32_t *in1, *in2;
/* both buffers are reinterpreted as arrays of complex values */
FFTComplex *z1 = (FFTComplex *)output;
FFTComplex *z2 = (FFTComplex *)input;
/* revtab0 holds 10-bit reversals; smaller sizes index it with a stride */
int revtabshift = 12 - s->nbits;
n = 1 << s->nbits;
n2 = n >> 1;
n4 = n >> 2;
n8 = n >> 3;
/* pre rotation */
/* pairs are taken from both ends of the input and written to the
 * bit-reversed position in z1 */
in1 = input;
in2 = input + n2 - 1;
for(k = 0; k < n4; k++)
{
j=revtab0[k<<revtabshift];
CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]);
in1 += 2;
in2 -= 2;
}
/* scale is assigned here but never read afterwards */
scale = fft_calc_unscaled(&s->fft, z1);
/* post rotation + reordering */
/* rotated values go to z2, i.e. into the caller's input buffer */
for(k = 0; k < n4; k++)
{
CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]);
}
/* spread the n/4 complex values over the n output samples; each value is
 * written twice, in some cases negated */
for(k = 0; k < n8; k++)
{
int32_t r1,r2,r3,r4,r1n,r2n,r3n;
r1 = z2[n8 + k].im;
r1n = r1 * -1;
r2 = z2[n8-1-k].re;
r2n = r2 * -1;
r3 = z2[k+n8].re;
r3n = r3 * -1;
r4 = z2[n8-k-1].im;
output[2*k] = r1n;
output[n2-1-2*k] = r1;
output[2*k+1] = r2;
output[n2-1-2*k-1] = r2n;
output[n2 + 2*k]= r3n;
output[n-1- 2*k]= r3n;
output[n2 + 2*k+1]= r4;
output[n-2 - 2 * k] = r4;
}
}
/**
 * One-time global MDCT setup: publishes the per-size rotation tables
 * through tcosarray/tsinarray, builds the 10-bit bit-reversal table
 * revtab0 and initialises the FFT twiddle table.
 *
 * @return always 0
 */
int mdct_init_global(void)
{
    /* although seemingly degenerate, these tables cannot actually be merged
       together without a substantial increase in error which is unjustified
       by the tiny memory savings */
    int32_t *const cos_tables[5] = { tcos0, tcos1, tcos2, tcos3, tcos4 };
    int32_t *const sin_tables[5] = { tsin0, tsin1, tsin2, tsin3, tsin4 };
    int idx, bit;

    for (idx = 0; idx < 5; idx++)
    {
        tcosarray[idx] = cos_tables[idx];
        tsinarray[idx] = sin_tables[idx];
    }

    /* the MDCT bit reverse table is built here rather than in fft_init;
       it is hard coded for the largest size, smaller sizes reuse it */
    for (idx = 0; idx < 1024; idx++)
    {
        int reversed = 0;

        /* shift the bits of idx in from the low end, lowest bit first */
        for (bit = 0; bit < 10; bit++)
            reversed = (reversed << 1) | ((idx >> bit) & 1);

        revtab0[idx] = reversed;
    }

    fft_init_global();
    return 0;
}
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef WMA_MDCT_H
#define WMA_MDCT_H 1
#include "fft.h"
/* State of one (I)MDCT size, set up by ff_mdct_init(). */
typedef struct MDCTContext
{
int n; /* size of MDCT (i.e. number of input data * 2) */
int nbits; /* n = 2^nbits */
/* pre/post rotation tables */
/* these point into global tables, they are not owned by the context */
int32_t *tcos;
int32_t *tsin;
/* embedded FFT, configured for nbits - 2 */
FFTContext fft;
}
MDCTContext;
/* initialise s for a transform of 1 << nbits samples; always returns 0 */
int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
/* inverse MDCT: N/2 input samples to N output samples; input is overwritten */
void ff_imdct_calc(MDCTContext *s, int32_t *output, int32_t *input);
/* one-time setup of the global tables used by the two functions above */
int mdct_init_global(void);
#endif
/*****************************************************************************
* wma.c: wma decoder using integer decoder from Rockbox, based on FFmpeg
*****************************************************************************
* Copyright (C) 2008-2009 M2X
*
* Authors: Rafaël Carré <rcarre@m2x.nl>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
/*****************************************************************************
* Preamble
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_codec.h>
#include <vlc_aout.h>
#include <vlc_block_helper.h>
#include <vlc_bits.h>
#include <assert.h>
#include "wmadec.h"
/*****************************************************************************
* decoder_sys_t : wma decoder descriptor
*****************************************************************************/
/* Private state of one decoder instance; allocated in OpenDecoder() and
 * freed in CloseDecoder(). */
struct decoder_sys_t
{
audio_date_t end_date; /* To set the PTS */
WMADecodeContext wmadec; /* state of the fixed point WMA decoder */
int32_t *p_output; /* buffer where the frames are rendered */
/* to not give too much samples at once to the audio output */
int8_t *p_samples; /* read position inside p_output, advanced by SplitBuffer() */
unsigned int i_samples; /* number of buffered samples available */
};
/* FIXME : check supported configurations */
/* channel configuration */
/* Indexed by channel count (0..6); each entry is the AOUT_CHAN_* mask used
 * for both the original and the physical channel layout. Entry 0 is 0 and
 * must not be selected. */
static unsigned int pi_channels_maps[7] =
{
0,
AOUT_CHAN_CENTER,
AOUT_CHAN_LEFT | AOUT_CHAN_RIGHT,
AOUT_CHAN_CENTER | AOUT_CHAN_LEFT | AOUT_CHAN_RIGHT,
AOUT_CHAN_LEFT | AOUT_CHAN_RIGHT | AOUT_CHAN_REARLEFT | AOUT_CHAN_REARRIGHT,
AOUT_CHAN_LEFT | AOUT_CHAN_RIGHT | AOUT_CHAN_CENTER | AOUT_CHAN_REARLEFT | AOUT_CHAN_REARRIGHT,
AOUT_CHAN_LEFT | AOUT_CHAN_RIGHT | AOUT_CHAN_CENTER | AOUT_CHAN_REARLEFT | AOUT_CHAN_REARRIGHT | AOUT_CHAN_LFE
};
/*****************************************************************************
* Local prototypes
*****************************************************************************/
/* module open / close callbacks */
static int OpenDecoder ( vlc_object_t * );
static void CloseDecoder ( vlc_object_t * );
/* audio decoding callback, installed by OpenDecoder() */
static aout_buffer_t *DecodeFrame ( decoder_t *, block_t ** );
/*****************************************************************************
* Module descriptor
*****************************************************************************/
/* Registers this plugin as an audio codec providing the "decoder"
 * capability with a score of 100, using OpenDecoder / CloseDecoder as its
 * open and close callbacks. */
vlc_module_begin();
set_category( CAT_INPUT );
set_subcategory( SUBCAT_INPUT_ACODEC );
set_description( _("WMA v1/v2 fixed point audio decoder") );
set_capability( "decoder", 100 );
set_callbacks( OpenDecoder, CloseDecoder );
vlc_module_end();
/*****************************************************************************
 * SplitBuffer: hand at most 2048 buffered samples to the audio output.
 *****************************************************************************
 * Needed because aout really doesn't like big audio chunks and wma easily
 * produces > 30000 samples. Returns NULL when nothing is buffered or when
 * no output buffer could be obtained; otherwise the returned buffer is
 * dated from end_date and the read position / sample count are advanced.
 *****************************************************************************/
static aout_buffer_t *SplitBuffer( decoder_t *p_dec )
{
    decoder_sys_t *p_sys = p_dec->p_sys;
    unsigned int i_chunk = p_sys->i_samples;
    aout_buffer_t *p_out;

    if( i_chunk > 2048 )
        i_chunk = 2048;

    if( i_chunk == 0 )
        return NULL;

    p_out = p_dec->pf_aout_buffer_new( p_dec, i_chunk );
    if( p_out == NULL )
        return NULL;

    p_out->start_date = aout_DateGet( &p_sys->end_date );
    p_out->end_date = aout_DateIncrement( &p_sys->end_date, i_chunk );

    /* copy this chunk and step over it in the decoded data */
    memcpy( p_out->p_buffer, p_sys->p_samples, p_out->i_nb_bytes );
    p_sys->p_samples += p_out->i_nb_bytes;
    p_sys->i_samples -= i_chunk;

    return p_out;
}
/*****************************************************************************
 * OpenDecoder: probe the decoder and return score
 *****************************************************************************
 * Accepts the 'wma1'/'WMA1' and 'wma2'/'WMA2' codecs only. The channel
 * count comes from the stream, so it is validated at run time (an assert
 * would vanish in NDEBUG builds and let it index pi_channels_maps out of
 * bounds). On success p_dec->p_sys is allocated, the output format is set
 * to 'fi32' and DecodeFrame is installed as the audio decoding callback.
 *
 * Returns VLC_SUCCESS, VLC_ENOMEM when the allocation fails, or
 * VLC_EGENERIC for an unsupported codec / channel layout or when the WMA
 * decoder refuses the stream parameters.
 *****************************************************************************/
static int OpenDecoder( vlc_object_t *p_this )
{
    decoder_t *p_dec = (decoder_t*)p_this;
    decoder_sys_t *p_sys;
    asf_waveformatex_t wfx;
    bool b_wma1, b_wma2;
    unsigned int i_channels;

    b_wma1 = p_dec->fmt_in.i_codec == VLC_FOURCC('w','m','a','1') ||
             p_dec->fmt_in.i_codec == VLC_FOURCC('W','M','A','1');
    b_wma2 = p_dec->fmt_in.i_codec == VLC_FOURCC('w','m','a','2') ||
             p_dec->fmt_in.i_codec == VLC_FOURCC('W','M','A','2');
    if( !b_wma1 && !b_wma2 )
        return VLC_EGENERIC;

    /* Reject channel counts we cannot map or decode. The aout core uses the
     * physical channel mask in divisions, so it must never be 0, and
     * DecodeFrame() sizes its output buffer for MAX_CHANNELS channels. */
    i_channels = p_dec->fmt_in.audio.i_channels;
    if( i_channels == 0 || i_channels > MAX_CHANNELS ||
        i_channels >= sizeof( pi_channels_maps ) / sizeof( pi_channels_maps[0] ) ||
        pi_channels_maps[i_channels] == 0 )
    {
        msg_Err( p_dec, "unsupported number of channels (%u)", i_channels );
        return VLC_EGENERIC;
    }

    /* Allocate the memory needed to store the decoder's structure */
    p_dec->p_sys = p_sys = calloc( 1, sizeof( *p_sys ) );
    if( !p_sys )
        return VLC_ENOMEM;

    /* Date */
    aout_DateInit( &p_sys->end_date, p_dec->fmt_in.audio.i_rate );

    /* Set output properties */
    p_dec->fmt_out.i_cat = AUDIO_ES;
    p_dec->fmt_out.i_codec = VLC_FOURCC('f','i','3','2');
    p_dec->fmt_out.audio.i_bitspersample = p_dec->fmt_in.audio.i_bitspersample;
    p_dec->fmt_out.audio.i_rate = p_dec->fmt_in.audio.i_rate;
    p_dec->fmt_out.audio.i_channels = p_dec->fmt_in.audio.i_channels;
    p_dec->fmt_out.audio.i_original_channels =
        p_dec->fmt_out.audio.i_physical_channels =
            pi_channels_maps[i_channels];

    /* Describe the stream to the WMA decoder */
    wfx.rate = p_dec->fmt_in.audio.i_rate;
    wfx.bitrate = p_dec->fmt_in.i_bitrate;
    wfx.channels = p_dec->fmt_in.audio.i_channels;
    wfx.blockalign = p_dec->fmt_in.audio.i_blockalign;
    wfx.bitspersample = p_dec->fmt_in.audio.i_bitspersample;

    msg_Dbg( p_dec, "samplerate %d bitrate %d channels %d align %d bps %d",
        wfx.rate, wfx.bitrate, wfx.channels, wfx.blockalign,
        wfx.bitspersample );

    wfx.codec_id = b_wma1 ? ASF_CODEC_ID_WMAV1 : ASF_CODEC_ID_WMAV2;

    /* at most the first 6 bytes of codec extra data are passed on */
    wfx.datalen = p_dec->fmt_in.i_extra;
    if( wfx.datalen > 6 ) wfx.datalen = 6;
    if( wfx.datalen > 0 )
        memcpy( wfx.data, p_dec->fmt_in.p_extra, wfx.datalen );

    /* Init codec */
    if( wma_decode_init(&p_sys->wmadec, &wfx ) < 0 )
    {
        msg_Err( p_dec, "codec init failed" );
        free( p_sys );
        p_dec->p_sys = NULL;
        return VLC_EGENERIC;
    }

    /* Set callback */
    p_dec->pf_decode_audio = DecodeFrame;
    return VLC_SUCCESS;
}
/*****************************************************************************
 * DecodeFrame: decodes a wma frame.
 *****************************************************************************
 * Called repeatedly with the same block. The first call (i_buffer > 0)
 * decodes every frame of the superframe into p_sys->p_output, marks the
 * block as consumed by setting i_buffer to 0 and returns the first chunk.
 * Later calls (i_buffer == 0) return the remaining chunks; once nothing is
 * left the output buffer is freed and the block is released.
 *
 * Returns an audio buffer, or NULL when there is nothing to output. Every
 * path that releases the block also clears *pp_block, and no path leaves a
 * stale output buffer behind.
 *****************************************************************************/
static aout_buffer_t *DecodeFrame( decoder_t *p_dec, block_t **pp_block )
{
    decoder_sys_t *p_sys = p_dec->p_sys;
    block_t       *p_block;
    mtime_t        start = mdate(); /* for statistics */
    aout_buffer_t *p_aout_buffer = NULL;

    if( !pp_block || !*pp_block ) return NULL;

    p_block = *pp_block;

    if( p_block->i_buffer <= 0 )
    {
        /* we already decoded the samples, just feed a few to aout */
        if( p_sys->i_samples )
            p_aout_buffer = SplitBuffer( p_dec );

        if( !p_aout_buffer || !p_sys->i_samples )
        {   /* drained (or no aout buffer available): we need to decode
             * new samples now */
            free( p_sys->p_output );
            p_sys->p_output = NULL;
            p_sys->p_samples = NULL;
            p_sys->i_samples = 0;
            block_Release( p_block );
            *pp_block = NULL;
        }
        return p_aout_buffer;
    }

    /* Date management */
    if( p_block->i_pts > 0 &&
        p_block->i_pts != aout_DateGet( &p_sys->end_date ) )
    {
        aout_DateSet( &p_sys->end_date, p_block->i_pts );
        /* don't reuse the same pts */
        p_block->i_pts = 0;
    }
    else if( !aout_DateGet( &p_sys->end_date ) )
    {
        /* We've just started the stream, wait for the first PTS. */
        block_Release( p_block );
        *pp_block = NULL;
        return NULL;
    }

    wma_decode_superframe_init( &p_sys->wmadec, p_block->p_buffer,
                                p_block->i_buffer );

    if( p_sys->wmadec.nb_frames <= 0 )
    {
        msg_Err( p_dec, "can not decode, invalid ASF packet ?" );
        block_Release( p_block );
        *pp_block = NULL;
        return NULL;
    }

    /* Drop output left over from a block that was never fully drained,
     * otherwise the allocation below would leak it. */
    free( p_sys->p_output );
    p_sys->p_output = NULL;
    p_sys->p_samples = NULL;
    p_sys->i_samples = 0;

    /* worst case */
    size_t i_buffer = (size_t)BLOCK_MAX_SIZE * MAX_CHANNELS
                      * p_sys->wmadec.nb_frames;

    p_sys->p_output = malloc( i_buffer * sizeof(int32_t) );
    if( !p_sys->p_output )
    {
        /* OOM, will try a bit later if VLC hasn't been killed */
        block_Release( p_block );
        *pp_block = NULL;
        return NULL;
    }
    p_sys->p_samples = (int8_t*)p_sys->p_output;

    for( int i = 0 ; i < p_sys->wmadec.nb_frames; i++ )
    {
        int i_samples = wma_decode_superframe_frame( &p_sys->wmadec,
                 p_sys->p_output + p_sys->i_samples * p_sys->wmadec.nb_channels,
                 p_block->p_buffer, p_block->i_buffer );

        if( i_samples < 0 )
            msg_Warn( p_dec,
                "wma_decode_superframe_frame() failed for frame %d", i );
        else
            p_sys->i_samples += i_samples; /* advance in the samples buffer */
    }

    p_block->i_buffer = 0; /* this block has been decoded */

    /* Q30 -> Q28 translation, applied only to what was actually decoded:
     * the rest of the worst-case buffer is uninitialised. */
    size_t i_decoded = (size_t)p_sys->i_samples * p_sys->wmadec.nb_channels;
    if( i_decoded > i_buffer )
        i_decoded = i_buffer;
    for( size_t s = 0 ; s < i_decoded; s++ )
        p_sys->p_output[s] >>= 2;

    p_aout_buffer = SplitBuffer( p_dec );
    if( !p_aout_buffer )
    {
        /* Every frame failed to decode, or no aout buffer was available:
         * this can happen at run time, so clean up instead of asserting. */
        free( p_sys->p_output );
        p_sys->p_output = NULL;
        p_sys->p_samples = NULL;
        p_sys->i_samples = 0;
        block_Release( p_block );
        *pp_block = NULL;
        return NULL;
    }

    /* NOTE(review): this timing trace is compiled in when NDEBUG is defined,
     * i.e. in non-debug builds -- confirm that this is intended. */
#ifdef NDEBUG
    msg_Dbg( p_dec, "%s took %"PRIi64" us",__func__,mdate()-start);
#endif
    return p_aout_buffer;
}
/*****************************************************************************
 * CloseDecoder : wma decoder destruction
 *****************************************************************************
 * Frees the sample buffer that may still be pending, then the decoder
 * state itself.
 *****************************************************************************/
static void CloseDecoder( vlc_object_t *p_this )
{
    decoder_t *p_dec = (decoder_t*)p_this;
    decoder_sys_t *p_sys = p_dec->p_sys;

    free( p_sys->p_output );
    free( p_sys );
}
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _WMADEC_H
#define _WMADEC_H
#include <inttypes.h>
#include "asf.h"
#include "bitstream.h" /* For GetBitContext */
#include "mdct.h"
/* bitstream tracing is switched off for the code following this header */
#undef TRACE
/* size of blocks */
/* block lengths are powers of two from 1 << BLOCK_MIN_BITS to
 * 1 << BLOCK_MAX_BITS, i.e. BLOCK_NB_SIZES different sizes */
#define BLOCK_MIN_BITS 7
#define BLOCK_MAX_BITS 11
#define BLOCK_MAX_SIZE (1 << BLOCK_MAX_BITS)
#define BLOCK_NB_SIZES (BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1)
/* XXX: find exact max size */
#define HIGH_BAND_MAX_SIZE 16
#define NB_LSP_COEFS 10
/* XXX: is it a suitable value ? */
#define MAX_CODED_SUPERFRAME_SIZE 16384
#define M_PI 3.14159265358979323846
/* pi and 2*pi scaled by 65536 (16 fractional bits) */
#define M_PI_F 0x3243f // in fixed 32 format
#define TWO_M_PI_F 0x6487f //in fixed 32
/* dimension of every per-channel array in WMADecodeContext */
#define MAX_CHANNELS 2
#define NOISE_TAB_SIZE 8192
/* log2 of the size of the lsp_pow_m_table1/2 arrays */
#define LSP_POW_BITS 7
/* Complete state of one fixed-point WMA v1/v2 decoder instance.
 * The structure embeds several large arrays (frame_out, exponents,
 * last_superframe, ...), so it is big and is best not placed on the stack. */
typedef struct WMADecodeContext
{
GetBitContext gb; /* bitstream reader */
int nb_block_sizes; /* number of block sizes */
int sample_rate;
int nb_channels;
int bit_rate;
int version; /* 1 = 0x160 (WMAV1), 2 = 0x161 (WMAV2) */
int block_align;
int use_bit_reservoir;
int use_variable_block_len;
int use_exp_vlc; /* exponent coding: 0 = lsp, 1 = vlc + delta */
int use_noise_coding; /* true if perceptual noise is added */
int byte_offset_bits;
VLC exp_vlc;
/* per block size band layout */
int exponent_sizes[BLOCK_NB_SIZES];
uint16_t exponent_bands[BLOCK_NB_SIZES][25];
int high_band_start[BLOCK_NB_SIZES]; /* index of first coef in high band */
int coefs_start; /* first coded coef */
int coefs_end[BLOCK_NB_SIZES]; /* max number of coded coefficients */
int exponent_high_sizes[BLOCK_NB_SIZES];
int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE];
VLC hgain_vlc;
/* coded values in high bands */
int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
/* there are two possible tables for spectral coefficients */
VLC coef_vlc[2];
uint16_t *run_table[2];
uint16_t *level_table[2];
/* frame info */
int frame_len; /* frame length in samples */
int frame_len_bits; /* frame_len = 1 << frame_len_bits */
/* block info */
int reset_block_lengths;
int block_len_bits; /* log2 of current block length */
int next_block_len_bits; /* log2 of next block length */
int prev_block_len_bits; /* log2 of prev block length */
int block_len; /* block length in samples */
int block_num; /* block number in current frame */
int block_pos; /* current position in frame */
uint8_t ms_stereo; /* true if mid/side stereo mode */
uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */
int exponents_bsize[MAX_CHANNELS]; // log2 ratio frame/exp. length
int32_t exponents[MAX_CHANNELS][BLOCK_MAX_SIZE];
int32_t max_exponent[MAX_CHANNELS];
int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
/* pointer to a coefficient array that is not embedded in this structure */
int32_t (*coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
/* one MDCT context and one window per block size */
MDCTContext mdct_ctx[BLOCK_NB_SIZES];
int32_t *windows[BLOCK_NB_SIZES];
/* output buffer for one frame and the last for IMDCT windowing */
int32_t frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
/* last frame info */
uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
int last_bitoffset;
int last_superframe_len;
int32_t *noise_table;
int noise_index;
int32_t noise_mult; /* XXX: suppress that and integrate it in the noise array */
/* lsp_to_curve tables */
int32_t lsp_cos_table[BLOCK_MAX_SIZE];
int64_t lsp_pow_e_table[256];
int32_t lsp_pow_m_table1[(1 << LSP_POW_BITS)];
int32_t lsp_pow_m_table2[(1 << LSP_POW_BITS)];
/* State of current superframe decoding */
int bit_offset;
int nb_frames; /* frames in the current superframe; read by the caller after wma_decode_superframe_init() */
int current_frame;
#ifdef TRACE
int frame_count;
#endif
}
WMADecodeContext;
/* Initialise s from the stream description in wfx. The visible caller
 * treats a negative return value as failure. */
int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx);
/* Start decoding the superframe held in buf; afterwards s->nb_frames tells
 * how many frames it contains (the visible caller treats a value <= 0 as an
 * invalid packet). */
int wma_decode_superframe_init(WMADecodeContext* s,
uint8_t *buf, int buf_size);
/* Decode the next frame of the superframe into samples. The visible caller
 * treats a negative return as an error and any other value as the number
 * of samples by which to advance its output position. */
int wma_decode_superframe_frame(WMADecodeContext* s,
int32_t *samples,
uint8_t *buf, int buf_size);
#endif
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/**
* @file wmadec.c
* WMA compatible decoder.
*/
/* Placement attributes, defined empty here so the annotated declarations
 * compile unchanged (presumably memory-section hints in the Rockbox
 * original -- confirm). */
#define IBSS_ATTR
#define ICONST_ATTR
#define ICODE_ATTR
/* NOTE(review): this block is active when NDEBUG is defined, i.e. in
 * non-debug builds, and it only makes sure WMA_DEBUG is undefined --
 * confirm that the condition is not inverted. */
#ifdef NDEBUG
#include <stdio.h>
#undef WMA_DEBUG /* enable when debugging wma */
#endif
#include "asf.h"
#include "wmadec.h"
#include "wmafixed.h"
#include "bitstream.h"
#include <string.h> /* memcpy() */
/* Lookup width and lookup depth for each VLC. Each *MAX value is
 * ceil(L / bits); 22, 19 and 13 are presumably the longest code lengths of
 * the coefficient, exponent and high-band gain tables -- confirm against
 * the tables. */
#define VLCBITS 7 /*7 is the lowest without glitching*/
#define VLCMAX ((22+VLCBITS-1)/VLCBITS)
#define EXPVLCBITS 7
#define EXPMAX ((19+EXPVLCBITS-1)/EXPVLCBITS)
#define HGAINVLCBITS 9
#define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS)
/* Description of one spectral coefficient VLC: the code set plus the data
 * from which the run/level tables are derived. */
typedef struct CoefVLCTable
{
int n; /* total number of codes */
const uint32_t *huffcodes; /* VLC bit values */
const uint8_t *huffbits; /* VLC bit size */
const uint16_t *levels; /* table to build run/level tables */
}
CoefVLCTable;
static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len);
/* NOTE: everything below has static storage and external linkage, so it is
 * shared by every decoder instance in the process. */
int32_t coefsarray[MAX_CHANNELS][BLOCK_MAX_SIZE] IBSS_ATTR;
/* static variables that replace malloced stuff */
/* these are the MDCT reconstruction windows */
int32_t stat0[2048], stat1[1024], stat2[512], stat3[256], stat4[128];
/* these are VLC lookup tables */
uint16_t *runtabarray[2], *levtabarray[2];
/* these could be made smaller since only one can be 1336 */
uint16_t runtab0[1336], runtab1[1336], levtab0[1336], levtab1[1336];
/* number of entries of the four static VLC lookup buffers below */
#define VLCBUF1SIZE 4598
#define VLCBUF2SIZE 3574
#define VLCBUF3SIZE 360
#define VLCBUF4SIZE 540
/*putting these in IRAM actually makes PP slower*/
VLC_TYPE vlcbuf1[VLCBUF1SIZE][2];
VLC_TYPE vlcbuf2[VLCBUF2SIZE][2];
VLC_TYPE vlcbuf3[VLCBUF3SIZE][2];
VLC_TYPE vlcbuf4[VLCBUF4SIZE][2];
#include "wmadata.h" // PJJ
/*
* Helper functions for wma_window.
*
*
*/
#ifdef CPU_ARM
/*
 * ARM version: dst[i] += (high 32 bits of data[i] * window[i]) << 1.
 *
 * Two elements are processed per loop iteration and the loop only ends when
 * the counter reaches exactly zero, so n must be a non-zero multiple of 2.
 * data, window, dst and n are read-write asm operands; their local copies
 * are advanced/consumed by the loop.
 */
static inline
void vector_fmul_add_add(int32_t *dst, const int32_t *data,
const int32_t *window, int n)
{
/* Block sizes are always power of two */
asm volatile (
"0:"
"ldmia %[d]!, {r0, r1};"
"ldmia %[w]!, {r4, r5};"
/* consume the first data and window value so we can use those
* registers again */
"smull r8, r9, r0, r4;"
"ldmia %[dst], {r0, r4};"
"add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
"smull r8, r9, r1, r5;"
"add r1, r4, r9, lsl #1;"
"stmia %[dst]!, {r0, r1};"
"subs %[n], %[n], #2;"
"bne 0b;"
: [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
: : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
}
/*
 * ARM version: dst[i] = (high 32 bits of src0[i] * src1[len-1-i]) << 1.
 *
 * src1 is first advanced by len words and then read backwards with ldmdb,
 * while src0 and dst are walked forwards. Two elements are handled per
 * iteration and the loop only ends when the counter reaches exactly zero, so
 * len must be a non-zero multiple of 2.
 */
static inline
void vector_fmul_reverse(int32_t *dst, const int32_t *src0, const int32_t *src1,
int len)
{
/* Block sizes are always power of two */
asm volatile (
"add %[s1], %[s1], %[n], lsl #2;"
"0:"
"ldmia %[s0]!, {r0, r1};"
"ldmdb %[s1]!, {r4, r5};"
"smull r8, r9, r0, r5;"
"mov r0, r9, lsl #1;"
"smull r8, r9, r1, r4;"
"mov r1, r9, lsl #1;"
"stmia %[dst]!, {r0, r1};"
"subs %[n], %[n], #2;"
"bne 0b;"
: [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
: : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
}
#elif defined(CPU_COLDFIRE)
/*
 * ColdFire version: adds the product data[i] * window[i] onto dst[i].
 *
 * Four products per iteration are formed in the MAC accumulators acc0..acc3,
 * read back (and cleared) with movclr.l, then added to four consecutive dst
 * words. The loop only ends when the counter reaches exactly zero, so n must
 * be a non-zero multiple of 4.
 * NOTE(review): the scaling of the product depends on the MACSR mode;
 * wma_decode_init() selects EMAC_FRACTIONAL | EMAC_SATURATE under
 * CPU_COLDFIRE -- confirm nothing else changes it between calls.
 */
static inline
void vector_fmul_add_add(int32_t *dst, const int32_t *data,
const int32_t *window, int n)
{
/* Block sizes are always power of two. Smallest block is always way bigger
* than four too.*/
asm volatile (
"0:"
"movem.l (%[d]), %%d0-%%d3;"
"movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
"mac.l %%d0, %%d4, %%acc0;"
"mac.l %%d1, %%d5, %%acc1;"
"mac.l %%d2, %%a0, %%acc2;"
"mac.l %%d3, %%a1, %%acc3;"
"lea.l (16, %[d]), %[d];"
"lea.l (16, %[w]), %[w];"
"movclr.l %%acc0, %%d0;"
"movclr.l %%acc1, %%d1;"
"movclr.l %%acc2, %%d2;"
"movclr.l %%acc3, %%d3;"
"add.l %%d0, (%[dst])+;"
"add.l %%d1, (%[dst])+;"
"add.l %%d2, (%[dst])+;"
"add.l %%d3, (%[dst])+;"
"subq.l #4, %[n];"
"jne 0b;"
: [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
: : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
}
/*
 * ColdFire version: dst[i] = src0[i] * src1[len-1-i].
 *
 * src1 is first moved to its last group of four words and then stepped
 * backwards by 16 bytes per iteration; within a group the operands are
 * paired in reverse (d0*a1, d1*a0, d2*d5, d3*d4). Four results per iteration
 * are read back from acc0..acc3 with movclr.l and stored to dst. The loop
 * only ends when the counter reaches exactly zero, so len must be a non-zero
 * multiple of 4.
 * NOTE(review): the scaling of the product depends on the MACSR mode set in
 * wma_decode_init() (EMAC_FRACTIONAL | EMAC_SATURATE) -- confirm.
 */
static inline
void vector_fmul_reverse(int32_t *dst, const int32_t *src0, const int32_t *src1,
int len)
{
/* Block sizes are always power of two. Smallest block is always way bigger
* than four too.*/
asm volatile (
"lea.l (-16, %[s1], %[n]*4), %[s1];"
"0:"
"movem.l (%[s0]), %%d0-%%d3;"
"movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
"mac.l %%d0, %%a1, %%acc0;"
"mac.l %%d1, %%a0, %%acc1;"
"mac.l %%d2, %%d5, %%acc2;"
"mac.l %%d3, %%d4, %%acc3;"
"lea.l (16, %[s0]), %[s0];"
"lea.l (-16, %[s1]), %[s1];"
"movclr.l %%acc0, %%d0;"
"movclr.l %%acc1, %%d1;"
"movclr.l %%acc2, %%d2;"
"movclr.l %%acc3, %%d3;"
"movem.l %%d0-%%d3, (%[dst]);"
"lea.l (16, %[dst]), %[dst];"
"subq.l #4, %[n];"
"jne 0b;"
: [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
: : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
}
#else
/*
 * Portable version: accumulate the element-wise fixed-point product of src0
 * and src1 onto dst, i.e. dst[k] += fixmul32b(src0[k], src1[k]) for the
 * first len elements.
 */
static inline void vector_fmul_add_add(int32_t *dst, const int32_t *src0, const int32_t *src1, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] += fixmul32b(src0[k], src1[k]);
        ++k;
    }
}
/*
 * Portable version: multiply src0 by src1 read back to front,
 * i.e. dst[k] = fixmul32b(src0[k], src1[len - 1 - k]) for the first len
 * elements.
 */
static inline void vector_fmul_reverse(int32_t *dst, const int32_t *src0, const int32_t *src1, int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = fixmul32b(src0[k], src1[len - 1 - k]);
        ++k;
    }
}
#endif
/**
 * Window one IMDCT output block and overlap-add it into the output buffer.
 *
 * `in` holds 2 * block_len samples. The first half is multiplied by the
 * rising window and added onto what `out` already contains; the second half
 * is multiplied by the time-reversed window and stored, so that the next
 * block can be added on top of it.
 *
 * The overlapping windows are meant to have a squared sum of 1 (MDCT
 * reconstruction rule). The Vorbis I spec has a diagram of the process, see
 * section 1.3.2.3 of http://xiph.org/vorbis/doc/Vorbis_I_spec.html
 */
static void wma_window(WMADecodeContext *s, int32_t *in, int32_t *out)
{
    int win_len, win_idx, pad;

    /* ---- first half: overlap with the previous block ---- */
    if (s->block_len_bits > s->prev_block_len_bits) {
        /* The previous block was shorter: the overlap is only as long as
         * that block and sits in the middle of this half. The samples before
         * it are left untouched, the samples after it are copied through. */
        win_len = 1 << s->prev_block_len_bits;
        pad = (s->block_len - win_len) / 2;
        win_idx = s->frame_len_bits - s->prev_block_len_bits;

        vector_fmul_add_add(out + pad, in + pad, s->windows[win_idx], win_len);
        memcpy(out + pad + win_len, in + pad + win_len, pad * sizeof(int32_t));
    } else {
        /* The previous block was at least as long: window the whole half
         * with the window belonging to the current block size. */
        win_len = s->block_len;
        win_idx = s->frame_len_bits - s->block_len_bits;

        vector_fmul_add_add(out, in, s->windows[win_idx], win_len);
    }

    /* Step over the first half. The second half uses the same window shape
     * mirrored in time, which vector_fmul_reverse() takes care of. */
    out += s->block_len;
    in += s->block_len;

    /* ---- second half: prepare the overlap with the next block ---- */
    if (s->block_len_bits > s->next_block_len_bits) {
        /* The next block is shorter: copy the leading samples unchanged,
         * window the middle and clear the tail. */
        win_len = 1 << s->next_block_len_bits;
        pad = (s->block_len - win_len) / 2;
        win_idx = s->frame_len_bits - s->next_block_len_bits;

        memcpy(out, in, pad * sizeof(int32_t));
        vector_fmul_reverse(out + pad, in + pad, s->windows[win_idx], win_len);
        memset(out + pad + win_len, 0, pad * sizeof(int32_t));
    } else {
        /* The next block is at least as long: window the whole half. */
        win_len = s->block_len;
        win_idx = s->frame_len_bits - s->block_len_bits;

        vector_fmul_reverse(out, in, s->windows[win_idx], win_len);
    }
}
/*
 * Build the Huffman decoder and the run/level lookup tables for one
 * coefficient table.
 *
 * vlc           receives the decoder built by init_vlc()
 * prun_table    receives the run table (runtabarray[tab])
 * plevel_table  receives the level table (levtabarray[tab])
 * vlc_table     static table description
 * tab           which of the static run/level buffers to fill
 *
 * Codes 0 and 1 get no run/level entry (the block decoder treats them as
 * escape and end-of-block); filling starts at code 2 with level 1, and each
 * entry of vlc_table->levels gives the number of consecutive runs
 * (0, 1, 2, ...) that belong to the current level.
 *
 * XXX: use same run/length optimization as mpeg decoders
 */
static void init_coef_vlc(VLC *vlc,
                          uint16_t **prun_table, uint16_t **plevel_table,
                          const CoefVLCTable *vlc_table, int tab)
{
    const int ncodes = vlc_table->n;
    const uint16_t *runs_per_level = vlc_table->levels;
    uint16_t *runs, *levels;
    int code, lvl, run, count;

    init_vlc(vlc, VLCBITS, ncodes, vlc_table->huffbits, 1, 1,
             vlc_table->huffcodes, 4, 4, 0);

    runs = runtabarray[tab];
    levels = levtabarray[tab];

    code = 2;
    for (lvl = 1; code < ncodes; ++lvl) {
        count = *runs_per_level++;
        for (run = 0; run < count; ++run, ++code) {
            runs[code] = run;
            levels[code] = lvl;
        }
    }

    *prun_table = runs;
    *plevel_table = levels;
}
/*
 * Initialise a decoder context from the ASF stream format header.
 *
 * Copies the stream parameters out of wfx, decodes the codec flags from the
 * extra data, derives the frame/block sizes, scale-factor band layouts and
 * noise-coding limits, fills the MDCT windows and builds all VLC tables in
 * the file-scope static buffers.
 *
 * s    decoder context to fill in
 * wfx  stream format (rate, channels, bitrate, blockalign, codec_id, data)
 *
 * Returns 0 on success, -1 when wfx->codec_id is neither WMA v1 nor v2.
 *
 * NOTE(review): all tables live in file-scope buffers, so two contexts
 * initialised from this file would share them -- confirm only one decoder
 * instance is expected at a time.
 */
int wma_decode_init(WMADecodeContext* s, asf_waveformatex_t *wfx)
{
int i, flags1, flags2;
int32_t *window;
uint8_t *extradata;
int64_t bps1;
int32_t high_freq;
int64_t bps;
int sample_rate1;
int coef_vlc_table;
#ifdef CPU_COLDFIRE
coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
#endif
s->sample_rate = wfx->rate;
s->nb_channels = wfx->channels;
s->bit_rate = wfx->bitrate;
s->block_align = wfx->blockalign;
s->coefs = &coefsarray;
if (wfx->codec_id == ASF_CODEC_ID_WMAV1) {
s->version = 1;
} else if (wfx->codec_id == ASF_CODEC_ID_WMAV2 ) {
s->version = 2;
} else {
/*one of those other wma flavors that don't have GPLed decoders */
return -1;
}
/* extract flag infos */
/* flags stay 0 when the extra data is shorter than the version requires */
flags1 = 0;
flags2 = 0;
extradata = wfx->data;
if (s->version == 1 && wfx->datalen >= 4) {
flags1 = extradata[0] | (extradata[1] << 8);
flags2 = extradata[2] | (extradata[3] << 8);
}else if (s->version == 2 && wfx->datalen >= 6){
flags1 = extradata[0] | (extradata[1] << 8) |
(extradata[2] << 16) | (extradata[3] << 24);
flags2 = extradata[4] | (extradata[5] << 8);
}
s->use_exp_vlc = flags2 & 0x0001;
s->use_bit_reservoir = flags2 & 0x0002;
s->use_variable_block_len = flags2 & 0x0004;
/* compute MDCT block size */
if (s->sample_rate <= 16000){
s->frame_len_bits = 9;
}else if (s->sample_rate <= 22050 ||
(s->sample_rate <= 32000 && s->version == 1)){
s->frame_len_bits = 10;
}else{
s->frame_len_bits = 11;
}
s->frame_len = 1 << s->frame_len_bits;
if (s-> use_variable_block_len)
{
int nb_max, nb;
nb = ((flags2 >> 3) & 3) + 1;
if ((s->bit_rate / s->nb_channels) >= 32000)
{
nb += 2;
}
nb_max = s->frame_len_bits - BLOCK_MIN_BITS; /* max is 11-7 */
if (nb > nb_max)
nb = nb_max;
s->nb_block_sizes = nb + 1;
}
else
{
s->nb_block_sizes = 1;
}
/* init rate dependant parameters */
s->use_noise_coding = 1;
high_freq = itofix64(s->sample_rate) >> 1;
/* if version 2, then the rates are normalized */
sample_rate1 = s->sample_rate;
if (s->version == 2)
{
if (sample_rate1 >= 44100)
sample_rate1 = 44100;
else if (sample_rate1 >= 22050)
sample_rate1 = 22050;
else if (sample_rate1 >= 16000)
sample_rate1 = 16000;
else if (sample_rate1 >= 11025)
sample_rate1 = 11025;
else if (sample_rate1 >= 8000)
sample_rate1 = 8000;
}
/* bps: bits per sample per channel, as a fixed-point ratio */
int64_t tmp = itofix64(s->bit_rate);
int64_t tmp2 = itofix64(s->nb_channels * s->sample_rate);
bps = fixdiv64(tmp, tmp2);
int64_t tim = bps * s->frame_len;
int64_t tmpi = fixdiv64(tim,itofix64(8));
s->byte_offset_bits = av_log2(fixtoi64(tmpi+0x8000)) + 2;
/* compute high frequency value and choose if noise coding should
be activated */
bps1 = bps;
if (s->nb_channels == 2)
bps1 = fixmul32(bps,0x1999a);
if (sample_rate1 == 44100)
{
if (bps1 >= 0x9c29)
s->use_noise_coding = 0;
else
high_freq = fixmul32(high_freq,0x6666);
}
else if (sample_rate1 == 22050)
{
if (bps1 >= 0x128f6)
s->use_noise_coding = 0;
else if (bps1 >= 0xb852)
high_freq = fixmul32(high_freq,0xb333);
else
high_freq = fixmul32(high_freq,0x999a);
}
else if (sample_rate1 == 16000)
{
if (bps > 0x8000)
high_freq = fixmul32(high_freq,0x8000);
else
high_freq = fixmul32(high_freq,0x4ccd);
}
else if (sample_rate1 == 11025)
{
high_freq = fixmul32(high_freq,0xb333);
}
else if (sample_rate1 == 8000)
{
if (bps <= 0xa000)
{
high_freq = fixmul32(high_freq,0x8000);
}
else if (bps > 0xc000)
{
s->use_noise_coding = 0;
}
else
{
high_freq = fixmul32(high_freq,0xa666);
}
}
else
{
if (bps >= 0xcccd)
{
high_freq = fixmul32(high_freq,0xc000);
}
else if (bps >= 0x999a)
{
high_freq = fixmul32(high_freq,0x999a);
}
else
{
high_freq = fixmul32(high_freq,0x8000);
}
}
/* compute the scale factor band sizes for each MDCT block size */
{
int a, b, pos, lpos, k, block_len, i, j, n;
const uint8_t *table;
if (s->version == 1)
{
s->coefs_start = 3;
}
else
{
s->coefs_start = 0;
}
for(k = 0; k < s->nb_block_sizes; ++k)
{
block_len = s->frame_len >> k;
if (s->version == 1)
{
/* NOTE(review): this branch always writes row 0 of exponent_bands and
 * exponent_sizes[0], whatever k is, while the high-band code further down
 * reads exponent_sizes[k] -- confirm this is intended when k > 0. */
lpos = 0;
for(i=0;i<25;++i)
{
a = wma_critical_freqs[i];
b = s->sample_rate;
pos = ((block_len * 2 * a) + (b >> 1)) / b;
if (pos > block_len)
pos = block_len;
s->exponent_bands[0][i] = pos - lpos;
if (pos >= block_len)
{
++i;
break;
}
lpos = pos;
}
s->exponent_sizes[0] = i;
}
else
{
/* hardcoded tables */
table = NULL;
a = s->frame_len_bits - BLOCK_MIN_BITS - k;
if (a < 3)
{
if (s->sample_rate >= 44100)
table = exponent_band_44100[a];
else if (s->sample_rate >= 32000)
table = exponent_band_32000[a];
else if (s->sample_rate >= 22050)
table = exponent_band_22050[a];
}
if (table)
{
/* first table entry is the band count, the band widths follow */
n = *table++;
for(i=0;i<n;++i)
s->exponent_bands[k][i] = table[i];
s->exponent_sizes[k] = n;
}
else
{
/* no hardcoded table: derive the bands from the critical frequencies,
 * with band edges rounded to a multiple of 4 coefficients */
j = 0;
lpos = 0;
for(i=0;i<25;++i)
{
a = wma_critical_freqs[i];
b = s->sample_rate;
pos = ((block_len * 2 * a) + (b << 1)) / (4 * b);
pos <<= 2;
if (pos > block_len)
pos = block_len;
if (pos > lpos)
s->exponent_bands[k][j++] = pos - lpos;
if (pos >= block_len)
break;
lpos = pos;
}
s->exponent_sizes[k] = j;
}
}
/* max number of coefs */
s->coefs_end[k] = (s->frame_len - ((s->frame_len * 9) / 100)) >> k;
/* high freq computation */
int32_t tmp1 = high_freq*2; /* high_freq is a int32_t!*/
int32_t tmp2=itofix32(s->sample_rate>>1);
s->high_band_start[k] = fixtoi32( fixdiv32(tmp1, tmp2) * (block_len>>1) +0x8000);
/*
s->high_band_start[k] = (int)((block_len * 2 * high_freq) /
s->sample_rate + 0.5);*/
/* clip every exponent band to [high_band_start, coefs_end) and keep the
 * non-empty remainders as the high bands of this block size */
n = s->exponent_sizes[k];
j = 0;
pos = 0;
for(i=0;i<n;++i)
{
int start, end;
start = pos;
pos += s->exponent_bands[k][i];
end = pos;
if (start < s->high_band_start[k])
start = s->high_band_start[k];
if (end > s->coefs_end[k])
end = s->coefs_end[k];
if (end > start)
s->exponent_high_bands[k][j++] = end - start;
}
s->exponent_high_sizes[k] = j;
}
}
mdct_init_global();
for(i = 0; i < s->nb_block_sizes; ++i)
{
ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
}
/*ffmpeg uses malloc to only allocate as many window sizes as needed.
* However, we're really only interested in the worst case memory usage.
* In the worst case you can have 5 window sizes, 128 doubling up 2048
* Smaller windows are handled differently.
* Since we don't have malloc, just statically allocate this
*/
int32_t *temp[5];
temp[0] = stat0;
temp[1] = stat1;
temp[2] = stat2;
temp[3] = stat3;
temp[4] = stat4;
/* init MDCT windows : simple sinus window */
/* NOTE(review): temp[i] is sized for frame_len_bits == 11 (stat0 has 2048
 * entries); with a smaller frame_len_bits the windows simply use the first
 * part of each buffer. */
for(i = 0; i < s->nb_block_sizes; i++)
{
int n, j;
int32_t alpha;
n = 1 << (s->frame_len_bits - i);
window = temp[i];
alpha = (1<<15)>>(s->frame_len_bits - i+1); /* this calculates 0.5/(2*n) */
for(j=0;j<n;++j)
{
int32_t j2 = itofix32(j) + 0x8000;
window[j] = fsincos(fixmul32(j2,alpha)<<16, 0); /* alpha between 0 and pi/2 */
}
s->windows[i] = window;
}
s->reset_block_lengths = 1;
if (s->use_noise_coding)
{
/* init the noise generator */
if (s->use_exp_vlc)
{
s->noise_mult = 0x51f;
s->noise_table = noisetable_exp;
}
else
{
s->noise_mult = 0xa3d;
/* LSP values are simply 2x the EXP values */
/* NOTE(review): this doubles the shared noisetable_exp in place, so every
 * further call that reaches this branch doubles it again, and a later
 * use_exp_vlc stream would see the doubled table -- confirm init runs at
 * most once per process or give the LSP case its own table. */
for (i=0;i<NOISE_TAB_SIZE;++i)
noisetable_exp[i] = noisetable_exp[i]<< 1;
s->noise_table = noisetable_exp;
}
#if 0
{
unsigned int seed;
int32_t norm;
seed = 1;
norm = 0; // PJJ: near as makes any diff to 0!
for (i=0;i<NOISE_TAB_SIZE;++i)
{
seed = seed * 314159 + 1;
s->noise_table[i] = itofix32((int)seed) * norm;
}
}
#endif
s->hgain_vlc.table = vlcbuf4;
s->hgain_vlc.table_allocated = VLCBUF4SIZE;
init_vlc(&s->hgain_vlc, HGAINVLCBITS, sizeof(hgain_huffbits),
hgain_huffbits, 1, 1,
hgain_huffcodes, 2, 2, 0);
}
if (s->use_exp_vlc)
{
s->exp_vlc.table = vlcbuf3;
s->exp_vlc.table_allocated = VLCBUF3SIZE;
init_vlc(&s->exp_vlc, EXPVLCBITS, sizeof(scale_huffbits),
scale_huffbits, 1, 1,
scale_huffcodes, 4, 4, 0);
}
else
{
wma_lsp_to_curve_init(s, s->frame_len);
}
/* choose the VLC tables for the coefficients */
coef_vlc_table = 2;
if (s->sample_rate >= 32000)
{
if (bps1 < 0xb852)
coef_vlc_table = 0;
else if (bps1 < 0x128f6)
coef_vlc_table = 1;
}
/* hook up the static run/level and VLC buffers before building the two
 * coefficient tables of the chosen pair */
runtabarray[0] = runtab0; runtabarray[1] = runtab1;
levtabarray[0] = levtab0; levtabarray[1] = levtab1;
s->coef_vlc[0].table = vlcbuf1;
s->coef_vlc[0].table_allocated = VLCBUF1SIZE;
s->coef_vlc[1].table = vlcbuf2;
s->coef_vlc[1].table_allocated = VLCBUF2SIZE;
init_coef_vlc(&s->coef_vlc[0], &s->run_table[0], &s->level_table[0],
&coef_vlcs[coef_vlc_table * 2], 0);
init_coef_vlc(&s->coef_vlc[1], &s->run_table[1], &s->level_table[1],
&coef_vlcs[coef_vlc_table * 2 + 1], 1);
s->last_superframe_len = 0;
s->last_bitoffset = 0;
return 0;
}
/*
 * Approximate x^-0.25 for a fixed-point x.
 *
 * x is converted to a float so that its exponent and mantissa bits can be
 * picked apart: the exponent indexes lsp_pow_e_table, the top LSP_POW_BITS
 * mantissa bits index the two mantissa tables, and the remaining mantissa
 * bits drive a linear interpolation between neighbouring table entries.
 * Interpolating keeps the mantissa tables small at a small speed expense
 * (it approximately doubles the number of bits of precision).
 */
static inline int32_t pow_m1_4(WMADecodeContext *s, int32_t x)
{
    union {
        float f;
        unsigned int v;
    } in, frac;
    unsigned int exp_idx, mant_idx;
    int32_t base, slope;

    in.f = fixtof64(x);

    /* sign + exponent bits, then the leading mantissa bits */
    exp_idx = in.v >> 23;
    mant_idx = (in.v >> (23 - LSP_POW_BITS)) & ((1 << LSP_POW_BITS) - 1);

    /* Rebuild a float in [1, 2) out of the mantissa bits that were not used
     * for the table index; it is the interpolation factor. */
    frac.v = ((in.v << LSP_POW_BITS) & ((1 << 23) - 1)) | (127 << 23);

    base = s->lsp_pow_m_table1[mant_idx];
    slope = s->lsp_pow_m_table2[mant_idx];

    /* lsp_pow_e_table contains 32.32 format, hence the final shift */
    /* TODO: Since we're unlikely have value that cover the whole
     * IEEE754 range, we probably don't need to have all possible exponents */
    return ((lsp_pow_e_table[exp_idx] * (base + fixmul32(slope, ftofix32(frac.f)))) >> 32);
}
/*
 * Precompute the tables used by the LSP exponent decoder.
 *
 * Fills s->lsp_cos_table[0 .. frame_len-1] with the cosine output of
 * fsincos() sampled at frame_len evenly spaced phases, and builds the two
 * mantissa interpolation tables read by pow_m1_4() from pow_a_table.
 *
 * s          decoder context whose tables are written
 * frame_len  number of cosine table entries to generate
 */
static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len)
{
    int32_t a, b, temp, temp2;
    int i;
    int ix = 0;

    /* phase step per table entry: 1/frame_len in fixed point */
    temp = fixdiv32(itofix32(1), itofix32(frame_len));
    for (i = 0; i < frame_len; ++i)
    {
        /* TODO: can probably reuse the trig_init values here */
        fsincos((temp*i)<<15, &temp2);
        /* get 3 bits headroom + 1 bit from not doubleing the values */
        s->lsp_cos_table[i] = temp2>>3;
    }

    /* NOTE: these two tables are needed to avoid two operations in
       pow_m1_4 */
    b = itofix32(1);
    /*double check this later*/
    /* The tables are filled from the last entry down to the first while
     * pow_a_table is read front to back; each step needs the value of the
     * previous one (b) to form the interpolation pair. */
    for (i = (1 << LSP_POW_BITS) - 1; i >= 0; i--)
    {
        a = pow_a_table[ix++]<<4;
        s->lsp_pow_m_table1[i] = 2 * a - b;
        s->lsp_pow_m_table2[i] = b - a;
        b = a;
    }
}
/*
 * Turn a set of LSP coefficients into an exponent curve.
 *
 * For each of the n output positions the products P and Q over the odd and
 * even LSP coefficients are evaluated against the precomputed cosine table
 * and combined, and pow_m1_4() of the result is stored.
 *
 * s            decoder context (provides lsp_cos_table and the pow tables)
 * out          receives n curve values
 * val_max_ptr  receives the largest value written to out (0 if none is
 *              positive)
 * n            number of values to produce
 * lsp          NB_LSP_COEFS LSP coefficients
 *
 * NOTE: We use the same code as Vorbis here
 * XXX: optimize it further with SSE/3Dnow
 */
static void wma_lsp_to_curve(WMADecodeContext *s,
                             int32_t *out,
                             int32_t *val_max_ptr,
                             int n,
                             int32_t *lsp)
{
    int i, j;
    int32_t p, q, w, v, val_max, diff;

    val_max = 0;
    for (i = 0; i < n; ++i)
    {
        /* start both products pre-shifted left by 5 to reduce rounding
         * error; they are renormalized right before pow_m1_4
         */
        p = 0x8000<<5;
        q = 0x8000<<5;
        w = s->lsp_cos_table[i];
        for (j = 1; j < NB_LSP_COEFS; j += 2)
        {
            /* w is 5.27 format, lsp is in 16.16, diff becomes 5.27 format */
            diff = ((w - (lsp[j - 1]<<11)));
            /* q is 16.16 format, diff is 5.27, q becomes 16.16 */
            q = fixmul32b(q, diff )<<4;
            p = fixmul32b(p, (w - (lsp[j]<<11)))<<4;
        }
        /* 2 in 5.27 format is 0x10000000 */
        p = fixmul32(p, fixmul32b(p, (0x10000000 - w)))<<3;
        q = fixmul32(q, fixmul32b(q, (0x10000000 + w)))<<3;
        v = (p + q) >>9; /* p/q end up as 16.16 */
        v = pow_m1_4(s, v);
        if (v > val_max)
            val_max = v;
        out[i] = v;
    }
    *val_max_ptr = val_max;
}
/*
 * Decode the exponents of channel ch when they are coded as LSP
 * coefficients (same idea as Vorbis).
 *
 * Reads one codebook index per LSP coefficient from the bitstream (3 bits
 * for the first coefficient and for coefficients 8 and up, 4 bits for the
 * others), looks the values up in lsp_codebook and converts them into the
 * exponent curve and its maximum for the current block.
 */
static void decode_exp_lsp(WMADecodeContext *s, int ch)
{
    int32_t coefs[NB_LSP_COEFS];
    int k, nbits, idx;

    for (k = 0; k < NB_LSP_COEFS; ++k)
    {
        nbits = (k == 0 || k >= 8) ? 3 : 4;
        idx = get_bits(&s->gb, nbits);
        coefs[k] = lsp_codebook[k][idx];
    }

    wma_lsp_to_curve(s, s->exponents[ch], &s->max_exponent[ch],
                     s->block_len, coefs);
}
/*
 * Decode the exponents of channel ch when they are coded with VLC codes.
 *
 * Each decoded code is a delta (biased by 60) on a running exponent index;
 * the index is mapped through pow_10_to_yover16 and the resulting value is
 * replicated over the width of the next scale-factor band. WMA v1 streams
 * start with an explicit 5-bit exponent for the first band, v2 streams
 * start from index 36.
 *
 * Stores the largest value written in s->max_exponent[ch].
 * Returns 0 on success, -1 if a VLC code could not be decoded.
 */
static int decode_exp_vlc(WMADecodeContext *s, int ch)
{
    /* base pointer moved 61 entries in, to accommodate the negative indices */
    const int32_t *pow_tab = &pow_10_to_yover16[61];
    const uint16_t *band;
    int32_t *dst, *dst_end;
    int32_t scale, peak;
    int exp_idx, run, code;

    band = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
    dst = s->exponents[ch];
    dst_end = dst + s->block_len;
    peak = 0;

    if (s->version != 1)
    {
        exp_idx = 36;
    }
    else
    {
        /* wmav1 only: the first band carries an absolute exponent */
        exp_idx = get_bits(&s->gb, 5) + 10;
        /* XXX: use a table */
        scale = pow_tab[exp_idx];
        peak = scale;
        run = *band++;
        do
        {
            *dst++ = scale;
        }
        while (--run);
    }

    while (dst < dst_end)
    {
        code = get_vlc2(&s->gb, s->exp_vlc.table, EXPVLCBITS, EXPMAX);
        if (code < 0)
        {
            return -1;
        }
        /* NOTE: this offset is the same as MPEG4 AAC ! */
        exp_idx += code - 60;
        /* XXX: use a table */
        scale = pow_tab[exp_idx];
        if (scale > peak)
        {
            peak = scale;
        }
        run = *band++;
        do
        {
            *dst++ = scale;
        }
        while (--run);
    }

    s->max_exponent[ch] = peak;
    return 0;
}
/*
 * Decode one block of the current frame from s->gb and overlap-add its
 * windowed IMDCT output into s->frame_out.
 *
 * Steps: read the block length(s), the per-channel "coded" flags, the total
 * gain, the noise-coding information, the exponents and the run/level coded
 * coefficients; dequantise into s->coefs; undo mid/side stereo; run the
 * IMDCT and window the result into the frame buffer.
 *
 * Returns 0 if OK and more blocks follow, 1 if this was the last block of
 * the frame, or a negative value on an unrecoverable error:
 *   -2, -3, -4  a block length code is out of range
 *   -5          the block does not fit into the frame
 *   -6          invalid high-band gain VLC code
 *   -7          exponent VLC decoding failed
 *   -8          invalid coefficient VLC code
 */
static int wma_decode_block(WMADecodeContext *s)
{
int n, v, a, ch, code, bsize;
int coef_nb_bits, total_gain;
int nb_coefs[MAX_CHANNELS];
int32_t mdct_norm;
/* compute current block length */
if (s->use_variable_block_len)
{
n = av_log2(s->nb_block_sizes - 1) + 1;
if (s->reset_block_lengths)
{
/* first block after a reset: previous and current sizes are transmitted */
s->reset_block_lengths = 0;
v = get_bits(&s->gb, n);
if (v >= s->nb_block_sizes)
{
return -2;
}
s->prev_block_len_bits = s->frame_len_bits - v;
v = get_bits(&s->gb, n);
if (v >= s->nb_block_sizes)
{
return -3;
}
s->block_len_bits = s->frame_len_bits - v;
}
else
{
/* update block lengths */
s->prev_block_len_bits = s->block_len_bits;
s->block_len_bits = s->next_block_len_bits;
}
v = get_bits(&s->gb, n);
if (v >= s->nb_block_sizes)
return -4;
s->next_block_len_bits = s->frame_len_bits - v;
}
else
{
/* fixed block len */
s->next_block_len_bits = s->frame_len_bits;
s->prev_block_len_bits = s->frame_len_bits;
s->block_len_bits = s->frame_len_bits;
}
/* now check if the block length is coherent with the frame length */
s->block_len = 1 << s->block_len_bits;
if ((s->block_pos + s->block_len) > s->frame_len)
{
return -5; /* oddly 32k sample from tracker fails here */
}
/* NOTE(review): ms_stereo is only read from the stream for two channels;
 * otherwise it keeps whatever value the context already holds -- confirm
 * the context is zero-initialised by the caller. */
if (s->nb_channels == 2)
{
s->ms_stereo = get_bits(&s->gb, 1);
}
v = 0;
for (ch = 0; ch < s->nb_channels; ++ch)
{
a = get_bits(&s->gb, 1);
s->channel_coded[ch] = a;
v |= a;
}
/* if no channel coded, no need to go further */
/* XXX: fix potential framing problems */
if (!v)
{
goto next;
}
/* bsize: index of the current block size (0 = full frame length) */
bsize = s->frame_len_bits - s->block_len_bits;
/* read total gain and extract corresponding number of bits for
coef escape coding */
/* the gain is sent in 7-bit chunks; a chunk of 127 means another follows */
total_gain = 1;
for(;;)
{
a = get_bits(&s->gb, 7);
total_gain += a;
if (a != 127)
{
break;
}
}
if (total_gain < 15)
coef_nb_bits = 13;
else if (total_gain < 32)
coef_nb_bits = 12;
else if (total_gain < 40)
coef_nb_bits = 11;
else if (total_gain < 45)
coef_nb_bits = 10;
else
coef_nb_bits = 9;
/* compute number of coefficients */
n = s->coefs_end[bsize] - s->coefs_start;
for(ch = 0; ch < s->nb_channels; ++ch)
{
nb_coefs[ch] = n;
}
/* complex coding */
if (s->use_noise_coding)
{
/* first pass: one flag per high band saying whether it is noise coded */
for(ch = 0; ch < s->nb_channels; ++ch)
{
if (s->channel_coded[ch])
{
int i, n, a;
n = s->exponent_high_sizes[bsize];
for(i=0;i<n;++i)
{
a = get_bits(&s->gb, 1);
s->high_band_coded[ch][i] = a;
/* if noise coding, the coefficients are not transmitted */
if (a)
nb_coefs[ch] -= s->exponent_high_bands[bsize][i];
}
}
}
/* second pass: gain of each noise coded band; the first one is absolute
 * (7 bits), the following ones are VLC coded deltas */
for(ch = 0; ch < s->nb_channels; ++ch)
{
if (s->channel_coded[ch])
{
int i, n, val, code;
n = s->exponent_high_sizes[bsize];
val = (int)0x80000000; /* marker: no gain read yet */
for(i=0;i<n;++i)
{
if (s->high_band_coded[ch][i])
{
if (val == (int)0x80000000)
{
val = get_bits(&s->gb, 7) - 19;
}
else
{
//code = get_vlc(&s->gb, &s->hgain_vlc);
code = get_vlc2(&s->gb, s->hgain_vlc.table, HGAINVLCBITS, HGAINMAX);
if (code < 0)
{
return -6;
}
val += code - 18;
}
s->high_band_values[ch][i] = val;
}
}
}
}
}
/* exponents can be reused in short blocks. */
/* full-length blocks always carry exponents; shorter ones signal it with
 * one bit */
if ((s->block_len_bits == s->frame_len_bits) || get_bits(&s->gb, 1))
{
for(ch = 0; ch < s->nb_channels; ++ch)
{
if (s->channel_coded[ch])
{
if (s->use_exp_vlc)
{
if (decode_exp_vlc(s, ch) < 0)
{
return -7;
}
}
else
{
decode_exp_lsp(s, ch);
}
/* remember for which block size these exponents were decoded */
s->exponents_bsize[ch] = bsize;
}
}
}
/* parse spectral coefficients : just RLE encoding */
for(ch = 0; ch < s->nb_channels; ++ch)
{
if (s->channel_coded[ch])
{
VLC *coef_vlc;
int level, run, sign, tindex;
int16_t *ptr, *eptr;
const uint16_t *level_table, *run_table;
/* special VLC tables are used for ms stereo because
there is potentially less energy there */
tindex = (ch == 1 && s->ms_stereo);
coef_vlc = &s->coef_vlc[tindex];
run_table = s->run_table[tindex];
level_table = s->level_table[tindex];
/* XXX: optimize */
ptr = &s->coefs1[ch][0];
eptr = ptr + nb_coefs[ch];
memset(ptr, 0, s->block_len * sizeof(int16_t));
for(;;)
{
code = get_vlc2(&s->gb, coef_vlc->table, VLCBITS, VLCMAX);
//code = get_vlc(&s->gb, coef_vlc);
if (code < 0)
{
return -8;
}
if (code == 1)
{
/* EOB */
break;
}
else if (code == 0)
{
/* escape */
level = get_bits(&s->gb, coef_nb_bits);
/* NOTE: this is rather suboptimal. reading
block_len_bits would be better */
run = get_bits(&s->gb, s->frame_len_bits);
}
else
{
/* normal code */
run = run_table[code];
level = level_table[code];
}
/* a sign bit of 0 means the level is negative */
sign = get_bits(&s->gb, 1);
if (!sign)
level = -level;
ptr += run;
if (ptr >= eptr)
{
break;
}
*ptr++ = level;
/* NOTE: EOB can be omitted */
if (ptr >= eptr)
break;
}
}
if (s->version == 1 && s->nb_channels >= 2)
{
align_get_bits(&s->gb);
}
}
{
int n4 = s->block_len >> 1;
/* theres no reason to do a divide by two in fixed precision ... */
mdct_norm = 0x10000>>(s->block_len_bits-1);
if (s->version == 1)
{
mdct_norm *= fixtoi32(fixsqrt32(itofix32(n4))); /* PJJ : exercise this path */
}
}
/* finally compute the MDCT coefficients */
for(ch = 0; ch < s->nb_channels; ++ch)
{
if (s->channel_coded[ch])
{
int16_t *coefs1;
int32_t *exponents, *exp_ptr;
int32_t *coefs, atemp;
int64_t mult;
int64_t mult1;
int32_t noise, temp1, temp2, mult2;
int i, j, n, n1, last_high_band, esize;
int32_t exp_power[HIGH_BAND_MAX_SIZE];
coefs1 = s->coefs1[ch];
exponents = s->exponents[ch];
esize = s->exponents_bsize[ch];
coefs = (*(s->coefs))[ch];
n=0;
/*
* Previously the IMDCT was run in 17.15 precision to avoid overflow. However rare files could
* overflow here as well, so switch to 17.15 during coefs calculation.
*/
if (s->use_noise_coding)
{
/*TODO: mult should be converted to 32 bit to speed up noise coding*/
mult = fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch]));
mult = mult* mdct_norm; //what the hell? This is actually int64_t*2^16!
mult1 = mult;
/* very low freqs : noise */
for(i = 0;i < s->coefs_start; ++i)
{
*coefs++ = fixmul32((fixmul32(s->noise_table[s->noise_index],
(*exponents++))>>4),Fixed32From64(mult1)) >>1;
s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1);
}
n1 = s->exponent_high_sizes[bsize];
/* compute power of high bands */
exp_ptr = exponents +
s->high_band_start[bsize] -
s->coefs_start;
last_high_band = 0; /* avoid warning */
for (j=0;j<n1;++j)
{
n = s->exponent_high_bands[s->frame_len_bits -
s->block_len_bits][j];
if (s->high_band_coded[ch][j])
{
int32_t e2, v;
e2 = 0;
for(i = 0;i < n; ++i)
{
/*v is noramlized later on so its fixed format is irrelevant*/
v = exp_ptr[i]>>4;
e2 += fixmul32(v, v)>>3;
}
exp_power[j] = e2/n; /*n is an int...*/
last_high_band = j;
}
exp_ptr += n;
}
/* main freqs and high freqs */
/* j == -1 covers the coefficients below the first high band, j >= 0 the
 * individual high bands */
for(j=-1;j<n1;++j)
{
if (j < 0)
{
n = s->high_band_start[bsize] -
s->coefs_start;
}
else
{
n = s->exponent_high_bands[s->frame_len_bits -
s->block_len_bits][j];
}
if (j >= 0 && s->high_band_coded[ch][j])
{
/* use noise with specified power */
int32_t tmp = fixdiv32(exp_power[j],exp_power[last_high_band]);
mult1 = (int64_t)fixsqrt32(tmp);
/* XXX: use a table */
/*mult1 is 48.16, pow_table is 48.16*/
mult1 = mult1 * pow_table[s->high_band_values[ch][j]+20] >> PRECISION;
/*this step has a fairly high degree of error for some reason*/
mult1 = fixdiv64(mult1,fixmul32(s->max_exponent[ch],s->noise_mult));
mult1 = mult1*mdct_norm>>PRECISION;
for(i = 0;i < n; ++i)
{
noise = s->noise_table[s->noise_index];
s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1);
*coefs++ = fixmul32((fixmul32(*exponents,noise)>>4),Fixed32From64(mult1)) >>1;
++exponents;
}
}
else
{
/* coded values + small noise */
for(i = 0;i < n; ++i)
{
// PJJ: check code path
noise = s->noise_table[s->noise_index];
s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1);
/*don't forget to renormalize the noise*/
temp1 = (((int32_t)*coefs1++)<<16) + (noise>>4);
temp2 = fixmul32(*exponents, mult>>17);
*coefs++ = fixmul32(temp1, temp2);
++exponents;
}
}
}
/* very high freqs : noise */
n = s->block_len - s->coefs_end[bsize];
mult2 = fixmul32(mult>>16,exponents[-1]) ; /*the work around for 32.32 vars are getting stupid*/
for (i = 0; i < n; ++i)
{
/*renormalize the noise product and then reduce to 17.15 precison*/
*coefs++ = fixmul32(s->noise_table[s->noise_index],mult2) >>5;
s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1);
}
}
else
{
/*Noise coding not used, simply convert from exp to fixed representation*/
int32_t mult3 = (int32_t)(fixdiv64(pow_table[total_gain+20],Fixed32To64(s->max_exponent[ch])));
mult3 = fixmul32(mult3, mdct_norm);
n = nb_coefs[ch];
/* XXX: optimize more, unrolling this loop in asm might be a good idea */
for(i = 0;i < s->coefs_start; i++)
*coefs++ = 0;
for(i = 0;i < n; ++i)
{
atemp = (coefs1[i] * mult3)>>1;
/* exponents may stem from a block of a different size (esize), so the
 * index is rescaled from the current block size to that one */
*coefs++=fixmul32(atemp,exponents[i<<bsize>>esize]);
}
n = s->block_len - s->coefs_end[bsize];
memset(coefs, 0, n*sizeof(int32_t));
}
}
}
if (s->ms_stereo && s->channel_coded[1])
{
int32_t a, b;
int i;
int32_t (*coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE] = (s->coefs);
/* nominal case for ms stereo: we do it before mdct */
/* no need to optimize this case because it should almost
never happen */
if (!s->channel_coded[0])
{
memset((*(s->coefs))[0], 0, sizeof(int32_t) * s->block_len);
s->channel_coded[0] = 1;
}
/* mid/side to left/right: channel 0 = a + b, channel 1 = a - b */
for(i = 0; i < s->block_len; ++i)
{
a = (*coefs)[0][i];
b = (*coefs)[1][i];
(*coefs)[0][i] = a + b;
(*coefs)[1][i] = a - b;
}
}
for(ch = 0; ch < s->nb_channels; ++ch)
{
if (s->channel_coded[ch])
{
/* scratch buffer for the IMDCT output, shared by all calls */
static int32_t output[BLOCK_MAX_SIZE * 2] IBSS_ATTR;
int n4, index, n;
n = s->block_len;
n4 = s->block_len >>1;
ff_imdct_calc(&s->mdct_ctx[bsize],
output,
(*(s->coefs))[ch]);
/* add in the frame */
index = (s->frame_len / 2) + s->block_pos - n4;
wma_window(s, output, &s->frame_out[ch][index]);
/* specific fast case for ms-stereo : add to second
channel if it is not coded */
if (s->ms_stereo && !s->channel_coded[1])
{
wma_window(s, output, &s->frame_out[1][index]);
}
}
}
next:
/* update block number */
++s->block_num;
s->block_pos += s->block_len;
if (s->block_pos >= s->frame_len)
{
return 1;
}
else
{
return 0;
}
}
/*
 * Decode one frame of frame_len samples per channel.
 *
 * Blocks are decoded until wma_decode_block() reports the last block of the
 * frame. The first frame_len samples of every channel's frame_out buffer
 * are then written to `samples`, interleaved by channel, and the second
 * half of the buffer is moved to the front as the overlap for the next
 * frame.
 *
 * Returns 0 on success, -1 if a block failed to decode.
 */
static int wma_decode_frame(WMADecodeContext *s, int32_t *samples)
{
    int status, ch, k;
    int nsamples, stride;
    int32_t *src, *dst;

    /* read each block */
    s->block_num = 0;
    s->block_pos = 0;
    do
    {
        status = wma_decode_block(s);
        if (status < 0)
        {
#ifdef WMA_DEBUG
            printf("wma_decode_block failed with code %d\n", status);
#endif
            return -1;
        }
    }
    while (status == 0);

    /* return frame with full 30-bit precision */
    nsamples = s->frame_len;
    stride = s->nb_channels;
    for (ch = 0; ch < s->nb_channels; ++ch)
    {
        src = s->frame_out[ch];
        dst = samples + ch;
        for (k = 0; k < nsamples; ++k)
        {
            dst[k * stride] = src[k];
        }
        /* prepare for next block */
        memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
                s->frame_len * sizeof(int32_t));
    }
    return 0;
}
/*
 * Start decoding a new superframe.
 *
 * Points the bit reader at buf and, when the stream uses the bit reservoir,
 * reads the superframe header: a 4-bit index (ignored), the 4-bit frame
 * count and the bit offset of the first complete frame.
 *
 * Returns 0 if buf_size is 0 (the bit reservoir is reset), 1 otherwise.
 */
int wma_decode_superframe_init(WMADecodeContext* s,
                               uint8_t *buf, /*input*/
                               int buf_size)
{
    if (buf_size == 0)
    {
        s->last_superframe_len = 0;
        return 0;
    }

    s->current_frame = 0;
    init_get_bits(&s->gb, buf, buf_size*8);

    if (!s->use_bit_reservoir)
    {
        s->nb_frames = 1;
        return 1;
    }

    /* read super frame header */
    get_bits(&s->gb, 4); /* super frame index */
    s->nb_frames = get_bits(&s->gb, 4);
    if (s->last_superframe_len == 0)
    {
        /* nothing saved from the previous superframe: one frame fewer */
        s->nb_frames--;
    }
    else if (s->nb_frames == 0)
    {
        s->nb_frames++;
    }
    s->bit_offset = get_bits(&s->gb, s->byte_offset_bits + 3);
    return 1;
}
/*
 * Decode a single frame of the current superframe.
 *
 * wma_decode_superframe_init() must have been called on the same buffer
 * first. With the bit reservoir in use, the first call completes the frame
 * left over from the previous superframe (if any) with the first
 * s->bit_offset bits of this one, and the call that decodes the last frame
 * saves the undecoded tail of buf for the next superframe.
 *
 * s         decoder context
 * samples   output; receives frame_len samples per channel, interleaved
 * buf       superframe data, as passed to wma_decode_superframe_init()
 * buf_size  size of buf in bytes
 *
 * Returns the number of samples decoded per channel (s->frame_len), or -1
 * if there was a decoding error; on error the bit reservoir is reset.
 */
int wma_decode_superframe_frame(WMADecodeContext* s,
                                int32_t* samples, /*output*/
                                uint8_t *buf, /*input*/
                                int buf_size)
{
    int pos, len;
    uint8_t *q;
    int done = 0;

    if ((s->use_bit_reservoir) && (s->current_frame == 0))
    {
        if (s->last_superframe_len > 0)
        {
            /* add s->bit_offset bits to last frame */
            if ((s->last_superframe_len + ((s->bit_offset + 7) >> 3)) >
                    MAX_CODED_SUPERFRAME_SIZE)
            {
#ifdef WMA_DEBUG
                printf("superframe size too large error\n");
#endif
                goto fail;
            }
            q = s->last_superframe + s->last_superframe_len;
            len = s->bit_offset;
            /* Copy whole bytes first. The bound must be "> 7": with "> 0"
             * the loop also swallowed the final partial byte, so the
             * partial-byte branch below could never run and bits beyond
             * bit_offset were copied into the reservoir. */
            while (len > 7)
            {
                *q++ = (get_bits)(&s->gb, 8);
                len -= 8;
            }
            /* remaining 1..7 bits, left-aligned in the last byte */
            if (len > 0)
            {
                *q++ = (get_bits)(&s->gb, len) << (8 - len);
            }

            /* XXX: s->bit_offset bits into last frame */
            init_get_bits(&s->gb, s->last_superframe, MAX_CODED_SUPERFRAME_SIZE*8);
            /* skip unused bits */
            if (s->last_bitoffset > 0)
                skip_bits(&s->gb, s->last_bitoffset);

            /* this frame is stored in the last superframe and in the
               current one */
            if (wma_decode_frame(s, samples) < 0)
            {
                goto fail;
            }
            done = 1;
        }

        /* read each frame starting from s->bit_offset */
        /* 4 + 4 + byte_offset_bits + 3 is the size of the superframe header
         * read by wma_decode_superframe_init() */
        pos = s->bit_offset + 4 + 4 + s->byte_offset_bits + 3;
        init_get_bits(&s->gb, buf + (pos >> 3), (MAX_CODED_SUPERFRAME_SIZE - (pos >> 3))*8);
        len = pos & 7;
        if (len > 0)
            skip_bits(&s->gb, len);

        s->reset_block_lengths = 1;
    }

    /* If we haven't decoded a frame yet, do it now */
    if (!done)
    {
        if (wma_decode_frame(s, samples) < 0)
        {
            goto fail;
        }
    }

    s->current_frame++;

    if ((s->use_bit_reservoir) && (s->current_frame == s->nb_frames))
    {
        /* we copy the end of the frame in the last frame buffer */
        pos = get_bits_count(&s->gb) + ((s->bit_offset + 4 + 4 + s->byte_offset_bits + 3) & ~7);
        s->last_bitoffset = pos & 7;
        pos >>= 3;
        len = buf_size - pos;
        if (len > MAX_CODED_SUPERFRAME_SIZE || len < 0)
        {
#ifdef WMA_DEBUG
            printf("superframe size too large error after decodeing\n");
#endif
            goto fail;
        }
        s->last_superframe_len = len;
        memcpy(s->last_superframe, buf + pos, len);
    }

    return s->frame_len;

fail:
    /* when error, we reset the bit reservoir */
    s->last_superframe_len = 0;
    return -1;
}
/****************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
*
* Copyright (C) 2007 Michael Giacomelli
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "wmadec.h"
#include "wmafixed.h"
/**
 * Convert a plain integer to the 64-bit fixed-point representation.
 *
 * The 32 bits of @p x are placed at bits 16..47 of the result, i.e. the
 * value is scaled by 2^16.  This is done with shifts instead of byte-wise
 * stores so the result no longer depends on the host byte order (the old
 * code only handled big-endian hosts when ROCKBOX_BIG_ENDIAN was defined).
 *
 * NOTE: as before, negative inputs are NOT sign-extended: bits 48..63 of
 * the result are always zero.  IntFrom64() reads back the same 32 bits, so
 * the round trip is still exact.
 *
 * @param x integer value to convert
 * @return @p x as a 64-bit fixed-point number with 16 fractional bits
 */
int64_t IntTo64(int x)
{
    /* Two bytes of fraction, matching the byte offset the byte-wise
     * implementation used. */
    const unsigned frac_bits = 16;

    return (int64_t)((uint64_t)(uint32_t)x << frac_bits);
}
/**
 * Convert a 64-bit fixed-point value back to a plain integer.
 *
 * Returns bits 16..47 of @p x (the integer part of a number with 16
 * fractional bits); the fraction and bits 48..63 are discarded.  The
 * extraction is done with an unsigned shift, which is independent of the
 * host byte order and avoids shifting a byte into the sign bit of an int.
 *
 * @param x 64-bit fixed-point value
 * @return integer part of @p x, truncated to 32 bits
 */
int IntFrom64(int64_t x)
{
    /* Two bytes of fraction, matching the byte offset the byte-wise
     * implementation used. */
    const unsigned frac_bits = 16;

    return (int)(uint32_t)((uint64_t)x >> frac_bits);
}
/**
 * Narrow a 64-bit fixed-point value to 32 bits.
 *
 * Only the low 32 bits of @p x survive; no saturation is performed.
 */
int32_t Fixed32From64(int64_t x)
{
    const int64_t low_word = x & 0xFFFFFFFF;

    return (int32_t)low_word;
}
/**
 * Widen a 32-bit fixed-point value to 64 bits.
 *
 * The value is sign-extended; the position of the binary point is unchanged.
 */
int64_t Fixed32To64(int32_t x)
{
    const int64_t widened = x;

    return widened;
}
/*
* Not performance sensitive code here
*/
/**
 * Multiply a 64-bit value by a 32-bit fixed-point value.
 *
 * This is a plain 64-bit integer product: the result is NOT shifted back
 * down, so renormalising is left to the caller.
 */
int64_t fixmul64byfixed(int64_t x, int32_t y)
{
    const int64_t factor = y;

    return factor * x;
}
/**
 * Divide two 32-bit fixed-point numbers.
 *
 * The dividend is scaled up by 2^PRECISION in 64-bit arithmetic before the
 * division so the quotient keeps PRECISION fractional bits.
 *
 * @param x dividend
 * @param y divisor
 * @return x / y in fixed point; 0 when @p x is 0 (checked first, so 0/0
 *         is 0); 0x7fffffff when @p y is 0, whatever the sign of @p x.
 *         The quotient is truncated to 32 bits without saturation.
 */
int32_t fixdiv32(int32_t x, int32_t y)
{
    int64_t scaled;

    if (x == 0)
        return 0;
    if (y == 0)
        return 0x7fffffff;

    /* Scale by multiplying rather than "<<": left-shifting a negative
     * value is undefined behaviour in C.  A 32-bit value times
     * 2^PRECISION always fits in 64 bits. */
    scaled = (int64_t)x * ((int64_t)1 << PRECISION);
    return (int32_t)(scaled / y);
}
/**
 * Divide two 64-bit fixed-point numbers.
 *
 * The dividend is scaled up by 2^PRECISION64 before the division so the
 * quotient keeps PRECISION64 fractional bits.  There is no wider type to
 * scale into, so dividends that need more than (64 - PRECISION64) bits
 * lose their top bits.
 *
 * @param x dividend
 * @param y divisor
 * @return x / y in fixed point; 0 when @p x is 0 (checked first, so 0/0
 *         is 0); 0x07ffffffffffffffLL when @p y is 0, whatever the sign
 *         of @p x.
 *         NOTE(review): that sentinel is INT64_MAX >> 4, not INT64_MAX;
 *         kept as is because callers may compare against it.
 */
int64_t fixdiv64(int64_t x, int64_t y)
{
    int64_t scaled;

    if (x == 0)
        return 0;
    if (y == 0)
        return 0x07ffffffffffffffLL;

    /* Shift in unsigned arithmetic: "<<" on a negative or overflowing
     * signed value is undefined behaviour, whereas the unsigned shift
     * wraps and yields the same bit pattern. */
    scaled = (int64_t)((uint64_t)x << PRECISION64);
    return (int64_t)(scaled / y);
}
/**
 * Square root of a 32-bit fixed-point number.
 *
 * Runs a bit-by-bit integer square root on the raw value of @p x (16
 * steps, one per result bit, most significant first), then shifts the root
 * left by PRECISION / 2.  Since sqrt(v * 2^PRECISION) equals
 * sqrt(v) * 2^(PRECISION / 2), this puts the binary point back at
 * PRECISION fractional bits.
 *
 * @param x value to take the root of; it is reinterpreted as unsigned,
 *          so negative inputs are not rejected
 * @return square root of @p x in fixed point
 */
int32_t fixsqrt32(int32_t x)
{
    unsigned long root = 0;
    unsigned long remainder = (unsigned long)x;
    int k;

    for (k = 15; k >= 0; k--)
    {
        /* Candidate: current root with bit 2k set. */
        const unsigned long trial = root + (1 << (k * 2));

        root >>= 1;
        if (trial <= remainder)
        {
            remainder -= trial;
            root |= (1 << (k * 2));
        }
    }

    return (int32_t)(root << (PRECISION / 2));
}
/* Inverse gain of circular cordic rotation in s0.31 format.
 * Used by fsincos() as the initial x component of the rotation vector.
 * NOTE(review): 0xb2458939 has bit 31 set, so once stored in the int32_t
 * used by fsincos() it presumably reads as about -0.607 rather than +0.607;
 * confirm that the quadrant fix-ups in fsincos() rely on that sign. */
static const long cordic_circular_gain = 0xb2458939; /* 0.607252929 */
/* Table of values of atan(2^-i) in 0.32 format, expressed as fractions of
 * pi where pi = 0xffffffff / 2.  Entry i is the rotation angle applied by
 * CORDIC iteration i; the trailing comments give the angle in radians.
 * The table has 32 entries, of which fsincos() reads indices 0..30. */
static const unsigned long atan_table[] = {
0x1fffffff, /* +0.785398163 (or pi/4) */
0x12e4051d, /* +0.463647609 */
0x09fb385b, /* +0.244978663 */
0x051111d4, /* +0.124354995 */
0x028b0d43, /* +0.062418810 */
0x0145d7e1, /* +0.031239833 */
0x00a2f61e, /* +0.015623729 */
0x00517c55, /* +0.007812341 */
0x0028be53, /* +0.003906230 */
0x00145f2e, /* +0.001953123 */
0x000a2f98, /* +0.000976562 */
0x000517cc, /* +0.000488281 */
0x00028be6, /* +0.000244141 */
0x000145f3, /* +0.000122070 */
0x0000a2f9, /* +0.000061035 */
0x0000517c, /* +0.000030518 */
0x000028be, /* +0.000015259 */
0x0000145f, /* +0.000007629 */
0x00000a2f, /* +0.000003815 */
0x00000517, /* +0.000001907 */
0x0000028b, /* +0.000000954 */
0x00000145, /* +0.000000477 */
0x000000a2, /* +0.000000238 */
0x00000051, /* +0.000000119 */
0x00000028, /* +0.000000060 */
0x00000014, /* +0.000000030 */
0x0000000a, /* +0.000000015 */
0x00000005, /* +0.000000007 */
0x00000002, /* +0.000000004 */
0x00000001, /* +0.000000002 */
0x00000000, /* +0.000000001 */
0x00000000, /* +0.000000000 */
};
/*
* Below here functions do not use standard fixed precision!
*/
/**
 * Compute sine and cosine of an angle with CORDIC rotation.
 *
 * The rotation vector starts at (cordic_circular_gain, 0).  The phase is
 * first moved to sit around the pi/2 pivot (negating the start vector for
 * the first and last quarter of the circle), then 31 shift-and-add
 * iterations rotate the vector by +/- atan_table[i] until the residual
 * angle has converged onto the pivot.
 *
 * @param phase angle, with 0 .. 0xffffffff representing 0 .. 2*pi
 * @param cos   if non-NULL, receives the cosine of @p phase
 * @return sine of @p phase.  The value is computed in an int32_t, so it
 *         spans the signed 32-bit range (representing -1 .. 1) even where
 *         long is wider.
 *
 * Gives at least 24 bits precision (last 2-8 bits or so are probably off)
 */
long fsincos(unsigned long phase, int32_t *cos)
{
    /* A quarter turn (pi/2) on the 0 .. 0xffffffff phase scale. */
    const unsigned long quarter_turn = 0xffffffff / 4;

    int32_t vec_x = cordic_circular_gain;
    int32_t vec_y = 0;
    unsigned long angle = phase;
    int step;

    /* Bring the angle into the range the iteration below can handle. */
    if (angle < quarter_turn)
    {
        /* phase below pi/2: flip the start vector, advance by pi/2 */
        vec_x = -vec_x;
        angle += quarter_turn;
    }
    else if (angle < 3 * quarter_turn)
    {
        /* phase in pi/2 .. 3pi/2: step back by pi/2 */
        angle -= quarter_turn;
    }
    else
    {
        /* phase of 3pi/2 and above: flip the start vector, step back 3pi/2 */
        vec_x = -vec_x;
        angle -= 3 * quarter_turn;
    }

    /* Each iteration adds roughly 1 bit of extra precision */
    for (step = 0; step < 31; step++)
    {
        const int32_t dx = vec_x >> step;
        const int32_t dy = vec_y >> step;
        const unsigned long dz = atan_table[step];

        /* Rotate towards the pivot point, which is pi/2 */
        if (angle >= quarter_turn)
        {
            vec_x -= dy;
            vec_y += dx;
            angle -= dz;
        }
        else
        {
            vec_x += dy;
            vec_y -= dx;
            angle += dz;
        }
    }

    if (cos)
        *cos = vec_x;

    return vec_y;
}
/****************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
*
* Copyright (C) 2007 Michael Giacomelli
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/* fixed precision code. We use a combination of Sign 15.16 and Sign.31
precision here.
The WMA decoder does not always follow this convention, and occasionally
renormalizes values to other formats in order to maximize precision.
However, only the two precisions above are provided in this file.
*/
#include <inttypes.h>
/* Number of fractional bits of the 32-bit and the 64-bit fixed-point
 * formats respectively. */
#define PRECISION 16
#define PRECISION64 16
/* Raw fixed-point value -> float.  Does not work on int64_t! */
#define fixtof64(x) (float)((float)(x) / (float)(1 << PRECISION64))
/* Float -> 32-bit fixed point, rounding half away from zero. */
#define ftofix32(x) ((int32_t)((x) * (float)(1 << PRECISION) + ((x) < 0 ? -0.5 : 0.5)))
/* Integer -> 64-bit fixed point; see IntTo64(). */
#define itofix64(x) (IntTo64(x))
/* Integer -> 32-bit fixed point.  Written as a multiplication because
 * left-shifting a negative value is undefined behaviour in C; the result
 * and its type are the same as for (x) << PRECISION. */
#define itofix32(x) ((x) * (1 << PRECISION))
/* 32-bit fixed point -> integer, dropping the fractional bits. */
#define fixtoi32(x) ((x) >> PRECISION)
/* 64-bit fixed point -> integer; see IntFrom64(). */
#define fixtoi64(x) (IntFrom64(x))
/* Fixed-point helper functions, implemented in the matching .c file. */
/* Integer -> 64-bit fixed point: the 32 bits of x end up 16 bits above
 * the bottom of the result. */
int64_t IntTo64(int x);
/* 64-bit fixed point -> integer: returns the 32 bits that sit 16 bits
 * above the bottom of x. */
int IntFrom64(int64_t x);
/* Keeps only the low 32 bits of x (no saturation). */
int32_t Fixed32From64(int64_t x);
/* Widens x to 64 bits without moving the binary point. */
int64_t Fixed32To64(int32_t x);
/* Plain 64-bit product x * y; the result is not shifted back down. */
int64_t fixmul64byfixed(int64_t x, int32_t y);
/* (x << PRECISION) / y.  Returns 0 if x is 0, 0x7fffffff if y is 0. */
int32_t fixdiv32(int32_t x, int32_t y);
/* (x << PRECISION64) / y.  Returns 0 if x is 0, 0x07ffffffffffffffLL if
 * y is 0. */
int64_t fixdiv64(int64_t x, int64_t y);
/* Square root of a 32-bit fixed-point value. */
int32_t fixsqrt32(int32_t x);
/* CORDIC sine/cosine.  phase 0..0xffffffff maps to 0..2*pi; returns the
 * sine and, when cos is non-NULL, stores the cosine through it. */
long fsincos(unsigned long phase, int32_t *cos);
#ifdef CPU_ARM
/* Sign-15.16 format.
 *
 * fixmul32(x, y): ARM fixed-point multiply, written as a GCC statement
 * expression so it can be used like a function call.
 *   smull  - signed 32x32 -> 64 bit multiply into __lo (low word) and
 *            __hi (high word)
 *   movs   - __lo >>= PRECISION (logical shift); the last bit shifted out
 *            lands in the carry flag
 *   adc    - __result = __lo + (__hi << (32 - PRECISION)) + carry
 * The net effect is (x * y) >> PRECISION, rounded using the carry bit.
 * The condition flags are modified, hence the "cc" clobber. */
#define fixmul32(x, y) \
({ int32_t __hi; \
uint32_t __lo; \
int32_t __result; \
asm ("smull %0, %1, %3, %4\n\t" \
"movs %0, %0, lsr %5\n\t" \
"adc %2, %0, %1, lsl %6" \
: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
: "%r" (x), "r" (y), \
"M" (PRECISION), "M" (32 - PRECISION) \
: "cc"); \
__result; \
})
/* fixmul32b(x, y): ARM multiply for operands in Sign.31 format.
 *   smull  - signed 32x32 -> 64 bit multiply into __lo (low word) and
 *            __hi (high word)
 *   movs   - __result = __hi << 1
 * Only the high word of the product is used, so this is (x * y) >> 31
 * with the lowest result bit always zero.  The condition flags are
 * modified, hence the "cc" clobber. */
#define fixmul32b(x, y) \
({ int32_t __hi; \
uint32_t __lo; \
int32_t __result; \
asm ("smull %0, %1, %3, %4\n\t" \
"movs %2, %1, lsl #1" \
: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
: "%r" (x), "r" (y) \
: "cc"); \
__result; \
})
#elif defined(CPU_COLDFIRE)
/* Coldfire version of the Sign-15.16 multiply, using the EMAC unit.
 *
 * The product is formed twice: once in accumulator acc0 (read back into
 * t1 as the "higher half") and once with mulu.l, which leaves the lower
 * half in x.  The two halves are then spliced together.
 * NOTE(review): presumably this expects the EMAC to be in fractional mode,
 * which would explain the extra one-bit right shift of t1 - confirm
 * against the code that configures the EMAC.
 * Only valid for PRECISION == 16 (a #warning is raised otherwise). */
static inline int32_t fixmul32(int32_t x, int32_t y)
{
#if PRECISION != 16
#warning Coldfire fixmul32() only works for PRECISION == 16
#endif
int32_t t1;
asm (
"mac.l %[x], %[y], %%acc0 \n" /* multiply */
"mulu.l %[y], %[x] \n" /* get lower half, avoid emac stall */
"movclr.l %%acc0, %[t1] \n" /* get higher half */
"lsr.l #1, %[t1] \n" /* shift higher half right by one bit */
"move.w %[t1], %[x] \n" /* low word of t1 replaces low word of x */
"swap %[x] \n" /* exchange the two 16-bit halves of x */
: [t1] "=&d" (t1), [x] "+d" (x)
: [y] "d" (y)
);
return x;
}
/* Coldfire version of the Sign.31 multiply, using the EMAC unit.
 *
 * Multiplies x by y into accumulator acc0 and returns whatever movclr.l
 * reads back from it (which also clears the accumulator).
 * NOTE(review): presumably relies on the EMAC being in fractional mode so
 * that the value read back is already (x * y) >> 31 - confirm. */
static inline int32_t fixmul32b(int32_t x, int32_t y)
{
asm (
"mac.l %[x], %[y], %%acc0 \n" /* multiply */
"movclr.l %%acc0, %[x] \n" /* get higher half */
: [x] "+d" (x)
: [y] "d" (y)
);
return x;
}
#else
/* Portable Sign-15.16 multiply: form the full 64-bit product, then drop
 * PRECISION fractional bits.  The result is truncated to 32 bits without
 * saturation. */
static inline int32_t fixmul32(int32_t x, int32_t y)
{
    const int64_t product = (int64_t)x * y;

    return (int32_t)(product >> PRECISION);
}
/* Portable Sign.31 multiply: form the full 64-bit product, then shift it
 * right by 31 bits.  The result is truncated to 32 bits without
 * saturation. */
static inline int32_t fixmul32b(int32_t x, int32_t y)
{
    const int64_t product = (int64_t)x * y;

    return (int32_t)(product >> 31);
}
#endif
#ifdef CPU_ARM
/* ARM complex multiply: (*x + i * *y) = (a + i*b) * (t + i*v).
 *
 * Each 64-bit sum of products is accumulated with smull/smlal and only
 * its high word is kept:
 *   y1 = high word of (b*t + a*v)
 *   x1 = high word of (a*t - b*v)   (b is negated in between with rsb)
 * The high words are then shifted left by one before being stored. */
static inline
void CMUL(int32_t *x, int32_t *y,
int32_t a, int32_t b,
int32_t t, int32_t v)
{
/* This version loses one bit of precision. Could be solved at the cost
* of 2 extra cycles if it becomes an issue. */
int x1, y1, l;
asm(
"smull %[l], %[y1], %[b], %[t] \n"
"smlal %[l], %[y1], %[a], %[v] \n"
"rsb %[b], %[b], #0 \n"
"smull %[l], %[x1], %[a], %[t] \n"
"smlal %[l], %[x1], %[b], %[v] \n"
: [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b)
: [a] "r" (a), [t] "r" (t), [v] "r" (v)
: "cc"
);
*x = x1 << 1;
*y = y1 << 1;
}
#elif defined CPU_COLDFIRE
/* Coldfire complex multiply: (*x + i * *y) = (a + i*b) * (t + i*v),
 * using two EMAC accumulators.
 *
 *   acc0 = a*t - b*v  -> stored through x
 *   acc1 = b*t + a*v  -> stored through y
 * Register a is reused as scratch to move each accumulator to memory; the
 * stores are done inside the asm, hence the "memory" clobber.
 * NOTE(review): the scaling of the stored values depends on the EMAC mode,
 * which is configured elsewhere - confirm it matches the other CMUL
 * variants. */
static inline
void CMUL(int32_t *x, int32_t *y,
int32_t a, int32_t b,
int32_t t, int32_t v)
{
asm volatile ("mac.l %[a], %[t], %%acc0;"
"msac.l %[b], %[v], %%acc0;"
"mac.l %[b], %[t], %%acc1;"
"mac.l %[a], %[v], %%acc1;"
"movclr.l %%acc0, %[a];"
"move.l %[a], (%[x]);"
"movclr.l %%acc1, %[a];"
"move.l %[a], (%[y]);"
: [a] "+&r" (a)
: [x] "a" (x), [y] "a" (y),
[b] "r" (b), [t] "r" (t), [v] "r" (v)
: "cc", "memory");
}
#else
/* Portable complex multiply:
 *   (*pre + i * *pim) = (are + i*aim) * (bre + i*bim)
 * with every partial product computed by fixmul32b(). */
static inline
void CMUL(int32_t *pre,
          int32_t *pim,
          int32_t are,
          int32_t aim,
          int32_t bre,
          int32_t bim)
{
    /* The four partial products of the complex multiplication. */
    const int32_t re_re = fixmul32b(bre, are);
    const int32_t im_im = fixmul32b(bim, aim);
    const int32_t re_im = fixmul32b(bre, aim);
    const int32_t im_re = fixmul32b(bim, are);

    *pre = re_re - im_im;
    *pim = re_im + im_re;
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment