Commit 199f5add authored by michael

use 16bit IDWT (a SIMD implementation of it should be >2x faster than with the old 32bit code)
disable mmx/sse2 optimizations as they need a rewrite now


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@10218 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 2fcb309c
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
/* dct code */ /* dct code */
typedef short DCTELEM; typedef short DCTELEM;
typedef int DWTELEM; typedef int DWTELEM;
typedef short IDWTELEM;
void fdct_ifast (DCTELEM *data); void fdct_ifast (DCTELEM *data);
void fdct_ifast248 (DCTELEM *data); void fdct_ifast248 (DCTELEM *data);
...@@ -390,8 +391,8 @@ typedef struct DSPContext { ...@@ -390,8 +391,8 @@ typedef struct DSPContext {
void (*h264_dct)(DCTELEM block[4][4]); void (*h264_dct)(DCTELEM block[4][4]);
/* snow wavelet */ /* snow wavelet */
void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
void (*horizontal_compose97i)(DWTELEM *b, int width); void (*horizontal_compose97i)(IDWTELEM *b, int width);
void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
void (*prefetch)(void *mem, int stride, int h); void (*prefetch)(void *mem, int stride, int h);
......
...@@ -3621,6 +3621,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -3621,6 +3621,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#endif #endif
#ifdef CONFIG_SNOW_DECODER #ifdef CONFIG_SNOW_DECODER
#if 0
if(mm_flags & MM_SSE2){ if(mm_flags & MM_SSE2){
c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
...@@ -3631,6 +3632,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -3631,6 +3632,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
} }
#endif
#endif #endif
if(mm_flags & MM_3DNOW){ if(mm_flags & MM_3DNOW){
......
This diff is collapsed.
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#define QSHIFT 5 #define QSHIFT 5
#define QROOT (1<<QSHIFT) #define QROOT (1<<QSHIFT)
#define LOSSLESS_QLOG -128 #define LOSSLESS_QLOG -128
#define FRAC_BITS 8 #define FRAC_BITS 4
#define MAX_REF_FRAMES 8 #define MAX_REF_FRAMES 8
#define LOG2_OBMC_MAX 8 #define LOG2_OBMC_MAX 8
...@@ -43,17 +43,18 @@ ...@@ -43,17 +43,18 @@
/** Used to minimize the amount of memory used in order to optimize cache performance. **/ /** Used to minimize the amount of memory used in order to optimize cache performance. **/
struct slice_buffer_s { struct slice_buffer_s {
DWTELEM * * line; ///< For use by idwt and predict_slices. IDWTELEM * * line; ///< For use by idwt and predict_slices.
DWTELEM * * data_stack; ///< Used for internal purposes. IDWTELEM * * data_stack; ///< Used for internal purposes.
int data_stack_top; int data_stack_top;
int line_count; int line_count;
int line_width; int line_width;
int data_count; int data_count;
DWTELEM * base_buffer; ///< Buffer that this structure is caching. IDWTELEM * base_buffer; ///< Buffer that this structure is caching.
}; };
#define liftS lift #define liftS lift
#define lift5 lift #define lift5 lift
#define inv_lift5 inv_lift
#if 1 #if 1
#define W_AM 3 #define W_AM 3
#define W_AO 0 #define W_AO 0
...@@ -123,8 +124,8 @@ struct slice_buffer_s { ...@@ -123,8 +124,8 @@ struct slice_buffer_s {
#define W_DS 9 #define W_DS 9
#endif #endif
extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); extern void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width); extern void ff_snow_horizontal_compose97i(IDWTELEM *b, int width);
extern void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); extern void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
#ifdef CONFIG_SNOW_ENCODER #ifdef CONFIG_SNOW_ENCODER
...@@ -137,7 +138,7 @@ static int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int ...@@ -137,7 +138,7 @@ static int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
/* C bits used by mmx/sse2/altivec */ /* C bits used by mmx/sse2/altivec */
static av_always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){ static av_always_inline void snow_interleave_line_header(int * i, int width, IDWTELEM * low, IDWTELEM * high){
(*i) = (width) - 2; (*i) = (width) - 2;
if (width & 1){ if (width & 1){
...@@ -146,14 +147,14 @@ static av_always_inline void snow_interleave_line_header(int * i, int width, DWT ...@@ -146,14 +147,14 @@ static av_always_inline void snow_interleave_line_header(int * i, int width, DWT
} }
} }
static av_always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){ static av_always_inline void snow_interleave_line_footer(int * i, IDWTELEM * low, IDWTELEM * high){
for (; (*i)>=0; (*i)-=2){ for (; (*i)>=0; (*i)-=2){
low[(*i)+1] = high[(*i)>>1]; low[(*i)+1] = high[(*i)>>1];
low[*i] = low[(*i)>>1]; low[*i] = low[(*i)>>1];
} }
} }
static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){ static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, IDWTELEM * dst, IDWTELEM * src, IDWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
for(; i<w; i++){ for(; i<w; i++){
dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift); dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift);
} }
...@@ -163,7 +164,7 @@ static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELE ...@@ -163,7 +164,7 @@ static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELE
} }
} }
static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){ static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, IDWTELEM * dst, IDWTELEM * src, IDWTELEM * ref, int width, int w){
for(; i<w; i++){ for(; i<w; i++){
dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO + 4 * src[i]) >> W_BS); dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO + 4 * src[i]) >> W_BS);
} }
......
...@@ -141,10 +141,10 @@ f8f51fa737add17f7fecaefa118b57ed *./tests/data/a-ffv1.avi ...@@ -141,10 +141,10 @@ f8f51fa737add17f7fecaefa118b57ed *./tests/data/a-ffv1.avi
2654678 ./tests/data/a-ffv1.avi 2654678 ./tests/data/a-ffv1.avi
799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv 799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176 stddev: 0.00 PSNR:99.99 bytes:7602176
19c377580ec83d0c5fc4da0740dec278 *./tests/data/a-snow.avi 0356b219110f391044352547360377a8 *./tests/data/a-snow.avi
156532 ./tests/data/a-snow.avi 156586 ./tests/data/a-snow.avi
64282679f712a2b09cde43edb18a6fb0 *./tests/data/out.yuv c038bc896a435796588ca3b96f38bbb5 *./tests/data/out.yuv
stddev: 23.14 PSNR:20.83 bytes:7602176 stddev: 23.15 PSNR:20.83 bytes:7602176
ba999e86070aa971376e7f317a022c37 *./tests/data/a-snow53.avi ba999e86070aa971376e7f317a022c37 *./tests/data/a-snow53.avi
3519486 ./tests/data/a-snow53.avi 3519486 ./tests/data/a-snow53.avi
799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv 799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv
......
...@@ -141,10 +141,10 @@ d72b0960e162d4998b9acbabb07e99ab *./tests/data/a-ffv1.avi ...@@ -141,10 +141,10 @@ d72b0960e162d4998b9acbabb07e99ab *./tests/data/a-ffv1.avi
3525804 ./tests/data/a-ffv1.avi 3525804 ./tests/data/a-ffv1.avi
dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176 stddev: 0.00 PSNR:99.99 bytes:7602176
213abed95d2e43cf7d2c9e1921779e6d *./tests/data/a-snow.avi ae64e5ff9b5684c46e74e48381e6a132 *./tests/data/a-snow.avi
68852 ./tests/data/a-snow.avi 68900 ./tests/data/a-snow.avi
f3ab734e188a8e2af7b89e0f101bd7a1 *./tests/data/out.yuv 5f5b97b726f97d3514b3c2b8e635175c *./tests/data/out.yuv
stddev: 10.86 PSNR:27.40 bytes:7602176 stddev: 10.87 PSNR:27.39 bytes:7602176
3d0da6aeec9b80c6ee0ff4b747bdd0f0 *./tests/data/a-snow53.avi 3d0da6aeec9b80c6ee0ff4b747bdd0f0 *./tests/data/a-snow53.avi
2721980 ./tests/data/a-snow53.avi 2721980 ./tests/data/a-snow53.avi
dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv
......
...@@ -2050,47 +2050,47 @@ ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ...@@ -2050,47 +2050,47 @@ ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st:-1 ts:-1.000000 flags:0 ret: 0 st:-1 ts:-1.000000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st:-1 ts:1.894167 flags:1 ret: 0 st:-1 ts:1.894167 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46872 size:3663 flags:1 ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st: 0 ts:0.800000 flags:0 ret: 0 st: 0 ts:0.800000 flags:0
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31656 size:3478 flags:1 ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31696 size:3478 flags:1
ret:-1 st: 0 ts:-0.320000 flags:1 ret:-1 st: 0 ts:-0.320000 flags:1
ret:-1 st:-1 ts:2.576668 flags:0 ret:-1 st:-1 ts:2.576668 flags:0
ret: 0 st:-1 ts:1.470835 flags:1 ret: 0 st:-1 ts:1.470835 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46872 size:3663 flags:1 ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st: 0 ts:0.360000 flags:0 ret: 0 st: 0 ts:0.360000 flags:0
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17968 size:3228 flags:1 ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17984 size:3229 flags:1
ret:-1 st: 0 ts:-0.760000 flags:1 ret:-1 st: 0 ts:-0.760000 flags:1
ret:-1 st:-1 ts:2.153336 flags:0 ret:-1 st:-1 ts:2.153336 flags:0
ret: 0 st:-1 ts:1.047503 flags:1 ret: 0 st:-1 ts:1.047503 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31656 size:3478 flags:1 ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31696 size:3478 flags:1
ret: 0 st: 0 ts:-0.040000 flags:0 ret: 0 st: 0 ts:-0.040000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.840000 flags:1 ret: 0 st: 0 ts:2.840000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63328 size:3635 flags:1 ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st:-1 ts:1.730004 flags:0 ret: 0 st:-1 ts:1.730004 flags:0
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63328 size:3635 flags:1 ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st:-1 ts:0.624171 flags:1 ret: 0 st:-1 ts:0.624171 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17968 size:3228 flags:1 ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17984 size:3229 flags:1
ret: 0 st: 0 ts:-0.480000 flags:0 ret: 0 st: 0 ts:-0.480000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.400000 flags:1 ret: 0 st: 0 ts:2.400000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63328 size:3635 flags:1 ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st:-1 ts:1.306672 flags:0 ret: 0 st:-1 ts:1.306672 flags:0
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46872 size:3663 flags:1 ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st:-1 ts:0.200839 flags:1 ret: 0 st:-1 ts:0.200839 flags:1
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:-0.920000 flags:0 ret: 0 st: 0 ts:-0.920000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1 ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.000000 flags:1 ret: 0 st: 0 ts:2.000000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63328 size:3635 flags:1 ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st:-1 ts:0.883340 flags:0 ret: 0 st:-1 ts:0.883340 flags:0
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31656 size:3478 flags:1 ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31696 size:3478 flags:1
ret:-1 st:-1 ts:-0.222493 flags:1 ret:-1 st:-1 ts:-0.222493 flags:1
ret:-1 st: 0 ts:2.680000 flags:0 ret:-1 st: 0 ts:2.680000 flags:0
ret: 0 st: 0 ts:1.560000 flags:1 ret: 0 st: 0 ts:1.560000 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46872 size:3663 flags:1 ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st:-1 ts:0.460008 flags:0 ret: 0 st:-1 ts:0.460008 flags:0
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17968 size:3228 flags:1 ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17984 size:3229 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1 ret:-1 st:-1 ts:-0.645825 flags:1
---------------- ----------------
tests/data/a-snow53.avi tests/data/a-snow53.avi
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment