dsputil.h 36 KB
Newer Older
glantau's avatar
glantau committed
1 2
/*
 * DSP utils
3
 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
4
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
glantau's avatar
glantau committed
5
 *
6 7 8
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
glantau's avatar
glantau committed
9 10
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
glantau's avatar
glantau committed
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
glantau's avatar
glantau committed
14 15 16 17 18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
glantau's avatar
glantau committed
21
 */
michaelni's avatar
michaelni committed
22 23

/**
24
 * @file libavcodec/dsputil.h
michaelni's avatar
michaelni committed
25
 * DSP utils.
michaelni's avatar
michaelni committed
26 27
 * note, many functions in here may use MMX which trashes the FPU state, it is
 * absolutely necessary to call emms_c() between dsp & float/double code
michaelni's avatar
michaelni committed
28 29
 */

30 31
#ifndef AVCODEC_DSPUTIL_H
#define AVCODEC_DSPUTIL_H
glantau's avatar
glantau committed
32

33
#include "libavutil/intreadwrite.h"
34
#include "avcodec.h"
glantau's avatar
glantau committed
35

michaelni's avatar
michaelni committed
36

michaelni's avatar
michaelni committed
37
//#define DEBUG
glantau's avatar
glantau committed
38 39
/* dct code */
typedef short DCTELEM;
40
typedef int DWTELEM;
41
typedef short IDWTELEM;
glantau's avatar
glantau committed
42

43
void fdct_ifast (DCTELEM *data);
romansh's avatar
 
romansh committed
44
void fdct_ifast248 (DCTELEM *data);
45
void ff_jpeg_fdct_islow (DCTELEM *data);
romansh's avatar
 
romansh committed
46
void ff_fdct248_islow (DCTELEM *data);
glantau's avatar
glantau committed
47 48

void j_rev_dct (DCTELEM *data);
michael's avatar
michael committed
49
void j_rev_dct4 (DCTELEM *data);
michael's avatar
michael committed
50
void j_rev_dct2 (DCTELEM *data);
michael's avatar
michael committed
51
void j_rev_dct1 (DCTELEM *data);
michael's avatar
michael committed
52
void ff_wmv2_idct_c(DCTELEM *data);
glantau's avatar
glantau committed
53

54
void ff_fdct_mmx(DCTELEM *block);
michael's avatar
michael committed
55
void ff_fdct_mmx2(DCTELEM *block);
56
void ff_fdct_sse2(DCTELEM *block);
glantau's avatar
glantau committed
57

lorenm's avatar
lorenm committed
58
void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
59
void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
60 61
void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
62 63
void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
64 65 66 67
void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
68

69 70
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
                              const float *src2, int src3, int blocksize, int step);
lorenm's avatar
lorenm committed
71 72
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
                             const float *win, float add_bias, int len);
michael's avatar
michael committed
73
void ff_float_to_int16_c(int16_t *dst, const float *src, long len);
74
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels);
75

glantau's avatar
glantau committed
76
/* encoding scans */
kabi's avatar
kabi committed
77 78 79
extern const uint8_t ff_alternate_horizontal_scan[64];
extern const uint8_t ff_alternate_vertical_scan[64];
extern const uint8_t ff_zigzag_direct[64];
romansh's avatar
 
romansh committed
80
extern const uint8_t ff_zigzag248_direct[64];
81

glantau's avatar
glantau committed
82
/* pixel operations */
michael's avatar
michael committed
83
#define MAX_NEG_CROP 1024
glantau's avatar
glantau committed
84 85

/* temporary */
mru's avatar
mru committed
86
extern uint32_t ff_squareTbl[512];
mru's avatar
mru committed
87
extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
glantau's avatar
glantau committed
88

89
/* VP3 DSP functions */
90 91 92
void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
glantau's avatar
glantau committed
93

conrad's avatar
conrad committed
94 95 96
void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);

97 98 99 100
/* VP6 DSP functions */
void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
                           const int16_t *h_weights, const int16_t *v_weights);

101 102 103 104 105
/* 1/2^n downscaling functions from imgconvert.c */
void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
106 107 108

void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
109

michaelni's avatar
michaelni committed
110
/* minimum alignment rules ;)
diego's avatar
diego committed
111 112 113 114 115 116
If you notice errors in the align stuff, need more alignment for some ASM code
for some CPU or need to use a function with less aligned data then send a mail
to the ffmpeg-devel mailing list, ...

!warning These alignments might not match reality, (missing attribute((align))
stuff somewhere possible).
diego's avatar
diego committed
117
I (Michael) did not check them, these are just the alignments which I think
diego's avatar
diego committed
118
could be reached easily ...
glantau's avatar
glantau committed
119

michaelni's avatar
michaelni committed
120 121 122
!future video codecs might need functions with less strict alignment
*/

123
/*
kabi's avatar
kabi committed
124 125 126 127
void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
128
void clear_blocks_c(DCTELEM *blocks);
129
*/
glantau's avatar
glantau committed
130 131

/* add and put pixel (decoding) */
michaelni's avatar
michaelni committed
132
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
michael's avatar
michael committed
133
//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4
kabi's avatar
kabi committed
134
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
135
typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
kabi's avatar
kabi committed
136
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
michaelni's avatar
michaelni committed
137
typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
lorenm's avatar
lorenm committed
138
typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
lorenm's avatar
lorenm committed
139
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
michaelni's avatar
michaelni committed
140

michaelni's avatar
michaelni committed
141
/**
 * Declare the three prototypes of one "old" qpel routine.
 * name is pasted onto the ff_put_, ff_put_no_rnd_ and ff_avg_ prefixes; each
 * generated prototype takes (dst, src, stride), the same parameter list as
 * qpel_mc_func.
 * The whole body must stay on consecutive backslash-continued lines, otherwise
 * the ## prototypes end up outside the macro.
 */
#define DEF_OLD_QPEL(name)\
void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_avg_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
michaelni's avatar
michaelni committed
145 146 147 148 149 150 151 152 153 154 155 156 157

DEF_OLD_QPEL(qpel16_mc11_old_c)
DEF_OLD_QPEL(qpel16_mc31_old_c)
DEF_OLD_QPEL(qpel16_mc12_old_c)
DEF_OLD_QPEL(qpel16_mc32_old_c)
DEF_OLD_QPEL(qpel16_mc13_old_c)
DEF_OLD_QPEL(qpel16_mc33_old_c)
DEF_OLD_QPEL(qpel8_mc11_old_c)
DEF_OLD_QPEL(qpel8_mc31_old_c)
DEF_OLD_QPEL(qpel8_mc12_old_c)
DEF_OLD_QPEL(qpel8_mc32_old_c)
DEF_OLD_QPEL(qpel8_mc13_old_c)
DEF_OLD_QPEL(qpel8_mc33_old_c)
michaelni's avatar
michaelni committed
158 159 160 161 162 163

/**
 * Define a static function a with the op_pixels_func parameter list that
 * invokes b twice: once on (block, pixels) and once on the same pointers
 * advanced by n bytes.
 * n is parenthesized so that an expression argument (for example w>>1) keeps
 * its value inside the pointer arithmetic.
 */
#define CALL_2X_PIXELS(a, b, n)\
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    b(block      , pixels      , line_size, h);\
    b(block + (n), pixels + (n), line_size, h);\
}
michaelni's avatar
michaelni committed
164

glantau's avatar
glantau committed
165
/* motion estimation */
michael's avatar
michael committed
166
// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2
diego's avatar
diego committed
167
// although currently h<4 is not used as functions with width <8 are neither used nor implemented
michael's avatar
michael committed
168
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
michaelni's avatar
michaelni committed
169

michaelni's avatar
michaelni committed
170

171 172 173
// for snow slices
typedef struct slice_buffer_s slice_buffer;

174 175 176 177 178 179 180
/**
 * Scantable.
 */
typedef struct ScanTable{
    const uint8_t *scantable;   /* scan order table; presumably the src_scantable handed to ff_init_scantable() -- confirm */
    uint8_t permutated[64];     /* presumably the scan order after applying the IDCT permutation -- confirm */
    uint8_t raster_end[64];     /* NOTE(review): filled by ff_init_scantable(); exact meaning not visible in this header */
#if ARCH_PPC
    /** Used by dct_quantize_altivec to find last-non-zero */
    DECLARE_ALIGNED(16, uint8_t, inverse[64]);
#endif
} ScanTable;

void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);

189 190 191 192
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize,
                         int block_w, int block_h,
                         int src_x, int src_y, int w, int h);

michaelni's avatar
michaelni committed
193 194 195
/**
 * DSPContext.
 */
196 197
typedef struct DSPContext {
    /* pixel ops : interface with DCT */
kabi's avatar
kabi committed
198 199 200
    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
201
    void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
kabi's avatar
kabi committed
202
    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
lorenm's avatar
lorenm committed
203 204
    void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
    void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
205
    int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
michaelni's avatar
michaelni committed
206 207 208
    /**
     * translational global motion compensation.
     */
kabi's avatar
kabi committed
209
    void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
michaelni's avatar
michaelni committed
210 211 212
    /**
     * global motion compensation.
     */
kabi's avatar
kabi committed
213
    void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
214
                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
lorenm's avatar
lorenm committed
215
    void (*clear_block)(DCTELEM *block/*align 16*/);
216
    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
kabi's avatar
kabi committed
217 218
    int (*pix_sum)(uint8_t * pix, int line_size);
    int (*pix_norm1)(uint8_t * pix, int line_size);
michael's avatar
michael committed
219
// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
220

221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
    me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
    me_cmp_func sse[6];
    me_cmp_func hadamard8_diff[6];
    me_cmp_func dct_sad[6];
    me_cmp_func quant_psnr[6];
    me_cmp_func bit[6];
    me_cmp_func rd[6];
    me_cmp_func vsad[6];
    me_cmp_func vsse[6];
    me_cmp_func nsse[6];
    me_cmp_func w53[6];
    me_cmp_func w97[6];
    me_cmp_func dct_max[6];
    me_cmp_func dct264_sad[6];

    me_cmp_func me_pre_cmp[6];
    me_cmp_func me_cmp[6];
    me_cmp_func me_sub_cmp[6];
    me_cmp_func mb_cmp[6];
    me_cmp_func ildct_cmp[6]; //only width 16 used
    me_cmp_func frame_skip_cmp[6]; //only width 8 used
242

243 244
    int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
                             int size);
245

michaelni's avatar
michaelni committed
246 247
    /**
     * Halfpel motion compensation with rounding (a+b+1)>>1.
lu_zero's avatar
lu_zero committed
248
     * this is an array[4][4] of motion compensation functions for 4
michael's avatar
michael committed
249
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
michaelni's avatar
michaelni committed
250
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
michaelni's avatar
michaelni committed
251 252 253 254 255
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
256
    op_pixels_func put_pixels_tab[4][4];
michaelni's avatar
michaelni committed
257 258 259

    /**
     * Halfpel motion compensation with rounding (a+b+1)>>1.
260
     * This is an array[4][4] of motion compensation functions for 4
michael's avatar
michael committed
261
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
michaelni's avatar
michaelni committed
262
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
michaelni's avatar
michaelni committed
263 264 265 266 267
     * @param block destination into which the result is averaged (a+b+1)>>1
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
268
    op_pixels_func avg_pixels_tab[4][4];
michaelni's avatar
michaelni committed
269 270 271

    /**
     * Halfpel motion compensation with no rounding (a+b)>>1.
lu_zero's avatar
lu_zero committed
272
     * this is an array[4][4] of motion compensation functions for 4
michaelni's avatar
doxy  
michaelni committed
273
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
michaelni's avatar
michaelni committed
274
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
michaelni's avatar
michaelni committed
275 276 277 278 279
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
michael's avatar
michael committed
280
    op_pixels_func put_no_rnd_pixels_tab[4][4];
michaelni's avatar
michaelni committed
281 282 283

    /**
     * Halfpel motion compensation with no rounding (a+b)>>1.
lu_zero's avatar
lu_zero committed
284
     * this is an array[4][4] of motion compensation functions for 4
michaelni's avatar
doxy  
michaelni committed
285
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
michaelni's avatar
michaelni committed
286
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
michaelni's avatar
michaelni committed
287 288 289 290 291
     * @param block destination into which the result is averaged (a+b)>>1
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
michael's avatar
michael committed
292
    op_pixels_func avg_no_rnd_pixels_tab[4][4];
293

294
    void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
295

296 297
    /**
     * Thirdpel motion compensation with rounding (a+b+1)>>1.
lu_zero's avatar
lu_zero committed
298 299
     * this is an array[11] of motion compensation functions for the 9 thirdpel
     * positions<br>
300 301 302 303 304 305 306
     * *pixels_tab[ xthirdpel + 4*ythirdpel ]
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
    tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
307 308
    tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?

309 310 311 312
    qpel_mc_func put_qpel_pixels_tab[2][16];
    qpel_mc_func avg_qpel_pixels_tab[2][16];
    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
michaelni's avatar
michaelni committed
313
    qpel_mc_func put_mspel_pixels_tab[8];
314

michaelni's avatar
michaelni committed
315
    /**
lu_zero's avatar
lu_zero committed
316
     * h264 Chroma MC
michaelni's avatar
michaelni committed
317 318 319
     */
    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
320 321
    /* This is really one func used in VC-1 decoding */
    h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
322
    h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3];
323

324 325
    qpel_mc_func put_h264_qpel_pixels_tab[4][16];
    qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
326

327 328 329
    qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
    qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];

lorenm's avatar
lorenm committed
330 331
    h264_weight_func weight_h264_pixels_tab[10];
    h264_biweight_func biweight_h264_pixels_tab[10];
332

333 334 335 336 337 338 339 340 341
    /* AVS specific */
    qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
    qpel_mc_func avg_cavs_qpel_pixels_tab[2][16];
    void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);

michael's avatar
michael committed
342
    me_cmp_func pix_abs[2][4];
343

michaelni's avatar
michaelni committed
344 345
    /* huffyuv specific */
    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
lorenm's avatar
lorenm committed
346
    void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w);
michaelni's avatar
michaelni committed
347
    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
348 349 350 351 352
    /**
     * subtract huffyuv's variant of median prediction
     * note, this might read from src1[-1], src2[-1]
     */
    void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
lorenm's avatar
lorenm committed
353
    void (*add_hfyu_median_prediction)(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top);
lorenm's avatar
lorenm committed
354 355
    /* this might write to dst[w] */
    void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
michael's avatar
michael committed
356
    void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
lorenm's avatar
lorenm committed
357

lorenm's avatar
lorenm committed
358 359 360
    void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
    void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
    /* v/h_loop_filter_luma_intra: align 16 */
361 362
    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
lorenm's avatar
lorenm committed
363 364 365 366
    void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
    void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
    void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
    void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
367 368
    // h264_loop_filter_strength: simd only. the C version is inlined in h264.c
    void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
369
                                      int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
370

michael's avatar
michael committed
371 372 373
    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);

michael's avatar
michael committed
374
    void (*h261_loop_filter)(uint8_t *src, int stride);
375

michael's avatar
michael committed
376 377 378
    void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
    void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);

conrad's avatar
conrad committed
379 380 381
    void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
    void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);

382 383 384
    void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
                             const int16_t *h_weights,const int16_t *v_weights);

385
    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
386
    void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
lorenm's avatar
lorenm committed
387
    void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
lorenm's avatar
lorenm committed
388 389
    /* no alignment needed */
    void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc);
390
    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
lorenm's avatar
lorenm committed
391
    void (*vector_fmul)(float *dst, const float *src, int len);
392 393 394
    void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
    /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
    void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step);
lorenm's avatar
lorenm committed
395 396
    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
lorenm's avatar
lorenm committed
397 398
    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
    void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
399
    void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
400 401

    /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
402
     * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
michael's avatar
michael committed
403
    void (*float_to_int16)(int16_t *dst, const float *src, long len);
404
    void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels);
405

406 407
    /* (I)DCT */
    void (*fdct)(DCTELEM *block/* align 16*/);
romansh's avatar
 
romansh committed
408
    void (*fdct248)(DCTELEM *block/* align 16*/);
409

410 411
    /* IDCT really*/
    void (*idct)(DCTELEM *block/* align 16*/);
412

michaelni's avatar
michaelni committed
413
    /**
michaelni's avatar
michaelni committed
414
     * block -> idct -> clip to unsigned 8 bit -> dest.
michaelni's avatar
michaelni committed
415
     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
takis's avatar
takis committed
416
     * @param line_size size in bytes of a horizontal line of dest
michaelni's avatar
michaelni committed
417
     */
418
    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
419

michaelni's avatar
michaelni committed
420 421
    /**
     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
takis's avatar
takis committed
422
     * @param line_size size in bytes of a horizontal line of dest
michaelni's avatar
michaelni committed
423
     */
424
    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
425

michaelni's avatar
michaelni committed
426
    /**
michaelni's avatar
michaelni committed
427
     * idct input permutation.
michaelni's avatar
michaelni committed
428 429 430 431
     * several optimized IDCTs need a permutated input (relative to the normal order of the reference
     * IDCT)
     * this permutation must be performed before the idct_put/add, note, normally this can be merged
     * with the zigzag/alternate scan<br>
michaelni's avatar
michaelni committed
432 433 434 435 436 437
     * an example to avoid confusion:
     * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
     * - (x -> reference dct -> reference idct -> x)
     * - (x -> reference dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
     * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
     */
438 439 440 441 442 443
    uint8_t idct_permutation[64];
    int idct_permutation_type;
#define FF_NO_IDCT_PERM 1
#define FF_LIBMPEG2_IDCT_PERM 2
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4
444
#define FF_PARTTRANS_IDCT_PERM 5
445
#define FF_SSE2_IDCT_PERM 6
446

447 448 449 450
    int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
    void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
#define BASIS_SHIFT 16
#define RECON_SHIFT 6
451

aurel's avatar
aurel committed
452
    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
453
#define EDGE_WIDTH 16
aurel's avatar
aurel committed
454

455
    /* h264 functions */
michael's avatar
michael committed
456 457 458 459
    /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them
       NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them
        The reason for above, is that no 2 out of one list may use a different permutation.
    */
lorenm's avatar
lorenm committed
460 461 462 463
    void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
464
    void (*h264_dct)(DCTELEM block[4][4]);
465 466 467 468
    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
469 470

    /* snow wavelet */
471 472
    void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
    void (*horizontal_compose97i)(IDWTELEM *b, int width);
473
    void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
474 475

    void (*prefetch)(void *mem, int stride, int h);
476 477

    void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
478

479
    /* mlp/truehd functions */
480 481
    void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
                               int firorder, int iirorder,
482 483 484
                               unsigned int filter_shift, int32_t mask, int blocksize,
                               int32_t *sample_buffer);

485 486
    /* vc1 functions */
    void (*vc1_inv_trans_8x8)(DCTELEM *b);
487 488 489
    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
490 491 492 493
    void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
    void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
    void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
    void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
494 495
    void (*vc1_v_overlap)(uint8_t* src, int stride);
    void (*vc1_h_overlap)(uint8_t* src, int stride);
496 497 498 499 500 501
    void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
    void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
    void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
    void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq);
    void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq);
    void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq);
502 503 504 505
    /* put 8x8 block with bicubic interpolation and quarterpel precision
     * last argument is actually round value instead of height
     */
    op_pixels_func put_vc1_mspel_pixels_tab[16];
506
    op_pixels_func avg_vc1_mspel_pixels_tab[16];
michael's avatar
michael committed
507 508

    /* intrax8 functions */
509 510
    void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
    void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
michael's avatar
michael committed
511 512
           int * range, int * sum,  int edges);

513 514 515
    /* ape functions */
    /**
     * Add contents of the second vector to the first one.
516
     * @param len length of vectors, should be multiple of 16
517 518 519 520
     */
    void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
    /**
     * Add contents of the second vector to the first one.
521
     * @param len length of vectors, should be multiple of 16
522 523 524 525
     */
    void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
    /**
     * Calculate scalar product of two vectors.
526
     * @param len length of vectors, should be multiple of 16
527 528 529
     * @param shift number of bits to discard from product
     */
    int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
kostya's avatar
kostya committed
530

kostya's avatar
kostya committed
531 532 533 534
    /* rv30 functions */
    qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
    qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];

kostya's avatar
kostya committed
535 536 537 538 539
    /* rv40 functions */
    qpel_mc_func put_rv40_qpel_pixels_tab[4][16];
    qpel_mc_func avg_rv40_qpel_pixels_tab[4][16];
    h264_chroma_mc_func put_rv40_chroma_pixels_tab[3];
    h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3];
540 541
} DSPContext;

mru's avatar
mru committed
542
void dsputil_static_init(void);
543
void dsputil_init(DSPContext* p, AVCodecContext *avctx);
glantau's avatar
glantau committed
544

545 546
int ff_check_alignment(void);

michaelni's avatar
michaelni committed
547 548 549 550
/**
 * Permute block according to permutation.
 * @param last last non zero element in scantable order
 */
kabi's avatar
kabi committed
551
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
glantau's avatar
glantau committed
552

michael's avatar
michael committed
553 554
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);

555
/* Multiply c by 0x01010101UL; for a value that fits in one byte this
 * replicates that byte into all four byte lanes of a 32-bit word. */
#define         BYTE_VEC32(c)   ((c)*0x01010101UL)
michaelni's avatar
michaelni committed
556 557 558 559 560 561 562 563 564 565 566

/**
 * Average the four packed bytes of a and b lane by lane, rounding halves up.
 *
 * Uses the identity a + b == 2*(a | b) - (a ^ b): the low bit of every byte
 * of (a ^ b) is cleared before the shift so that nothing crosses into the
 * neighbouring byte lane.
 */
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
{
    const uint32_t half_diff = ((a ^ b) & ~BYTE_VEC32(0x01)) >> 1;

    return (a | b) - half_diff;
}

/**
 * Average the four packed bytes of a and b lane by lane, rounding halves down.
 *
 * Uses the identity a + b == 2*(a & b) + (a ^ b): the low bit of every byte
 * of (a ^ b) is cleared before the shift so that nothing crosses into the
 * neighbouring byte lane.
 */
static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
{
    const uint32_t half_diff = ((a ^ b) & ~BYTE_VEC32(0x01)) >> 1;

    return (a & b) + half_diff;
}

michael's avatar
michael committed
567 568 569 570 571 572 573 574 575 576 577 578
static inline int get_penalty_factor(int lambda, int lambda2, int type){
    switch(type&0xFF){
    default:
    case FF_CMP_SAD:
        return lambda>>FF_LAMBDA_SHIFT;
    case FF_CMP_DCT:
        return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
    case FF_CMP_W53:
        return (4*lambda)>>(FF_LAMBDA_SHIFT);
    case FF_CMP_W97:
        return (2*lambda)>>(FF_LAMBDA_SHIFT);
    case FF_CMP_SATD:
579
    case FF_CMP_DCT264:
michael's avatar
michael committed
580 581 582 583 584 585 586 587 588 589 590
        return (2*lambda)>>FF_LAMBDA_SHIFT;
    case FF_CMP_RD:
    case FF_CMP_PSNR:
    case FF_CMP_SSE:
    case FF_CMP_NSSE:
        return lambda2>>FF_LAMBDA_SHIFT;
    case FF_CMP_BIT:
        return 1;
    }
}

michaelni's avatar
michaelni committed
591
/**
 * Empty the MMX state.
 * This must be called between any dsp function and float/double code,
 * for example sin(); dsp->idct_put(); emms_c(); cos()
 */
596 597
#define emms_c()

598 599 600 601
/* should be defined by architectures supporting
   one or more MultiMedia extension */
int mm_support(void);

602
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
mru's avatar
mru committed
603
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
604 605 606 607 608 609 610 611
void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);

612
#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v)
613

614
#if HAVE_MMX
glantau's avatar
glantau committed
615

kabi's avatar
kabi committed
616
#undef emms_c
617

mru's avatar
mru committed
618 619
extern int mm_flags;

kabi's avatar
kabi committed
620 621
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
622
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
glantau's avatar
glantau committed
623 624 625

/**
 * Execute the x86 "emms" instruction unconditionally.
 *
 * The "memory" clobber tells the compiler not to move memory accesses
 * across the instruction.
 */
static inline void emms(void)
{
    __asm__ volatile ("emms;":::"memory");
}

michaelni's avatar
michaelni committed
629

630 631
/* Call emms() only when mm_flags has FF_MM_MMX set.
 * NOTE: this expands to a bare brace block, not do { } while (0), so
 * "if (x) emms_c(); else ..." will not parse; keep it as its own statement. */
#define emms_c() \
{\
    if (mm_flags & FF_MM_MMX)\
        emms();\
}

636
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
kabi's avatar
kabi committed
637

638
#elif ARCH_ARM
glantau's avatar
glantau committed
639

mru's avatar
mru committed
640 641
extern int mm_flags;

642
#if HAVE_NEON
643 644 645 646
#   define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
#   define STRIDE_ALIGN 16
#endif

647
#elif ARCH_PPC
648

mru's avatar
mru committed
649 650
extern int mm_flags;

651
#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
michael's avatar
michael committed
652
#define STRIDE_ALIGN 16
653

654
#elif HAVE_MMI
655

656
#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
michael's avatar
michael committed
657
#define STRIDE_ALIGN 16
658

659 660 661 662 663
#else

#define mm_flags 0
#define mm_support() 0

664
#endif
glantau's avatar
glantau committed
665

666 667 668
#ifndef DECLARE_ALIGNED_8
#   define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v)
#endif
glantau's avatar
glantau committed
669

670 671
#ifndef STRIDE_ALIGN
#   define STRIDE_ALIGN 8
glantau's avatar
glantau committed
672 673
#endif

674
/* PSNR */
kabi's avatar
kabi committed
675
void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
676 677
              int orig_linesize[3], int coded_linesize,
              AVCodecContext *avctx);
678 679 680 681 682 683 684

/* FFT computation */

/* NOTE: soon integer code will be added, so you must use the
   FFTSample type */
typedef float FFTSample;

lorenm's avatar
lorenm committed
685 686
struct MDCTContext;

687 688 689 690 691 692 693 694 695 696
/* One complex sample: real part (re) and imaginary part (im). */
typedef struct FFTComplex {
    FFTSample re, im;
} FFTComplex;

typedef struct FFTContext {
    int nbits;
    int inverse;
    uint16_t *revtab;
    FFTComplex *exptab;
    FFTComplex *exptab1; /* only used by SSE code */
lorenm's avatar
lorenm committed
697 698
    FFTComplex *tmp_buf;
    void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
699
    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
lorenm's avatar
lorenm committed
700 701
    void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
    void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
702 703
} FFTContext;

704 705
extern FFTSample* ff_cos_tabs[13];

alexc's avatar
alexc committed
706 707 708 709 710
/**
 * Sets up a complex FFT.
 * @param nbits           log2 of the length of the input array
 * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
 */
711
int ff_fft_init(FFTContext *s, int nbits, int inverse);
lorenm's avatar
lorenm committed
712 713
void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
714 715
void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
716 717
void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z);
718
void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
719

alexc's avatar
alexc committed
720 721 722
/**
 * Do the permutation needed BEFORE calling ff_fft_calc().
 */
lorenm's avatar
lorenm committed
723 724 725 726
static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
{
    s->fft_permute(s, z);
}
alexc's avatar
alexc committed
727 728 729 730
/**
 * Do a complex FFT with the parameters defined in ff_fft_init(). The
 * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
 */
731
static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
732 733 734
{
    s->fft_calc(s, z);
}
735
void ff_fft_end(FFTContext *s);
736 737 738 739 740 741 742 743 744 745 746 747

/* MDCT computation */

/* MDCT state: transform size, pre/post rotation tables and the embedded
 * FFT context whose imdct_calc/imdct_half pointers the ff_imdct_*()
 * inline wrappers call through. */
typedef struct MDCTContext {
    int n;  /* size of MDCT (i.e. number of input data * 2) */
    int nbits; /* n = 2^nbits */
    /* pre/post rotation tables */
    FFTSample *tcos;
    FFTSample *tsin;
    FFTContext fft;
} MDCTContext;

lorenm's avatar
lorenm committed
748 749 750 751 752 753 754 755 756
/**
 * Run the imdct_calc implementation selected in the embedded FFT context.
 * All three arguments are forwarded unchanged.
 */
static inline void ff_imdct_calc(MDCTContext *s, FFTSample *output, const FFTSample *input)
{
    FFTContext *dispatch = &s->fft;

    dispatch->imdct_calc(s, output, input);
}
/**
 * Run the imdct_half implementation selected in the embedded FFT context.
 * All three arguments are forwarded unchanged.
 */
static inline void ff_imdct_half(MDCTContext *s, FFTSample *output, const FFTSample *input)
{
    FFTContext *dispatch = &s->fft;

    dispatch->imdct_half(s, output, input);
}

757 758 759
/**
 * Generate a Kaiser-Bessel Derived Window.
 * @param   window  pointer to half window
760 761
 * @param   alpha   determines window shape
 * @param   n       size of half window
762
 */
763
void ff_kbd_window_init(float *window, float alpha, int n);
764

765 766 767 768 769 770
/**
 * Generate a sine window.
 * @param   window  pointer to half window
 * @param   n       size of half window
 */
void ff_sine_window_init(float *window, int n);
771 772 773 774 775
extern float ff_sine_128 [ 128];
extern float ff_sine_256 [ 256];
extern float ff_sine_512 [ 512];
extern float ff_sine_1024[1024];
extern float ff_sine_2048[2048];
776 777
extern float ff_sine_4096[4096];
extern float *ff_sine_windows[6];
778

779
int ff_mdct_init(MDCTContext *s, int nbits, int inverse, double scale);
lorenm's avatar
lorenm committed
780 781
void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
lorenm's avatar
lorenm committed
782
void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
lorenm's avatar
lorenm committed
783
void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
lorenm's avatar
lorenm committed
784
void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
lorenm's avatar
lorenm committed
785
void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
lorenm's avatar
lorenm committed
786
void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
lorenm's avatar
lorenm committed
787
void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
lorenm's avatar
lorenm committed
788
void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input);
bellard's avatar
bellard committed
789
void ff_mdct_end(MDCTContext *s);
790

791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819
/* Real Discrete Fourier Transform */

/* Transform selector passed to ff_rdft_init().
 * NOTE(review): the exact meaning of each variant (direction / sign
 * convention) is not visible in this header -- confirm against rdft.c. */
enum RDFTransformType {
    RDFT,
    IRDFT,
    RIDFT,
    IRIDFT,
};

/* State for the real DFT, set up by ff_rdft_init() and released by
 * ff_rdft_end(); embeds an FFTContext. */
typedef struct {
    int nbits;           /* log2 of the length of the input array (see ff_rdft_init()) */
    int inverse;
    int sign_convention;

    /* pre/post rotation tables */
    FFTSample *tcos;
    FFTSample *tsin;
    FFTContext fft;
} RDFTContext;

/**
 * Sets up a real FFT.
 * @param nbits           log2 of the length of the input array
 * @param trans           the type of transform
 */
int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
void ff_rdft_calc(RDFTContext *s, FFTSample *data);
void ff_rdft_end(RDFTContext *s);

820
/* Define a static function name16 that returns the sum of name8 applied
 * to the left 8 columns and to the columns starting at offset 8, passing
 * the height h through unchanged. */
#define WRAPPER8_16(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    return name8(s, dst           , src           , stride, h)\
          +name8(s, dst+8         , src+8         , stride, h);\
}

826
/* Define a static function name16 that sums name8 over 8-row, 8-column
 * tiles: always the two tiles of the top 8 rows, plus the two tiles of
 * the next 8 rows when h == 16. name8 is always called with height 8. */
#define WRAPPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    int score=0;\
    score +=name8(s, dst           , src           , stride, 8);\
    score +=name8(s, dst+8         , src+8         , stride, 8);\
    if(h==16){\
        dst += 8*stride;\
        src += 8*stride;\
        score +=name8(s, dst           , src           , stride, 8);\
        score +=name8(s, dst+8         , src+8         , stride, 8);\
    }\
    return score;\
}

840

alexc's avatar
alexc committed
841
/**
 * Copy h rows of 2 bytes each from src to dst.
 *
 * After every row dst advances by dstStride and src by srcStride bytes.
 * Each row is moved as one 16-bit unit via AV_RN16/AV_WN16 (presumably the
 * native-endian accessors from libavutil/intreadwrite.h -- confirm).
 */
static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN16(dst   , AV_RN16(src   ));
        dst+=dstStride;
        src+=srcStride;
    }
}

alexc's avatar
alexc committed
852
/**
 * Copy h rows of 4 bytes each from src to dst.
 *
 * After every row dst advances by dstStride and src by srcStride bytes.
 * Each row is moved as one 32-bit unit via AV_RN32/AV_WN32 (presumably the
 * native-endian accessors from libavutil/intreadwrite.h -- confirm).
 */
static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN32(dst   , AV_RN32(src   ));
        dst+=dstStride;
        src+=srcStride;
    }
}

alexc's avatar
alexc committed
863
/**
 * Copy h rows of 8 bytes each from src to dst.
 *
 * After every row dst advances by dstStride and src by srcStride bytes.
 * Each row is moved as two 32-bit units via AV_RN32/AV_WN32 (presumably the
 * native-endian accessors from libavutil/intreadwrite.h -- confirm).
 */
static inline void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN32(dst   , AV_RN32(src   ));
        AV_WN32(dst+4 , AV_RN32(src+4 ));
        dst+=dstStride;
        src+=srcStride;
    }
}

alexc's avatar
alexc committed
875
/**
 * Copy h rows of 9 bytes each from src to dst.
 *
 * After every row dst advances by dstStride and src by srcStride bytes.
 * The first 8 bytes of a row are moved as two 32-bit units via
 * AV_RN32/AV_WN32 (presumably the native-endian accessors from
 * libavutil/intreadwrite.h -- confirm); the ninth byte is copied on its own.
 */
static inline void copy_block9(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN32(dst   , AV_RN32(src   ));
        AV_WN32(dst+4 , AV_RN32(src+4 ));
        dst[8]= src[8];
        dst+=dstStride;
        src+=srcStride;
    }
}

alexc's avatar
alexc committed
888
/**
 * Copy h rows of 16 bytes each from src to dst.
 *
 * After every row dst advances by dstStride and src by srcStride bytes.
 * Each row is moved as four 32-bit units via AV_RN32/AV_WN32 (presumably the
 * native-endian accessors from libavutil/intreadwrite.h -- confirm).
 */
static inline void copy_block16(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN32(dst   , AV_RN32(src   ));
        AV_WN32(dst+4 , AV_RN32(src+4 ));
        AV_WN32(dst+8 , AV_RN32(src+8 ));
        AV_WN32(dst+12, AV_RN32(src+12));
        dst+=dstStride;
        src+=srcStride;
    }
}

alexc's avatar
alexc committed
902
/**
 * Copy h rows of 17 bytes each from src to dst.
 *
 * After every row dst advances by dstStride and src by srcStride bytes.
 * The first 16 bytes of a row are moved as four 32-bit units via
 * AV_RN32/AV_WN32 (presumably the native-endian accessors from
 * libavutil/intreadwrite.h -- confirm); the 17th byte is copied on its own.
 */
static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN32(dst   , AV_RN32(src   ));
        AV_WN32(dst+4 , AV_RN32(src+4 ));
        AV_WN32(dst+8 , AV_RN32(src+8 ));
        AV_WN32(dst+12, AV_RN32(src+12));
        dst[16]= src[16];
        dst+=dstStride;
        src+=srcStride;
    }
}

917
#endif /* AVCODEC_DSPUTIL_H */