Commit df159f06 authored by gpoirier

First part of a series of speed-enhancing patches.

This one sets up a snow.h and makes snow use the dsputil function pointer
framework to access the three functions that will be implemented in asm
in the other parts of the patchset.
Patch by Robert Edele < yartrebo AH earthlink POIS net>
Original thread:
Subject: [Ffmpeg-devel] [PATCH] Snow mmx+sse2 asm optimizations
Date: Sun, 05 Feb 2006 12:47:14 -0500


git-svn-id: file:///var/local/repositories/ffmpeg/trunk@5172 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
parent 923428c7
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "mpegvideo.h" #include "mpegvideo.h"
#include "simple_idct.h" #include "simple_idct.h"
#include "faandct.h" #include "faandct.h"
#include "snow.h"
/* snow.c */ /* snow.c */
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
...@@ -4047,6 +4048,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) ...@@ -4047,6 +4048,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->try_8x8basis= try_8x8basis_c; c->try_8x8basis= try_8x8basis_c;
c->add_8x8basis= add_8x8basis_c; c->add_8x8basis= add_8x8basis_c;
c->vertical_compose97i = ff_snow_vertical_compose97i;
c->horizontal_compose97i = ff_snow_horizontal_compose97i;
c->inner_add_yblock = ff_snow_inner_add_yblock;
#ifdef HAVE_MMX #ifdef HAVE_MMX
dsputil_init_mmx(c, avctx); dsputil_init_mmx(c, avctx);
#endif #endif
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
//#define DEBUG //#define DEBUG
/* dct code */ /* dct code */
typedef short DCTELEM; typedef short DCTELEM;
typedef int DWTELEM;
void fdct_ifast (DCTELEM *data); void fdct_ifast (DCTELEM *data);
void fdct_ifast248 (DCTELEM *data); void fdct_ifast248 (DCTELEM *data);
...@@ -133,6 +134,9 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ ...@@ -133,6 +134,9 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
// for snow slices
typedef struct slice_buffer_s slice_buffer;
/** /**
* DSPContext. * DSPContext.
*/ */
...@@ -334,6 +338,11 @@ typedef struct DSPContext { ...@@ -334,6 +338,11 @@ typedef struct DSPContext {
void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride); void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* snow wavelet */
void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
void (*horizontal_compose97i)(DWTELEM *b, int width);
void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
} DSPContext; } DSPContext;
void dsputil_static_init(void); void dsputil_static_init(void);
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# define REG_d "rdx" # define REG_d "rdx"
# define REG_D "rdi" # define REG_D "rdi"
# define REG_S "rsi" # define REG_S "rsi"
# define PTR_SIZE "8"
#else #else
# define REG_a "eax" # define REG_a "eax"
# define REG_b "ebx" # define REG_b "ebx"
...@@ -19,6 +20,7 @@ ...@@ -19,6 +20,7 @@
# define REG_d "edx" # define REG_d "edx"
# define REG_D "edi" # define REG_D "edi"
# define REG_S "esi" # define REG_S "esi"
# define PTR_SIZE "4"
#endif #endif
/* /*
......
This diff is collapsed.
/*
* Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
* Copyright (C) 2006 Robert Edele <yartrebo@earthlink.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Include guard. The name deliberately avoids a leading underscore followed by
 * an uppercase letter, because C reserves such identifiers for the
 * implementation. */
#ifndef SNOW_H
#define SNOW_H
#include "dsputil.h"
/* Compile-time constants of the snow codec. */
#define MID_STATE 128
#define MAX_DECOMPOSITIONS 8
#define MAX_PLANES 4
#define QSHIFT 5
#define QROOT (1 << QSHIFT)              /* 2^QSHIFT == 32 */
/* Negative literal is parenthesized so the expansion is always a single
 * primary expression, whatever tokens surround the macro at the use site. */
#define LOSSLESS_QLOG (-128)
#define FRAC_BITS 8
#define LOG2_OBMC_MAX 6
#define OBMC_MAX (1 << (LOG2_OBMC_MAX))  /* 2^LOG2_OBMC_MAX == 64 */
/**
 * Slice buffer.
 * Used to minimize the amount of memory used in order to optimize cache
 * performance. Declared in dsputil.h under the typedef name slice_buffer.
 */
struct slice_buffer_s {
    DWTELEM **line;        ///< For use by idwt and predict_slices.
    DWTELEM **data_stack;  ///< Used for internal purposes.
    int data_stack_top;    ///< NOTE(review): presumably the index of the top entry of data_stack -- confirm in snow.c.
    int line_count;        ///< NOTE(review): presumably the number of entries in line -- confirm in snow.c.
    int line_width;        ///< NOTE(review): presumably the width of one line in DWTELEM units -- confirm in snow.c.
    int data_count;        ///< NOTE(review): presumably the number of entries in data_stack -- confirm in snow.c.
    DWTELEM *base_buffer;  ///< Buffer that this structure is caching.
};
#define liftS lift
#define lift5 lift
#if 1
#define W_AM 3
#define W_AO 0
#define W_AS 1
#undef liftS
#define W_BM 1
#define W_BO 8
#define W_BS 4
#define W_CM 1
#define W_CO 0
#define W_CS 0
#define W_DM 3
#define W_DO 4
#define W_DS 3
#elif 0
#define W_AM 55
#define W_AO 16
#define W_AS 5
#define W_BM 3
#define W_BO 32
#define W_BS 6
#define W_CM 127
#define W_CO 64
#define W_CS 7
#define W_DM 7
#define W_DO 8
#define W_DS 4
#elif 0
#define W_AM 97
#define W_AO 32
#define W_AS 6
#define W_BM 63
#define W_BO 512
#define W_BS 10
#define W_CM 13
#define W_CO 8
#define W_CS 4
#define W_DM 15
#define W_DO 16
#define W_DS 5
#else
#define W_AM 203
#define W_AO 64
#define W_AS 7
#define W_BM 217
#define W_BO 2048
#define W_BS 12
#define W_CM 113
#define W_CO 64
#define W_CS 7
#define W_DM 227
#define W_DO 128
#define W_DS 9
#endif
/**
 * Snow routines that dsputil_init() assigns to the DSPContext function
 * pointers vertical_compose97i, horizontal_compose97i and inner_add_yblock.
 * Each signature has to stay in step with the matching DSPContext member.
 */
void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
                                 DWTELEM *b3, DWTELEM *b4, DWTELEM *b5,
                                 int width);
void ff_snow_horizontal_compose97i(DWTELEM *b, int width);
void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride,
                              uint8_t **block, int b_w, int b_h,
                              int src_x, int src_y, int src_stride,
                              slice_buffer *sb, int add, uint8_t *dst8);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment