Commit 14ee5e98 authored by Rémi Denis-Courmont

i420_rgb: split files and clean up

parent 38917a22
...@@ -63,8 +63,8 @@ endif ...@@ -63,8 +63,8 @@ endif
# MMX # MMX
libi420_rgb_mmx_plugin_la_SOURCES = i420_rgb.c i420_rgb.h \ libi420_rgb_mmx_plugin_la_SOURCES = i420_rgb.c i420_rgb.h \
i420_rgb16.c i420_rgb_mmx.h i420_rgb16_x86.c i420_rgb_mmx.h
libi420_rgb_mmx_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) libi420_rgb_mmx_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) -DMMX
libi420_yuy2_mmx_plugin_la_SOURCES = i420_yuy2.c i420_yuy2.h libi420_yuy2_mmx_plugin_la_SOURCES = i420_yuy2.c i420_yuy2.h
libi420_yuy2_mmx_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) libi420_yuy2_mmx_plugin_la_CPPFLAGS = $(AM_CPPFLAGS)
...@@ -81,8 +81,8 @@ endif ...@@ -81,8 +81,8 @@ endif
# SSE2 # SSE2
libi420_rgb_sse2_plugin_la_SOURCES = i420_rgb.c i420_rgb.h \ libi420_rgb_sse2_plugin_la_SOURCES = i420_rgb.c i420_rgb.h \
i420_rgb16.c i420_rgb_sse2.h i420_rgb16_x86.c i420_rgb_sse2.h
libi420_rgb_sse2_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) libi420_rgb_sse2_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) -DSSE2
libi420_yuy2_sse2_plugin_la_SOURCES = i420_yuy2.c i420_yuy2.h libi420_yuy2_sse2_plugin_la_SOURCES = i420_yuy2.c i420_yuy2.h
libi420_yuy2_sse2_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) libi420_yuy2_sse2_plugin_la_CPPFLAGS = $(AM_CPPFLAGS)
......
...@@ -38,19 +38,22 @@ ...@@ -38,19 +38,22 @@
#include <vlc_cpu.h> #include <vlc_cpu.h>
#include "i420_rgb.h" #include "i420_rgb.h"
#if defined (MODULE_NAME_IS_i420_rgb) #ifdef PLAIN
# include "i420_rgb_c.h" # include "i420_rgb_c.h"
static picture_t *I420_RGB8_Filter ( filter_t *, picture_t * ); static picture_t *I420_RGB8_Filter( filter_t *, picture_t * );
// static picture_t *I420_RGB16_dither_Filter ( filter_t *, picture_t * ); static picture_t *I420_RGB16_Filter( filter_t *, picture_t * );
static picture_t *I420_RGB16_Filter ( filter_t *, picture_t * ); static picture_t *I420_RGB32_Filter( filter_t *, picture_t * );
static picture_t *I420_RGB32_Filter ( filter_t *, picture_t * );
static void SetGammaTable( int *pi_table, double f_gamma );
static void SetYUV( filter_t * );
static void Set8bppPalette( filter_t *, uint8_t * );
#else #else
static picture_t *I420_R5G5B5_Filter ( filter_t *, picture_t * ); static picture_t *I420_R5G5B5_Filter( filter_t *, picture_t * );
static picture_t *I420_R5G6B5_Filter ( filter_t *, picture_t * ); static picture_t *I420_R5G6B5_Filter( filter_t *, picture_t * );
static picture_t *I420_A8R8G8B8_Filter ( filter_t *, picture_t * ); static picture_t *I420_A8R8G8B8_Filter( filter_t *, picture_t * );
static picture_t *I420_R8G8B8A8_Filter ( filter_t *, picture_t * ); static picture_t *I420_R8G8B8A8_Filter( filter_t *, picture_t * );
static picture_t *I420_B8G8R8A8_Filter ( filter_t *, picture_t * ); static picture_t *I420_B8G8R8A8_Filter( filter_t *, picture_t * );
static picture_t *I420_A8B8G8R8_Filter ( filter_t *, picture_t * ); static picture_t *I420_A8B8G8R8_Filter( filter_t *, picture_t * );
#endif #endif
/***************************************************************************** /*****************************************************************************
...@@ -65,36 +68,27 @@ ...@@ -65,36 +68,27 @@
<< p_filter->fmt_out.video.i_lbshift)) << p_filter->fmt_out.video.i_lbshift))
/***************************************************************************** /*****************************************************************************
* Local and extern prototypes. * Module descriptor.
*****************************************************************************/ *****************************************************************************/
static int Activate ( vlc_object_t * ); static int Activate ( vlc_object_t * );
static void Deactivate ( vlc_object_t * ); static void Deactivate ( vlc_object_t * );
#if defined (MODULE_NAME_IS_i420_rgb)
static void SetGammaTable ( int *pi_table, double f_gamma );
static void SetYUV ( filter_t * );
static void Set8bppPalette ( filter_t *, uint8_t * );
#endif
/*****************************************************************************
* Module descriptor.
*****************************************************************************/
vlc_module_begin () vlc_module_begin ()
#if defined (MODULE_NAME_IS_i420_rgb) #if defined (SSE2)
set_description( N_("I420,IYUV,YV12 to "
"RGB2,RV15,RV16,RV24,RV32 conversions") )
set_capability( "video filter2", 80 )
# define vlc_CPU_capable() (true)
#elif defined (MODULE_NAME_IS_i420_rgb_mmx)
set_description( N_( "MMX I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions") )
set_capability( "video filter2", 100 )
# define vlc_CPU_capable() vlc_CPU_MMX()
#elif defined (MODULE_NAME_IS_i420_rgb_sse2)
set_description( N_( "SSE2 I420,IYUV,YV12 to " set_description( N_( "SSE2 I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions") ) "RV15,RV16,RV24,RV32 conversions") )
set_capability( "video filter2", 120 ) set_capability( "video filter2", 120 )
# define vlc_CPU_capable() vlc_CPU_SSE2() # define vlc_CPU_capable() vlc_CPU_SSE2()
#elif defined (MMX)
set_description( N_( "MMX I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions") )
set_capability( "video filter2", 100 )
# define vlc_CPU_capable() vlc_CPU_MMX()
#else
set_description( N_("I420,IYUV,YV12 to "
"RGB2,RV15,RV16,RV24,RV32 conversions") )
set_capability( "video filter2", 80 )
# define vlc_CPU_capable() (true)
#endif #endif
set_callbacks( Activate, Deactivate ) set_callbacks( Activate, Deactivate )
vlc_module_end () vlc_module_end ()
...@@ -107,7 +101,7 @@ vlc_module_end () ...@@ -107,7 +101,7 @@ vlc_module_end ()
static int Activate( vlc_object_t *p_this ) static int Activate( vlc_object_t *p_this )
{ {
filter_t *p_filter = (filter_t *)p_this; filter_t *p_filter = (filter_t *)p_this;
#if defined (MODULE_NAME_IS_i420_rgb) #ifdef PLAIN
size_t i_tables_size; size_t i_tables_size;
#endif #endif
...@@ -125,14 +119,9 @@ static int Activate( vlc_object_t *p_this ) ...@@ -125,14 +119,9 @@ static int Activate( vlc_object_t *p_this )
case VLC_CODEC_I420: case VLC_CODEC_I420:
switch( p_filter->fmt_out.video.i_chroma ) switch( p_filter->fmt_out.video.i_chroma )
{ {
#if defined (MODULE_NAME_IS_i420_rgb) #ifndef PLAIN
case VLC_CODEC_RGB8:
p_filter->pf_video_filter = I420_RGB8_Filter;
break;
#endif
case VLC_CODEC_RGB15: case VLC_CODEC_RGB15:
case VLC_CODEC_RGB16: case VLC_CODEC_RGB16:
#if ! defined (MODULE_NAME_IS_i420_rgb)
/* If we don't have support for the bitmasks, bail out */ /* If we don't have support for the bitmasks, bail out */
if( ( p_filter->fmt_out.video.i_rmask == 0x7c00 if( ( p_filter->fmt_out.video.i_rmask == 0x7c00
&& p_filter->fmt_out.video.i_gmask == 0x03e0 && p_filter->fmt_out.video.i_gmask == 0x03e0
...@@ -152,19 +141,8 @@ static int Activate( vlc_object_t *p_this ) ...@@ -152,19 +141,8 @@ static int Activate( vlc_object_t *p_this )
} }
else else
return VLC_EGENERIC; return VLC_EGENERIC;
#else
// generic C chroma converter */
p_filter->pf_video_filter = I420_RGB16_Filter;
#endif
break; break;
#if 0
/* Hmmm, is there only X11 using 32bits per pixel for RV24 ? */
case VLC_CODEC_RGB24:
#endif
case VLC_CODEC_RGB32: case VLC_CODEC_RGB32:
#if ! defined (MODULE_NAME_IS_i420_rgb)
/* If we don't have support for the bitmasks, bail out */ /* If we don't have support for the bitmasks, bail out */
if( p_filter->fmt_out.video.i_rmask == 0x00ff0000 if( p_filter->fmt_out.video.i_rmask == 0x00ff0000
&& p_filter->fmt_out.video.i_gmask == 0x0000ff00 && p_filter->fmt_out.video.i_gmask == 0x0000ff00
...@@ -200,12 +178,19 @@ static int Activate( vlc_object_t *p_this ) ...@@ -200,12 +178,19 @@ static int Activate( vlc_object_t *p_this )
} }
else else
return VLC_EGENERIC; return VLC_EGENERIC;
break;
#else #else
/* generic C chroma converter */ case VLC_CODEC_RGB8:
p_filter->pf_video_filter = I420_RGB8_Filter;
break;
case VLC_CODEC_RGB15:
case VLC_CODEC_RGB16:
p_filter->pf_video_filter = I420_RGB16_Filter;
break;
case VLC_CODEC_RGB32:
p_filter->pf_video_filter = I420_RGB32_Filter; p_filter->pf_video_filter = I420_RGB32_Filter;
#endif
break; break;
#endif
default: default:
return VLC_EGENERIC; return VLC_EGENERIC;
} }
...@@ -223,22 +208,19 @@ static int Activate( vlc_object_t *p_this ) ...@@ -223,22 +208,19 @@ static int Activate( vlc_object_t *p_this )
switch( p_filter->fmt_out.video.i_chroma ) switch( p_filter->fmt_out.video.i_chroma )
{ {
#if defined (MODULE_NAME_IS_i420_rgb) #ifdef PLAIN
case VLC_CODEC_RGB8: case VLC_CODEC_RGB8:
p_filter->p_sys->p_buffer = malloc( VOUT_MAX_WIDTH ); p_filter->p_sys->p_buffer = malloc( VOUT_MAX_WIDTH );
break; break;
#endif #endif
case VLC_CODEC_RGB15: case VLC_CODEC_RGB15:
case VLC_CODEC_RGB16: case VLC_CODEC_RGB16:
p_filter->p_sys->p_buffer = malloc( VOUT_MAX_WIDTH * 2 ); p_filter->p_sys->p_buffer = malloc( VOUT_MAX_WIDTH * 2 );
break; break;
case VLC_CODEC_RGB24: case VLC_CODEC_RGB24:
case VLC_CODEC_RGB32: case VLC_CODEC_RGB32:
p_filter->p_sys->p_buffer = malloc( VOUT_MAX_WIDTH * 4 ); p_filter->p_sys->p_buffer = malloc( VOUT_MAX_WIDTH * 4 );
break; break;
default: default:
p_filter->p_sys->p_buffer = NULL; p_filter->p_sys->p_buffer = NULL;
break; break;
...@@ -261,7 +243,7 @@ static int Activate( vlc_object_t *p_this ) ...@@ -261,7 +243,7 @@ static int Activate( vlc_object_t *p_this )
return VLC_EGENERIC; return VLC_EGENERIC;
} }
#if defined (MODULE_NAME_IS_i420_rgb) #ifdef PLAIN
switch( p_filter->fmt_out.video.i_chroma ) switch( p_filter->fmt_out.video.i_chroma )
{ {
case VLC_CODEC_RGB8: case VLC_CODEC_RGB8:
...@@ -300,7 +282,7 @@ static void Deactivate( vlc_object_t *p_this ) ...@@ -300,7 +282,7 @@ static void Deactivate( vlc_object_t *p_this )
{ {
filter_t *p_filter = (filter_t *)p_this; filter_t *p_filter = (filter_t *)p_this;
#if defined (MODULE_NAME_IS_i420_rgb) #ifdef PLAIN
free( p_filter->p_sys->p_base ); free( p_filter->p_sys->p_base );
#endif #endif
free( p_filter->p_sys->p_offset ); free( p_filter->p_sys->p_offset );
...@@ -308,21 +290,18 @@ static void Deactivate( vlc_object_t *p_this ) ...@@ -308,21 +290,18 @@ static void Deactivate( vlc_object_t *p_this )
free( p_filter->p_sys ); free( p_filter->p_sys );
} }
#if defined (MODULE_NAME_IS_i420_rgb) #ifndef PLAIN
VIDEO_FILTER_WRAPPER( I420_RGB8 )
VIDEO_FILTER_WRAPPER( I420_RGB16 )
//VIDEO_FILTER_WRAPPER( I420_RGB16_dither )
VIDEO_FILTER_WRAPPER( I420_RGB32 )
#else
VIDEO_FILTER_WRAPPER( I420_R5G5B5 ) VIDEO_FILTER_WRAPPER( I420_R5G5B5 )
VIDEO_FILTER_WRAPPER( I420_R5G6B5 ) VIDEO_FILTER_WRAPPER( I420_R5G6B5 )
VIDEO_FILTER_WRAPPER( I420_A8R8G8B8 ) VIDEO_FILTER_WRAPPER( I420_A8R8G8B8 )
VIDEO_FILTER_WRAPPER( I420_R8G8B8A8 ) VIDEO_FILTER_WRAPPER( I420_R8G8B8A8 )
VIDEO_FILTER_WRAPPER( I420_B8G8R8A8 ) VIDEO_FILTER_WRAPPER( I420_B8G8R8A8 )
VIDEO_FILTER_WRAPPER( I420_A8B8G8R8 ) VIDEO_FILTER_WRAPPER( I420_A8B8G8R8 )
#endif #else
VIDEO_FILTER_WRAPPER( I420_RGB8 )
VIDEO_FILTER_WRAPPER( I420_RGB16 )
VIDEO_FILTER_WRAPPER( I420_RGB32 )
#if defined (MODULE_NAME_IS_i420_rgb)
/***************************************************************************** /*****************************************************************************
* SetGammaTable: return intensity table transformed by gamma curve. * SetGammaTable: return intensity table transformed by gamma curve.
***************************************************************************** *****************************************************************************
...@@ -538,6 +517,4 @@ static void Set8bppPalette( filter_t *p_filter, uint8_t *p_rgb8 ) ...@@ -538,6 +517,4 @@ static void Set8bppPalette( filter_t *p_filter, uint8_t *p_rgb8 )
} }
} }
} }
#endif #endif
...@@ -21,6 +21,10 @@ ...@@ -21,6 +21,10 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/ *****************************************************************************/
#if !defined (SSE2) && !defined (MMX)
# define PLAIN
#endif
/** Number of entries in RGB palette/colormap */ /** Number of entries in RGB palette/colormap */
#define CMAP_RGB2_SIZE 256 #define CMAP_RGB2_SIZE 256
...@@ -35,7 +39,7 @@ struct filter_sys_t ...@@ -35,7 +39,7 @@ struct filter_sys_t
uint8_t *p_buffer; uint8_t *p_buffer;
int *p_offset; int *p_offset;
#ifdef MODULE_NAME_IS_i420_rgb #ifdef PLAIN
/**< Pre-calculated conversion tables */ /**< Pre-calculated conversion tables */
void *p_base; /**< base for all conversion tables */ void *p_base; /**< base for all conversion tables */
uint8_t *p_rgb8; /**< RGB 8 bits table */ uint8_t *p_rgb8; /**< RGB 8 bits table */
...@@ -55,12 +59,11 @@ struct filter_sys_t ...@@ -55,12 +59,11 @@ struct filter_sys_t
/***************************************************************************** /*****************************************************************************
* Prototypes * Prototypes
*****************************************************************************/ *****************************************************************************/
#ifdef MODULE_NAME_IS_i420_rgb #ifdef PLAIN
void I420_RGB8 ( filter_t *, picture_t *, picture_t * ); void I420_RGB8 ( filter_t *, picture_t *, picture_t * );
void I420_RGB16_dither ( filter_t *, picture_t *, picture_t * );
void I420_RGB16 ( filter_t *, picture_t *, picture_t * ); void I420_RGB16 ( filter_t *, picture_t *, picture_t * );
void I420_RGB32 ( filter_t *, picture_t *, picture_t * ); void I420_RGB32 ( filter_t *, picture_t *, picture_t * );
#else // if defined(MODULE_NAME_IS_i420_rgb_mmx) #else
void I420_R5G5B5 ( filter_t *, picture_t *, picture_t * ); void I420_R5G5B5 ( filter_t *, picture_t *, picture_t * );
void I420_R5G6B5 ( filter_t *, picture_t *, picture_t * ); void I420_R5G6B5 ( filter_t *, picture_t *, picture_t * );
void I420_A8R8G8B8 ( filter_t *, picture_t *, picture_t * ); void I420_A8R8G8B8 ( filter_t *, picture_t *, picture_t * );
......
...@@ -22,10 +22,6 @@ ...@@ -22,10 +22,6 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/ *****************************************************************************/
/*****************************************************************************
* Preamble
*****************************************************************************/
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
# include "config.h" # include "config.h"
#endif #endif
...@@ -35,19 +31,67 @@ ...@@ -35,19 +31,67 @@
#include <vlc_cpu.h> #include <vlc_cpu.h>
#include "i420_rgb.h" #include "i420_rgb.h"
#if defined (MODULE_NAME_IS_i420_rgb) #include "i420_rgb_c.h"
# include "i420_rgb_c.h"
# define VLC_TARGET /*****************************************************************************
#elif defined (MODULE_NAME_IS_i420_rgb_mmx) * SetOffset: build offset array for conversion functions
# include "i420_rgb_mmx.h" *****************************************************************************
# define VLC_TARGET VLC_MMX * This function will build an offset array used in later conversion functions.
#elif defined (MODULE_NAME_IS_i420_rgb_sse2) * It will also set horizontal and vertical scaling indicators.
# include "i420_rgb_sse2.h" *****************************************************************************/
# define VLC_TARGET VLC_SSE static void SetOffset( int i_width, int i_height, int i_pic_width,
#endif int i_pic_height, bool *pb_hscale,
unsigned int *pi_vscale, int *p_offset )
{
/*
* Prepare horizontal offset array
*/
if( i_pic_width - i_width == 0 )
{ /* No horizontal scaling: YUV conversion is done directly to picture */
*pb_hscale = 0;
}
else if( i_pic_width - i_width > 0 )
{ /* Prepare scaling array for horizontal extension */
int i_scale_count = i_pic_width;
*pb_hscale = 1;
for( int i_x = i_width; i_x--; )
{
while( (i_scale_count -= i_width) > 0 )
{
*p_offset++ = 0;
}
*p_offset++ = 1;
i_scale_count += i_pic_width;
}
}
else /* if( i_pic_width - i_width < 0 ) */
{ /* Prepare scaling array for horizontal reduction */
int i_scale_count = i_pic_width;
*pb_hscale = 1;
for( int i_x = i_pic_width; i_x--; )
{
*p_offset = 1;
while( (i_scale_count -= i_pic_width) > 0 )
{
*p_offset += 1;
}
p_offset++;
i_scale_count += i_width;
}
}
static void SetOffset( int, int, int, int, bool *, /*
unsigned int *, int * ); * Set vertical scaling indicator
*/
if( i_pic_height - i_height == 0 )
*pi_vscale = 0;
else if( i_pic_height - i_height > 0 )
*pi_vscale = 1;
else /* if( i_pic_height - i_height < 0 ) */
*pi_vscale = -1;
}
/***************************************************************************** /*****************************************************************************
* I420_RGB16: color YUV 4:2:0 to RGB 16 bpp * I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
...@@ -60,8 +104,6 @@ static void SetOffset( int, int, int, int, bool *, ...@@ -60,8 +104,6 @@ static void SetOffset( int, int, int, int, bool *,
* - output: 1 line * - output: 1 line
*****************************************************************************/ *****************************************************************************/
#if defined (MODULE_NAME_IS_i420_rgb)
void I420_RGB16( filter_t *p_filter, picture_t *p_src, picture_t *p_dest ) void I420_RGB16( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{ {
/* We got this one from the old arguments */ /* We got this one from the old arguments */
...@@ -154,13 +196,21 @@ void I420_RGB16( filter_t *p_filter, picture_t *p_src, picture_t *p_dest ) ...@@ -154,13 +196,21 @@ void I420_RGB16( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
} }
} }
#else // ! defined (MODULE_NAME_IS_i420_rgb) /*****************************************************************************
* I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
*****************************************************************************
* Horizontal alignment needed:
* - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
* - output: 1 pixel (4 bytes), margins allowed
* Vertical alignment needed:
* - input: 2 lines (2 Y lines, 1 U/V line)
* - output: 1 line
*****************************************************************************/
VLC_TARGET void I420_RGB32( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{ {
/* We got this one from the old arguments */ /* We got this one from the old arguments */
uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels; uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
uint8_t *p_y = p_src->Y_PIXELS; uint8_t *p_y = p_src->Y_PIXELS;
uint8_t *p_u = p_src->U_PIXELS; uint8_t *p_u = p_src->U_PIXELS;
uint8_t *p_v = p_src->V_PIXELS; uint8_t *p_v = p_src->V_PIXELS;
...@@ -173,11 +223,15 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest ) ...@@ -173,11 +223,15 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
int i_rewind; int i_rewind;
int i_scale_count; /* scale modulo counter */ int i_scale_count; /* scale modulo counter */
int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */ int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
uint16_t * p_pic_start; /* beginning of the current line for copy */ uint32_t * p_pic_start; /* beginning of the current line for copy */
int i_uval, i_vval; /* U and V samples */
int i_red, i_green, i_blue; /* U and V modified samples */
uint32_t * p_yuv = p_filter->p_sys->p_rgb32;
uint32_t * p_ybase; /* Y dependant conversion table */
/* Conversion buffer pointer */ /* Conversion buffer pointer */
uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer; uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
uint16_t * p_buffer; uint32_t * p_buffer;
/* Offset array pointer */ /* Offset array pointer */
int * p_offset_start = p_filter->p_sys->p_offset; int * p_offset_start = p_filter->p_sys->p_offset;
...@@ -189,6 +243,7 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest ) ...@@ -189,6 +243,7 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
- p_src->p[1].i_visible_pitch; - p_src->p[1].i_visible_pitch;
i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch; i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1' * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
...@@ -199,138 +254,12 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest ) ...@@ -199,138 +254,12 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
p_filter->fmt_out.video.i_height, p_filter->fmt_out.video.i_height,
&b_hscale, &i_vscale, p_offset_start ); &b_hscale, &i_vscale, p_offset_start );
/* /*
* Perform conversion * Perform conversion
*/ */
i_scale_count = ( i_vscale == 1 ) ? i_scale_count = ( i_vscale == 1 ) ?
p_filter->fmt_out.video.i_height : p_filter->fmt_out.video.i_height :
p_filter->fmt_in.video.i_height; p_filter->fmt_in.video.i_height;
#if defined (MODULE_NAME_IS_i420_rgb_sse2)
i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer = b_hscale ? p_buffer_start : p_pic;
if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
p_dest->p->i_pitch|
((intptr_t)p_y)|
((intptr_t)p_buffer))) )
{
/* use faster SSE2 aligned fetch and store */
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
{
SSE2_CALL (
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_ALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
p_buffer += 16;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
}
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
p_y += i_source_margin;
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
p_buffer = b_hscale ? p_buffer_start : p_pic;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
p_buffer = b_hscale ? p_buffer_start : p_pic;
for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
{
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
p_buffer += 16;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
}
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
p_y += i_source_margin;
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
p_buffer = b_hscale ? p_buffer_start : p_pic;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ ) for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{ {
p_pic_start = p_pic; p_pic_start = p_pic;
...@@ -338,16 +267,10 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest ) ...@@ -338,16 +267,10 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; ) for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
{ {
MMX_CALL ( CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
MMX_INIT_16 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
MMX_YUV_MUL CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
MMX_YUV_ADD CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
MMX_UNPACK_15
);
p_y += 8;
p_u += 4;
p_v += 4;
p_buffer += 8;
} }
/* Here we do some unaligned reads and duplicate conversions, but /* Here we do some unaligned reads and duplicate conversions, but
...@@ -358,20 +281,13 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest ) ...@@ -358,20 +281,13 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
p_u -= i_rewind >> 1; p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1; p_v -= i_rewind >> 1;
p_buffer -= i_rewind; p_buffer -= i_rewind;
CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
MMX_CALL ( CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
MMX_INIT_16 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
MMX_YUV_MUL CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
MMX_YUV_ADD
MMX_UNPACK_15
);
p_y += 8;
p_u += 4;
p_v += 4;
p_buffer += 8;
} }
SCALE_WIDTH; SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 ); SCALE_HEIGHT( 420, 4 );
p_y += i_source_margin; p_y += i_source_margin;
if( i_y % 2 ) if( i_y % 2 )
...@@ -380,1328 +296,4 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest ) ...@@ -380,1328 +296,4 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
p_v += i_source_margin_c; p_v += i_source_margin_c;
} }
} }
/* re-enable FPU registers */
MMX_END;
#endif
} }
/*****************************************************************************
 * I420_R5G6B5: planar YUV 4:2:0 to 16 bits per pixel, SIMD variants
 *****************************************************************************
 * Reads the Y, U and V planes of p_src and writes 16-bit pixels to the first
 * plane of p_dest, or to the intermediate line buffer p_sys->p_buffer when
 * SetOffset() reports that horizontal scaling is needed.
 *
 * Two compile-time variants exist:
 * - MODULE_NAME_IS_i420_rgb_sse2: 16 pixels per step, with an aligned and an
 *   unaligned code path selected at run time;
 * - otherwise (MMX): 8 pixels per step.
 *
 * The colour math and pixel packing are done by the *_INIT_16, *_YUV_MUL,
 * *_YUV_ADD and *_UNPACK_16 macros, which are defined elsewhere.
 * NOTE(review): presumably *_UNPACK_16 packs RGB 5-6-5, as the function name
 * suggests - confirm against the macro definitions.
 *
 * Several locals (i_right_margin, i_chroma_width, i_scale_count, p_pic_start,
 * p_offset) are only initialised or declared in the visible code; presumably
 * they are consumed by SCALE_WIDTH / SCALE_HEIGHT - verify against
 * i420_rgb.h.
 *****************************************************************************/
VLC_TARGET
void I420_R5G6B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{
/* We got this one from the old arguments */
/* Destination pixels (16 bits each) and the three source planes */
uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
uint8_t *p_y = p_src->Y_PIXELS;
uint8_t *p_u = p_src->U_PIXELS;
uint8_t *p_v = p_src->V_PIXELS;
bool b_hscale; /* horizontal scaling type */
unsigned int i_vscale; /* vertical scaling type */
unsigned int i_x, i_y; /* horizontal and vertical indexes */
int i_right_margin;
int i_rewind;
int i_scale_count; /* scale modulo counter */
int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
uint16_t * p_pic_start; /* beginning of the current line for copy */
/* Conversion buffer pointer */
uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
uint16_t * p_buffer;
/* Offset array pointer */
int * p_offset_start = p_filter->p_sys->p_offset;
int * p_offset;
/* Bytes between the end of the visible part of a line and the start of the
 * next line, for the luma plane (p[0]) and for the chroma planes (p[1]) */
const int i_source_margin = p_src->p[0].i_pitch
- p_src->p[0].i_visible_pitch;
const int i_source_margin_c = p_src->p[1].i_pitch
- p_src->p[1].i_visible_pitch;
/* Same quantity for the destination plane */
i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* Fill the offset array and get the horizontal/vertical scaling flags from
 * the input and output dimensions */
SetOffset( p_filter->fmt_in.video.i_width,
p_filter->fmt_in.video.i_height,
p_filter->fmt_out.video.i_width,
p_filter->fmt_out.video.i_height,
&b_hscale, &i_vscale, p_offset_start );
/*
* Perform conversion
*/
/* The modulo counter starts from the output height when i_vscale is 1,
 * and from the input height otherwise */
i_scale_count = ( i_vscale == 1 ) ?
p_filter->fmt_out.video.i_height :
p_filter->fmt_in.video.i_height;
#if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* Number of pixels missing to reach a multiple of 16 (0 when the width is
 * already a multiple of 16) */
i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer = b_hscale ? p_buffer_start : p_pic;
/* The aligned path requires the luma pitch, the destination pitch, the luma
 * pointer and the output pointer to all be multiples of 16 */
if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
p_dest->p->i_pitch|
((intptr_t)p_y)|
((intptr_t)p_buffer))) )
{
/* use faster SSE2 aligned fetch and store */
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
/* One step converts 16 luma samples and 8 U / 8 V samples into 16 pixels */
for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
{
SSE2_CALL (
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_ALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
p_buffer += 16;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
/* Step back so that the last group of 16 ends exactly at the end of the
 * line; the pointers are then no longer 16-byte aligned, hence the
 * unaligned macro variants. p_buffer is not advanced afterwards: it is
 * reset at the end of the row. */
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
}
/* NOTE(review): the scaling macros are defined elsewhere; presumably they
 * copy/scale the converted line into p_pic and advance p_pic - confirm */
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
/* Skip the luma padding on every row, the chroma padding on odd rows only */
p_y += i_source_margin;
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
/* Select the output pointer for the next row */
p_buffer = b_hscale ? p_buffer_start : p_pic;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
p_buffer = b_hscale ? p_buffer_start : p_pic;
/* Same 16-pixel step as the aligned path, with unaligned accesses */
for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
{
SSE2_CALL(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
p_buffer += 16;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
SSE2_CALL(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
}
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
/* Skip the luma padding on every row, the chroma padding on odd rows only */
p_y += i_source_margin;
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
/* Redundant with the assignment at the top of the loop body, kept as is */
p_buffer = b_hscale ? p_buffer_start : p_pic;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* Number of pixels missing to reach a multiple of 8 (0 when the width is
 * already a multiple of 8) */
i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One step converts 8 luma samples and 4 U / 4 V samples into 8 pixels */
for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
{
MMX_CALL (
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y += 8;
p_u += 4;
p_v += 4;
p_buffer += 8;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
/* Step back so that the last group of 8 ends exactly at the end of the
 * line, then convert that overlapping group */
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
MMX_CALL (
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y += 8;
p_u += 4;
p_v += 4;
p_buffer += 8;
}
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
/* Skip the luma padding on every row, the chroma padding on odd rows only */
p_y += i_source_margin;
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
}
/* re-enable FPU registers */
MMX_END;
#endif
}
#endif
/*****************************************************************************
* I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
*****************************************************************************
* Horizontal alignment needed:
* - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
* - output: 1 pixel (4 bytes), margins allowed
* Vertical alignment needed:
* - input: 2 lines (2 Y lines, 1 U/V line)
* - output: 1 line
*****************************************************************************/
#if defined (MODULE_NAME_IS_i420_rgb)
/* Plain C conversion of a planar YUV picture (p_src) to 32-bit pixels written
 * to the first plane of p_dest, using the table stored in p_sys->p_rgb32.
 * The per-pixel work is done by the CONVERT_YUV_PIXEL / CONVERT_Y_PIXEL
 * macros, which are defined elsewhere. */
void I420_RGB32( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{
/* We got this one from the old arguments */
/* Destination pixels (32 bits each) and the three source planes */
uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
uint8_t *p_y = p_src->Y_PIXELS;
uint8_t *p_u = p_src->U_PIXELS;
uint8_t *p_v = p_src->V_PIXELS;
bool b_hscale; /* horizontal scaling type */
unsigned int i_vscale; /* vertical scaling type */
unsigned int i_x, i_y; /* horizontal and vertical indexes */
int i_right_margin;
int i_rewind;
int i_scale_count; /* scale modulo counter */
int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
uint32_t * p_pic_start; /* beginning of the current line for copy */
/* NOTE(review): the next six locals are not referenced in the visible code;
 * presumably they are used inside the CONVERT_* macros - confirm */
int i_uval, i_vval; /* U and V samples */
int i_red, i_green, i_blue; /* U and V modified samples */
uint32_t * p_yuv = p_filter->p_sys->p_rgb32;
uint32_t * p_ybase; /* Y dependent conversion table */
/* Conversion buffer pointer */
uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
uint32_t * p_buffer;
/* Offset array pointer */
int * p_offset_start = p_filter->p_sys->p_offset;
int * p_offset;
/* Bytes between the end of the visible part of a line and the start of the
 * next line, for the luma plane (p[0]) and for the chroma planes (p[1]) */
const int i_source_margin = p_src->p[0].i_pitch
- p_src->p[0].i_visible_pitch;
const int i_source_margin_c = p_src->p[1].i_pitch
- p_src->p[1].i_visible_pitch;
/* Same quantity for the destination plane */
i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* Number of pixels missing to reach a multiple of 8 (0 when the width is
 * already a multiple of 8) */
i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* Fill the offset array and get the horizontal/vertical scaling flags from
 * the input and output dimensions */
SetOffset( p_filter->fmt_in.video.i_width,
p_filter->fmt_in.video.i_height,
p_filter->fmt_out.video.i_width,
p_filter->fmt_out.video.i_height,
&b_hscale, &i_vscale, p_offset_start );
/*
* Perform conversion
*/
/* The modulo counter starts from the output height when i_vscale is 1,
 * and from the input height otherwise */
i_scale_count = ( i_vscale == 1 ) ?
p_filter->fmt_out.video.i_height :
p_filter->fmt_in.video.i_height;
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
/* Convert into the line buffer when scaling horizontally, else in place */
p_buffer = b_hscale ? p_buffer_start : p_pic;
/* The loop runs once per group of 8 input pixels.
 * NOTE(review): presumably each CONVERT_* invocation produces one pixel, so
 * the eight invocations below cover one group - confirm against the macros */
for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
{
CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
/* Step the pointers back by i_rewind (chroma by half of it) and convert
 * one more group */
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
}
/* NOTE(review): the scaling macros are defined elsewhere; presumably they
 * copy/scale the converted line into p_pic and advance p_pic - confirm */
SCALE_WIDTH;
SCALE_HEIGHT( 420, 4 );
/* Skip the luma padding on every row, the chroma padding on odd rows only */
p_y += i_source_margin;
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
}
}
#else // defined (MODULE_NAME_IS_i420_rgb_mmx) || defined (MODULE_NAME_IS_i420_rgb_sse2)
/*****************************************************************************
 * I420_A8R8G8B8: color YUV 4:2:0 to RGB 32 bpp (*_UNPACK_32_ARGB macros)
 *****************************************************************************
 * SSE2/MMX build of the 32 bpp converter. Every source line is converted in
 * blocks of 16 (SSE2) or 8 (MMX) pixels, either directly into the
 * destination line or, when SetOffset() reports horizontal scaling, into the
 * filter's intermediate buffer. When the width is not a multiple of the
 * block size, one extra block is stepped back by i_rewind pixels so that it
 * ends exactly on the last pixel of the line; the overlap is converted twice.
 *
 * p_filter: filter instance (input/output sizes, p_sys buffer and offsets)
 * p_src:    source picture, its Y, U and V planes are read
 * p_dest:   destination picture, its first plane is written
 *
 * NOTE(review): several locals are never referenced directly here; they are
 * presumably captured by name by the SCALE_WIDTH, SCALE_HEIGHT, SSE2_* and
 * MMX_* macros from the project headers, so their names must be kept.
 *****************************************************************************/
VLC_TARGET
void I420_A8R8G8B8( filter_t *p_filter, picture_t *p_src,
                    picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
    uint8_t  *p_y   = p_src->Y_PIXELS;
    uint8_t  *p_u   = p_src->U_PIXELS;
    uint8_t  *p_v   = p_src->V_PIXELS;

    bool         b_hscale;                        /* horizontal scaling type */
    unsigned int i_vscale;                          /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */

    int         i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
    uint32_t *  p_pic_start;       /* beginning of the current line for copy */
    /* Conversion buffer pointer */
    uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
    uint32_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = p_filter->p_sys->p_offset;
    int *       p_offset;

    /* Padding bytes at the end of each luma / chroma source line */
    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch;

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( p_filter->fmt_in.video.i_width,
               p_filter->fmt_in.video.i_height,
               p_filter->fmt_out.video.i_width,
               p_filter->fmt_out.video.i_height,
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    p_filter->fmt_out.video.i_height :
                    p_filter->fmt_in.video.i_height;

#if defined (MODULE_NAME_IS_i420_rgb_sse2)

    /* Pixels missing to reach the next multiple of 16 (0 if none) */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 15;

    /*
    ** SSE2 128 bits fetch/store instructions are faster
    ** if memory access is 16 bytes aligned
    */

    p_buffer = b_hscale ? p_buffer_start : p_pic;
    if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
                    p_dest->p->i_pitch|
                    ((intptr_t)p_y)|
                    ((intptr_t)p_buffer))) )
    {
        /* use faster SSE2 aligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_ALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ARGB_ALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ARGB_UNALIGNED
                );
                p_y += 16;
                /* A 16-pixel block consumes 8 chroma samples, exactly as in
                 * the main loop and in the unaligned path. Advancing by 4
                 * here left U/V four samples short at the end of every line
                 * whose width is not a multiple of 16. */
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }
    else
    {
        /* use slower SSE2 unaligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;
            p_buffer = b_hscale ? p_buffer_start : p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ARGB_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ARGB_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }

    /* make sure all SSE2 stores are visible thereafter */
    SSE2_END;

#else // defined (MODULE_NAME_IS_i420_rgb_mmx)

    /* Pixels missing to reach the next multiple of 8 (0 if none) */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 7;

    for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
    {
        p_pic_start = p_pic;
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
        {
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_ARGB
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_ARGB
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }
        SCALE_WIDTH;
        SCALE_HEIGHT( 420, 4 );

        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }

    /* re-enable FPU registers */
    MMX_END;

#endif
}
/*****************************************************************************
 * I420_R8G8B8A8: color YUV 4:2:0 to RGB 32 bpp (*_UNPACK_32_RGBA macros)
 *****************************************************************************
 * SSE2/MMX build of the 32 bpp converter. Every source line is converted in
 * blocks of 16 (SSE2) or 8 (MMX) pixels, either directly into the
 * destination line or, when SetOffset() reports horizontal scaling, into the
 * filter's intermediate buffer. When the width is not a multiple of the
 * block size, one extra block is stepped back by i_rewind pixels so that it
 * ends exactly on the last pixel of the line; the overlap is converted twice.
 *
 * p_filter: filter instance (input/output sizes, p_sys buffer and offsets)
 * p_src:    source picture, its Y, U and V planes are read
 * p_dest:   destination picture, its first plane is written
 *
 * NOTE(review): several locals are never referenced directly here; they are
 * presumably captured by name by the SCALE_WIDTH, SCALE_HEIGHT, SSE2_* and
 * MMX_* macros from the project headers, so their names must be kept.
 *****************************************************************************/
VLC_TARGET
void I420_R8G8B8A8( filter_t *p_filter, picture_t *p_src,
                    picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
    uint8_t  *p_y   = p_src->Y_PIXELS;
    uint8_t  *p_u   = p_src->U_PIXELS;
    uint8_t  *p_v   = p_src->V_PIXELS;

    bool         b_hscale;                        /* horizontal scaling type */
    unsigned int i_vscale;                          /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */

    int         i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
    uint32_t *  p_pic_start;       /* beginning of the current line for copy */
    /* Conversion buffer pointer */
    uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
    uint32_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = p_filter->p_sys->p_offset;
    int *       p_offset;

    /* Padding bytes at the end of each luma / chroma source line */
    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch;

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( p_filter->fmt_in.video.i_width,
               p_filter->fmt_in.video.i_height,
               p_filter->fmt_out.video.i_width,
               p_filter->fmt_out.video.i_height,
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    p_filter->fmt_out.video.i_height :
                    p_filter->fmt_in.video.i_height;

#if defined (MODULE_NAME_IS_i420_rgb_sse2)

    /* Pixels missing to reach the next multiple of 16 (0 if none) */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 15;

    /*
    ** SSE2 128 bits fetch/store instructions are faster
    ** if memory access is 16 bytes aligned
    */

    p_buffer = b_hscale ? p_buffer_start : p_pic;
    if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
                    p_dest->p->i_pitch|
                    ((intptr_t)p_y)|
                    ((intptr_t)p_buffer))) )
    {
        /* use faster SSE2 aligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_ALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_ALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_UNALIGNED
                );
                p_y += 16;
                /* A 16-pixel block consumes 8 chroma samples, exactly as in
                 * the main loop and in the unaligned path. Advancing by 4
                 * here left U/V four samples short at the end of every line
                 * whose width is not a multiple of 16. */
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }
    else
    {
        /* use slower SSE2 unaligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;
            p_buffer = b_hscale ? p_buffer_start : p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }

    /* make sure all SSE2 stores are visible thereafter */
    SSE2_END;

#else // defined (MODULE_NAME_IS_i420_rgb_mmx)

    /* Pixels missing to reach the next multiple of 8 (0 if none) */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 7;

    for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
    {
        p_pic_start = p_pic;
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
        {
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_RGBA
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_RGBA
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }
        SCALE_WIDTH;
        SCALE_HEIGHT( 420, 4 );

        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }

    /* re-enable FPU registers */
    MMX_END;

#endif
}
/*****************************************************************************
 * I420_B8G8R8A8: color YUV 4:2:0 to RGB 32 bpp (*_UNPACK_32_BGRA macros)
 *****************************************************************************
 * SSE2/MMX build of the 32 bpp converter. Every source line is converted in
 * blocks of 16 (SSE2) or 8 (MMX) pixels, either directly into the
 * destination line or, when SetOffset() reports horizontal scaling, into the
 * filter's intermediate buffer. When the width is not a multiple of the
 * block size, one extra block is stepped back by i_rewind pixels so that it
 * ends exactly on the last pixel of the line; the overlap is converted twice.
 *
 * p_filter: filter instance (input/output sizes, p_sys buffer and offsets)
 * p_src:    source picture, its Y, U and V planes are read
 * p_dest:   destination picture, its first plane is written
 *
 * NOTE(review): several locals are never referenced directly here; they are
 * presumably captured by name by the SCALE_WIDTH, SCALE_HEIGHT, SSE2_* and
 * MMX_* macros from the project headers, so their names must be kept.
 *****************************************************************************/
VLC_TARGET
void I420_B8G8R8A8( filter_t *p_filter, picture_t *p_src,
                    picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
    uint8_t  *p_y   = p_src->Y_PIXELS;
    uint8_t  *p_u   = p_src->U_PIXELS;
    uint8_t  *p_v   = p_src->V_PIXELS;

    bool         b_hscale;                        /* horizontal scaling type */
    unsigned int i_vscale;                          /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */

    int         i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
    uint32_t *  p_pic_start;       /* beginning of the current line for copy */
    /* Conversion buffer pointer */
    uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
    uint32_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = p_filter->p_sys->p_offset;
    int *       p_offset;

    /* Padding bytes at the end of each luma / chroma source line */
    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch;

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( p_filter->fmt_in.video.i_width,
               p_filter->fmt_in.video.i_height,
               p_filter->fmt_out.video.i_width,
               p_filter->fmt_out.video.i_height,
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    p_filter->fmt_out.video.i_height :
                    p_filter->fmt_in.video.i_height;

#if defined (MODULE_NAME_IS_i420_rgb_sse2)

    /* Pixels missing to reach the next multiple of 16 (0 if none) */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 15;

    /*
    ** SSE2 128 bits fetch/store instructions are faster
    ** if memory access is 16 bytes aligned
    */

    p_buffer = b_hscale ? p_buffer_start : p_pic;
    if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
                    p_dest->p->i_pitch|
                    ((intptr_t)p_y)|
                    ((intptr_t)p_buffer))) )
    {
        /* use faster SSE2 aligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_ALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_BGRA_ALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_BGRA_UNALIGNED
                );
                p_y += 16;
                /* A 16-pixel block consumes 8 chroma samples, exactly as in
                 * the main loop and in the unaligned path. Advancing by 4
                 * here left U/V four samples short at the end of every line
                 * whose width is not a multiple of 16. */
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }
    else
    {
        /* use slower SSE2 unaligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;
            p_buffer = b_hscale ? p_buffer_start : p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_BGRA_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_BGRA_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }

    /* make sure all SSE2 stores are visible thereafter; the other SSE2
     * converters in this file end the same way and this one did not */
    SSE2_END;

#else // defined (MODULE_NAME_IS_i420_rgb_mmx)

    /* Pixels missing to reach the next multiple of 8 (0 if none) */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 7;

    for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
    {
        p_pic_start = p_pic;
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
        {
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_BGRA
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_BGRA
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }
        SCALE_WIDTH;
        SCALE_HEIGHT( 420, 4 );

        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }

    /* re-enable FPU registers */
    MMX_END;

#endif
}
/*****************************************************************************
 * I420_A8B8G8R8: color YUV 4:2:0 to RGB 32 bpp (*_UNPACK_32_ABGR macros)
 *****************************************************************************
 * SSE2/MMX build of the 32 bpp converter. Every source line is converted in
 * blocks of 16 (SSE2) or 8 (MMX) pixels, either directly into the
 * destination line or, when SetOffset() reports horizontal scaling, into the
 * filter's intermediate buffer. When the width is not a multiple of the
 * block size, one extra block is stepped back by i_rewind pixels so that it
 * ends exactly on the last pixel of the line; the overlap is converted twice.
 *
 * p_filter: filter instance (input/output sizes, p_sys buffer and offsets)
 * p_src:    source picture, its Y, U and V planes are read
 * p_dest:   destination picture, its first plane is written
 *
 * NOTE(review): several locals are never referenced directly here; they are
 * presumably captured by name by the SCALE_WIDTH, SCALE_HEIGHT, SSE2_* and
 * MMX_* macros from the project headers, so their names must be kept.
 *****************************************************************************/
VLC_TARGET
void I420_A8B8G8R8( filter_t *p_filter, picture_t *p_src,
                    picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
    uint8_t  *p_y   = p_src->Y_PIXELS;
    uint8_t  *p_u   = p_src->U_PIXELS;
    uint8_t  *p_v   = p_src->V_PIXELS;

    bool         b_hscale;                        /* horizontal scaling type */
    unsigned int i_vscale;                          /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */

    int         i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
    uint32_t *  p_pic_start;       /* beginning of the current line for copy */
    /* Conversion buffer pointer */
    uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
    uint32_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = p_filter->p_sys->p_offset;
    int *       p_offset;

    /* Padding bytes at the end of each luma / chroma source line */
    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch;

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( p_filter->fmt_in.video.i_width,
               p_filter->fmt_in.video.i_height,
               p_filter->fmt_out.video.i_width,
               p_filter->fmt_out.video.i_height,
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    p_filter->fmt_out.video.i_height :
                    p_filter->fmt_in.video.i_height;

#if defined (MODULE_NAME_IS_i420_rgb_sse2)

    /* Pixels missing to reach the next multiple of 16 (0 if none) */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 15;

    /*
    ** SSE2 128 bits fetch/store instructions are faster
    ** if memory access is 16 bytes aligned
    */

    p_buffer = b_hscale ? p_buffer_start : p_pic;
    if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
                    p_dest->p->i_pitch|
                    ((intptr_t)p_y)|
                    ((intptr_t)p_buffer))) )
    {
        /* use faster SSE2 aligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_ALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ABGR_ALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ABGR_UNALIGNED
                );
                p_y += 16;
                /* A 16-pixel block consumes 8 chroma samples, exactly as in
                 * the main loop and in the unaligned path. Advancing by 4
                 * here left U/V four samples short at the end of every line
                 * whose width is not a multiple of 16. */
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }
    else
    {
        /* use slower SSE2 unaligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;
            p_buffer = b_hscale ? p_buffer_start : p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ABGR_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ABGR_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }

    /* make sure all SSE2 stores are visible thereafter; the other SSE2
     * converters in this file end the same way and this one did not */
    SSE2_END;

#else // defined (MODULE_NAME_IS_i420_rgb_mmx)

    /* Pixels missing to reach the next multiple of 8 (0 if none) */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 7;

    for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
    {
        p_pic_start = p_pic;
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
        {
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_ABGR
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_ABGR
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }
        SCALE_WIDTH;
        SCALE_HEIGHT( 420, 4 );

        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }

    /* re-enable FPU registers */
    MMX_END;

#endif
}
#endif
/* Following functions are local */
/*****************************************************************************
* SetOffset: build offset array for conversion functions
*****************************************************************************
* This function will build an offset array used in later conversion functions.
* It will also set horizontal and vertical scaling indicators.
*****************************************************************************/
/* i_width, i_height:         source picture dimensions
 * i_pic_width, i_pic_height: destination picture dimensions
 * pb_hscale: set to 0 when both widths are equal, to 1 otherwise
 * pi_vscale: set to 0 (same height), 1 (taller output) or -1 (shorter output)
 * p_offset:  filled only when the widths differ; enlargement writes one
 *            0/1 entry per output pixel, reduction one step (>= 1) per output
 *            pixel */
static void SetOffset( int i_width, int i_height, int i_pic_width,
                       int i_pic_height, bool *pb_hscale,
                       unsigned int *pi_vscale, int *p_offset )
{
    const int i_width_delta  = i_pic_width - i_width;
    const int i_height_delta = i_pic_height - i_height;

    /*
     * Horizontal part: scaling flag and offset array
     */
    if( i_width_delta == 0 )
    {
        /* Same width: conversion goes straight to the picture, the offset
         * array is left untouched */
        *pb_hscale = 0;
    }
    else
    {
        *pb_hscale = 1;

        if( i_width_delta > 0 )
        {
            /* Enlargement: 0 means "repeat the current source pixel",
             * 1 means "move on to the next one" */
            int i_error = i_pic_width;
            int i_left = i_width;

            while( i_left-- )
            {
                for( i_error -= i_width; i_error > 0; i_error -= i_width )
                    *p_offset++ = 0;
                *p_offset++ = 1;
                i_error += i_pic_width;
            }
        }
        else
        {
            /* Reduction: each entry is the number of source pixels to
             * advance after emitting one output pixel */
            int i_error = i_width;
            int i_left = i_pic_width;

            while( i_left-- )
            {
                int i_step = 1;

                for( i_error -= i_pic_width; i_error > 0;
                     i_error -= i_pic_width )
                    i_step++;
                *p_offset++ = i_step;
                i_error += i_width;
            }
        }
    }

    /*
     * Vertical part: only an indicator is produced
     */
    if( i_height_delta > 0 )
        *pi_vscale = 1;
    else if( i_height_delta < 0 )
        *pi_vscale = -1;
    else
        *pi_vscale = 0;
}
/*****************************************************************************
* i420_rgb16_x86.c : YUV to bitmap RGB conversion module for vlc
*****************************************************************************
* Copyright (C) 2000 VLC authors and VideoLAN
* $Id$
*
* Authors: Samuel Hocevar <sam@zoy.org>
* Damien Fouilleul <damienf@videolan.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <vlc_common.h>
#include <vlc_filter.h>
#include <vlc_cpu.h>
#include "i420_rgb.h"
#ifdef SSE2
# include "i420_rgb_sse2.h"
# define VLC_TARGET VLC_SSE
#else
# include "i420_rgb_mmx.h"
# define VLC_TARGET VLC_MMX
#endif
/*****************************************************************************
* SetOffset: build offset array for conversion functions
*****************************************************************************
* This function will build an offset array used in later conversion functions.
* It will also set horizontal and vertical scaling indicators.
*****************************************************************************/
/* i_width, i_height:         source picture dimensions
 * i_pic_width, i_pic_height: destination picture dimensions
 * pb_hscale: set to 0 when both widths are equal, to 1 otherwise
 * pi_vscale: set to 0 (same height), 1 (taller output) or -1 (shorter output)
 * p_offset:  filled only when the widths differ; enlargement writes one
 *            0/1 entry per output pixel, reduction one step (>= 1) per output
 *            pixel */
static void SetOffset( int i_width, int i_height, int i_pic_width,
                       int i_pic_height, bool *pb_hscale,
                       unsigned int *pi_vscale, int *p_offset )
{
    /*
     * Prepare horizontal offset array
     */
    if( i_pic_width - i_width == 0 )
    {   /* No horizontal scaling: YUV conversion is done directly to picture */
        *pb_hscale = 0;
    }
    else if( i_pic_width - i_width > 0 )
    {   /* Prepare scaling array for horizontal extension */
        int i_scale_count = i_pic_width;

        *pb_hscale = 1;
        for( int i_x = i_width; i_x--; )
        {
            /* 0 repeats the current source pixel, 1 moves to the next one */
            while( (i_scale_count -= i_width) > 0 )
            {
                *p_offset++ = 0;
            }
            *p_offset++ = 1;
            i_scale_count += i_pic_width;
        }
    }
    else /* if( i_pic_width - i_width < 0 ) */
    {   /* Prepare scaling array for horizontal reduction */
        /* The counter must start from the source width: seeding it with the
         * destination width makes the very first step too small (4 -> 2
         * would give {1,2} instead of {2,2}), so the steps no longer cover
         * the whole source line. */
        int i_scale_count = i_width;

        *pb_hscale = 1;
        for( int i_x = i_pic_width; i_x--; )
        {
            /* Each entry is the number of source pixels to advance after
             * emitting one output pixel */
            *p_offset = 1;
            while( (i_scale_count -= i_pic_width) > 0 )
            {
                *p_offset += 1;
            }
            p_offset++;
            i_scale_count += i_width;
        }
    }

    /*
     * Set vertical scaling indicator
     */
    if( i_pic_height - i_height == 0 )
        *pi_vscale = 0;
    else if( i_pic_height - i_height > 0 )
        *pi_vscale = 1;
    else /* if( i_pic_height - i_height < 0 ) */
        *pi_vscale = -1;
}
/* I420_R5G5B5: converts a YUV 4:2:0 picture to 16-bit packed pixels using the
 * *_UNPACK_15 macros (SSE2 build when SSE2 is defined, MMX build otherwise).
 * p_filter: filter instance (input/output sizes, p_sys buffer and offsets)
 * p_src: source picture, its Y, U and V planes are read
 * p_dest: destination picture, its first plane is written
 * NOTE(review): several locals below are never referenced directly in this
 * body; they are presumably captured by name inside the SCALE_WIDTH,
 * SCALE_HEIGHT, SSE2_* and MMX_* macros (defined in the project headers), so
 * their names and the statement order should not be changed without checking
 * the macros. */
VLC_TARGET
void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{
/* We got this one from the old arguments */
uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
uint8_t *p_y = p_src->Y_PIXELS;
uint8_t *p_u = p_src->U_PIXELS;
uint8_t *p_v = p_src->V_PIXELS;
bool b_hscale; /* horizontal scaling type */
unsigned int i_vscale; /* vertical scaling type */
unsigned int i_x, i_y; /* horizontal and vertical indexes */
int i_right_margin;
int i_rewind;
int i_scale_count; /* scale modulo counter */
int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
uint16_t * p_pic_start; /* beginning of the current line for copy */
/* Conversion buffer pointer */
uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
uint16_t * p_buffer;
/* Offset array pointer */
int * p_offset_start = p_filter->p_sys->p_offset;
int * p_offset;
/* Bytes of padding at the end of each source line (pitch minus visible
 * pitch), for the luma plane and for the first chroma plane */
const int i_source_margin = p_src->p[0].i_pitch
- p_src->p[0].i_visible_pitch;
const int i_source_margin_c = p_src->p[1].i_pitch
- p_src->p[1].i_visible_pitch;
/* Same padding, for the destination line */
i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset( p_filter->fmt_in.video.i_width,
p_filter->fmt_in.video.i_height,
p_filter->fmt_out.video.i_width,
p_filter->fmt_out.video.i_height,
&b_hscale, &i_vscale, p_offset_start );
/*
* Perform conversion
*/
/* The counter starts from the output height when SetOffset() reported
 * vertical enlargement (i_vscale == 1), from the input height otherwise */
i_scale_count = ( i_vscale == 1 ) ?
p_filter->fmt_out.video.i_height :
p_filter->fmt_in.video.i_height;
#ifdef SSE2
/* (-width) & 15 is the number of pixels missing to reach the next multiple
 * of 16, i.e. how far the last block must step back to end on the line end;
 * it is 0 when the width is already a multiple of 16 */
i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
/* With horizontal scaling the line is first converted into the intermediate
 * buffer, otherwise directly into the picture */
p_buffer = b_hscale ? p_buffer_start : p_pic;
/* Aligned path only if both pitches and both start addresses have their
 * four low bits clear */
if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
p_dest->p->i_pitch|
((intptr_t)p_y)|
((intptr_t)p_buffer))) )
{
/* use faster SSE2 aligned fetch and store */
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
/* One iteration per full block of 16 source pixels (8 U/V samples) */
for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
{
SSE2_CALL (
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_ALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
p_buffer += 16;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
/* Stepping back by 1..15 pixels leaves the 16-byte alignment, hence
 * the UNALIGNED macro variants for this last block */
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
}
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the luma line */
p_y += i_source_margin;
/* Chroma padding is only skipped after odd lines */
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
/* Re-select the conversion target for the next line (p_pic is presumably
 * advanced by SCALE_WIDTH / SCALE_HEIGHT - confirm against the macros) */
p_buffer = b_hscale ? p_buffer_start : p_pic;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per full block of 16 source pixels (8 U/V samples) */
for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
{
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
p_buffer += 16;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
}
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the luma line */
p_y += i_source_margin;
/* Chroma padding is only skipped after odd lines */
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
p_buffer = b_hscale ? p_buffer_start : p_pic;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END;
#else /* SSE2 */
/* Same rewind computation as the SSE2 build, for blocks of 8 pixels */
i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per full block of 8 source pixels (4 U/V samples) */
for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
{
MMX_CALL (
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
);
p_y += 8;
p_u += 4;
p_v += 4;
p_buffer += 8;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
MMX_CALL (
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
);
p_y += 8;
p_u += 4;
p_v += 4;
p_buffer += 8;
}
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the luma line */
p_y += i_source_margin;
/* Chroma padding is only skipped after odd lines */
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
}
/* re-enable FPU registers */
MMX_END;
#endif /* SSE2 */
}
/*
 * I420_R5G6B5: convert a planar picture (separate Y, U and V planes) from
 * p_src into 16-bit pixels written to the first plane of p_dest.
 *
 * The per-pixel arithmetic is done entirely by the SSE2_xxx / MMX_xxx macro
 * sequences, and scaling / line copy by SCALE_WIDTH and SCALE_HEIGHT; all of
 * these are defined outside this section.
 * NOTE(review): those macros presumably reference the locals declared below
 * by name (p_y, p_u, p_v, p_buffer, p_pic, p_pic_start, p_offset,
 * p_offset_start, i_x, i_right_margin, i_scale_count, i_chroma_width, ...),
 * which would explain why several of them are never read directly here --
 * confirm before renaming or removing any of them.
 *
 * p_filter: supplies input/output dimensions and the p_sys scratch buffers
 * p_src:    source picture (planes 0 and 1 are used for pitch computation)
 * p_dest:   destination picture (first plane only)
 */
VLC_TARGET
void I420_R5G6B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{
/* We got this one from the old arguments */
uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
uint8_t *p_y = p_src->Y_PIXELS;
uint8_t *p_u = p_src->U_PIXELS;
uint8_t *p_v = p_src->V_PIXELS;
bool b_hscale; /* horizontal scaling type */
unsigned int i_vscale; /* vertical scaling type */
unsigned int i_x, i_y; /* horizontal and vertical indexes */
int i_right_margin;
int i_rewind;
int i_scale_count; /* scale modulo counter */
int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
uint16_t * p_pic_start; /* beginning of the current line for copy */
/* Conversion buffer pointer */
uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
uint16_t * p_buffer;
/* Offset array pointer */
int * p_offset_start = p_filter->p_sys->p_offset;
int * p_offset;
/* Padding between the visible end of a source line and the start of the
 * next one, for the luma plane (index 0) and for plane index 1 */
const int i_source_margin = p_src->p[0].i_pitch
- p_src->p[0].i_visible_pitch;
const int i_source_margin_c = p_src->p[1].i_pitch
- p_src->p[1].i_visible_pitch;
/* Same padding on the destination side */
i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() fills in b_hscale and i_vscale (and is handed the offset
 * array) from the input and output dimensions */
SetOffset( p_filter->fmt_in.video.i_width,
p_filter->fmt_in.video.i_height,
p_filter->fmt_out.video.i_width,
p_filter->fmt_out.video.i_height,
&b_hscale, &i_vscale, p_offset_start );
/*
* Perform conversion
*/
i_scale_count = ( i_vscale == 1 ) ?
p_filter->fmt_out.video.i_height :
p_filter->fmt_in.video.i_height;
#ifdef SSE2
/* Each SSE2 step handles 16 pixels; i_rewind is how many pixels the width
 * falls short of a multiple of 16 (0 when it already is one) */
i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
/* Convert into the scratch buffer when scaling horizontally, otherwise
 * straight into the destination line */
p_buffer = b_hscale ? p_buffer_start : p_pic;
/* Aligned path only if both pitches and both start addresses are multiples
 * of 16 */
if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
p_dest->p->i_pitch|
((intptr_t)p_y)|
((intptr_t)p_buffer))) )
{
/* use faster SSE2 aligned fetch and store */
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
/* Full groups: 16 luma samples and 8 samples of each chroma plane per step */
for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
{
SSE2_CALL (
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_ALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
p_buffer += 16;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
/* Step back so that this last 16-pixel group ends on the last pixel of
 * the line; the stepped-back addresses need not be aligned, hence the
 * _UNALIGNED macro variants */
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
SSE2_CALL (
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
}
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
/* Skip the source padding; the chroma padding is only skipped after odd
 * lines */
p_y += i_source_margin;
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
/* Re-select the conversion target for the next line (this path does not
 * do it at the top of the loop) */
p_buffer = b_hscale ? p_buffer_start : p_pic;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
p_buffer = b_hscale ? p_buffer_start : p_pic;
/* Full groups of 16 pixels */
for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
{
SSE2_CALL(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
p_buffer += 16;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
SSE2_CALL(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y += 16;
p_u += 8;
p_v += 8;
}
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
p_y += i_source_margin;
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
p_buffer = b_hscale ? p_buffer_start : p_pic;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END;
#else /* SSE2 */
/* Each MMX step handles 8 pixels; i_rewind is how many pixels the width
 * falls short of a multiple of 8 */
i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
{
p_pic_start = p_pic;
p_buffer = b_hscale ? p_buffer_start : p_pic;
/* Full groups: 8 luma samples and 4 samples of each chroma plane per step */
for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
{
MMX_CALL (
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y += 8;
p_u += 4;
p_v += 4;
p_buffer += 8;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if( i_rewind )
{
p_y -= i_rewind;
p_u -= i_rewind >> 1;
p_v -= i_rewind >> 1;
p_buffer -= i_rewind;
MMX_CALL (
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y += 8;
p_u += 4;
p_v += 4;
p_buffer += 8;
}
SCALE_WIDTH;
SCALE_HEIGHT( 420, 2 );
p_y += i_source_margin;
if( i_y % 2 )
{
p_u += i_source_margin_c;
p_v += i_source_margin_c;
}
}
/* re-enable FPU registers */
MMX_END;
#endif /* SSE2 */
}
/*****************************************************************************
 * I420_A8R8G8B8: convert a planar YUV 4:2:0 picture to 32 bits per pixel
 *****************************************************************************
 * Reads the Y, U and V planes of p_src and writes 32-bit pixels to the first
 * plane of p_dest, using the xxx_UNPACK_32_ARGB macro family for the final
 * packing step.
 *
 * The arithmetic lives in the SSE2_xxx / MMX_xxx macros and the scaling and
 * line-copy logic in SCALE_WIDTH / SCALE_HEIGHT, all defined elsewhere.
 * NOTE(review): those macros presumably pick up the locals below by name
 * (p_y, p_u, p_v, p_buffer, p_pic, p_pic_start, p_offset, i_x, ...), so do
 * not rename or drop them without checking the macro definitions.
 *
 * \param p_filter filter instance: picture dimensions and p_sys scratch data
 * \param p_src    source picture
 * \param p_dest   destination picture (first plane only)
 *****************************************************************************/
VLC_TARGET
void I420_A8R8G8B8( filter_t *p_filter, picture_t *p_src,
                    picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
    uint8_t  *p_y   = p_src->Y_PIXELS;
    uint8_t  *p_u   = p_src->U_PIXELS;
    uint8_t  *p_v   = p_src->V_PIXELS;

    bool         b_hscale;                        /* horizontal scaling type */
    unsigned int i_vscale;                          /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */

    int         i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
    uint32_t *  p_pic_start;       /* beginning of the current line for copy */

    /* Conversion buffer pointer */
    uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
    uint32_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = p_filter->p_sys->p_offset;
    int *       p_offset;

    /* Padding between the visible end of a line and the next line */
    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch;

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( p_filter->fmt_in.video.i_width,
               p_filter->fmt_in.video.i_height,
               p_filter->fmt_out.video.i_width,
               p_filter->fmt_out.video.i_height,
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    p_filter->fmt_out.video.i_height :
                    p_filter->fmt_in.video.i_height;

#ifdef SSE2
    /* One SSE2 step converts 16 pixels; i_rewind is how many pixels the
     * width falls short of a multiple of 16 */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 15;

    /*
    ** SSE2 128 bits fetch/store instructions are faster
    ** if memory access is 16 bytes aligned
    */
    p_buffer = b_hscale ? p_buffer_start : p_pic;

    if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
                    p_dest->p->i_pitch|
                    ((intptr_t)p_y)|
                    ((intptr_t)p_buffer))) )
    {
        /* use faster SSE2 aligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_ALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ARGB_ALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                /* Step back so the last 16-pixel group ends exactly on the
                 * last pixel of the line */
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ARGB_UNALIGNED
                );
                p_y += 16;
                /* 16 pixels consume 8 chroma samples, exactly as in the main
                 * loop; advancing by less would leave p_u/p_v short of the
                 * end of the chroma line and skew every following line */
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }
    else
    {
        /* use slower SSE2 unaligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;
            p_buffer = b_hscale ? p_buffer_start : p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ARGB_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ARGB_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }

    /* make sure all SSE2 stores are visible thereafter */
    SSE2_END;

#else
    /* One MMX step converts 8 pixels */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 7;

    for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
    {
        p_pic_start = p_pic;
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
        {
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_ARGB
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_ARGB
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }
        SCALE_WIDTH;
        SCALE_HEIGHT( 420, 4 );

        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }

    /* re-enable FPU registers */
    MMX_END;

#endif
}
/*****************************************************************************
 * I420_R8G8B8A8: convert a planar YUV 4:2:0 picture to 32 bits per pixel
 *****************************************************************************
 * Reads the Y, U and V planes of p_src and writes 32-bit pixels to the first
 * plane of p_dest, using the xxx_UNPACK_32_RGBA macro family for the final
 * packing step.
 *
 * The arithmetic lives in the SSE2_xxx / MMX_xxx macros and the scaling and
 * line-copy logic in SCALE_WIDTH / SCALE_HEIGHT, all defined elsewhere.
 * NOTE(review): those macros presumably pick up the locals below by name
 * (p_y, p_u, p_v, p_buffer, p_pic, p_pic_start, p_offset, i_x, ...), so do
 * not rename or drop them without checking the macro definitions.
 *
 * \param p_filter filter instance: picture dimensions and p_sys scratch data
 * \param p_src    source picture
 * \param p_dest   destination picture (first plane only)
 *****************************************************************************/
VLC_TARGET
void I420_R8G8B8A8( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
    uint8_t  *p_y   = p_src->Y_PIXELS;
    uint8_t  *p_u   = p_src->U_PIXELS;
    uint8_t  *p_v   = p_src->V_PIXELS;

    bool         b_hscale;                        /* horizontal scaling type */
    unsigned int i_vscale;                          /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */

    int         i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
    uint32_t *  p_pic_start;       /* beginning of the current line for copy */

    /* Conversion buffer pointer */
    uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
    uint32_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = p_filter->p_sys->p_offset;
    int *       p_offset;

    /* Padding between the visible end of a line and the next line */
    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch;

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( p_filter->fmt_in.video.i_width,
               p_filter->fmt_in.video.i_height,
               p_filter->fmt_out.video.i_width,
               p_filter->fmt_out.video.i_height,
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    p_filter->fmt_out.video.i_height :
                    p_filter->fmt_in.video.i_height;

#ifdef SSE2
    /* One SSE2 step converts 16 pixels; i_rewind is how many pixels the
     * width falls short of a multiple of 16 */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 15;

    /*
    ** SSE2 128 bits fetch/store instructions are faster
    ** if memory access is 16 bytes aligned
    */
    p_buffer = b_hscale ? p_buffer_start : p_pic;

    if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
                    p_dest->p->i_pitch|
                    ((intptr_t)p_y)|
                    ((intptr_t)p_buffer))) )
    {
        /* use faster SSE2 aligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_ALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_ALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                /* Step back so the last 16-pixel group ends exactly on the
                 * last pixel of the line */
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_UNALIGNED
                );
                p_y += 16;
                /* 16 pixels consume 8 chroma samples, exactly as in the main
                 * loop; advancing by less would leave p_u/p_v short of the
                 * end of the chroma line and skew every following line */
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }
    else
    {
        /* use slower SSE2 unaligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;
            p_buffer = b_hscale ? p_buffer_start : p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_RGBA_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }

    /* make sure all SSE2 stores are visible thereafter */
    SSE2_END;

#else
    /* One MMX step converts 8 pixels */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 7;

    for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
    {
        p_pic_start = p_pic;
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
        {
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_RGBA
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_RGBA
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }
        SCALE_WIDTH;
        SCALE_HEIGHT( 420, 4 );

        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }

    /* re-enable FPU registers */
    MMX_END;

#endif
}
/*****************************************************************************
 * I420_B8G8R8A8: convert a planar YUV 4:2:0 picture to 32 bits per pixel
 *****************************************************************************
 * Reads the Y, U and V planes of p_src and writes 32-bit pixels to the first
 * plane of p_dest, using the xxx_UNPACK_32_BGRA macro family for the final
 * packing step.
 *
 * The arithmetic lives in the SSE2_xxx / MMX_xxx macros and the scaling and
 * line-copy logic in SCALE_WIDTH / SCALE_HEIGHT, all defined elsewhere.
 * NOTE(review): those macros presumably pick up the locals below by name
 * (p_y, p_u, p_v, p_buffer, p_pic, p_pic_start, p_offset, i_x, ...), so do
 * not rename or drop them without checking the macro definitions.
 *
 * \param p_filter filter instance: picture dimensions and p_sys scratch data
 * \param p_src    source picture
 * \param p_dest   destination picture (first plane only)
 *****************************************************************************/
VLC_TARGET
void I420_B8G8R8A8( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
    uint8_t  *p_y   = p_src->Y_PIXELS;
    uint8_t  *p_u   = p_src->U_PIXELS;
    uint8_t  *p_v   = p_src->V_PIXELS;

    bool         b_hscale;                        /* horizontal scaling type */
    unsigned int i_vscale;                          /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */

    int         i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
    uint32_t *  p_pic_start;       /* beginning of the current line for copy */

    /* Conversion buffer pointer */
    uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
    uint32_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = p_filter->p_sys->p_offset;
    int *       p_offset;

    /* Padding between the visible end of a line and the next line */
    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch;

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( p_filter->fmt_in.video.i_width,
               p_filter->fmt_in.video.i_height,
               p_filter->fmt_out.video.i_width,
               p_filter->fmt_out.video.i_height,
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    p_filter->fmt_out.video.i_height :
                    p_filter->fmt_in.video.i_height;

#ifdef SSE2
    /* One SSE2 step converts 16 pixels; i_rewind is how many pixels the
     * width falls short of a multiple of 16 */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 15;

    /*
    ** SSE2 128 bits fetch/store instructions are faster
    ** if memory access is 16 bytes aligned
    */
    p_buffer = b_hscale ? p_buffer_start : p_pic;

    if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
                    p_dest->p->i_pitch|
                    ((intptr_t)p_y)|
                    ((intptr_t)p_buffer))) )
    {
        /* use faster SSE2 aligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_ALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_BGRA_ALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                /* Step back so the last 16-pixel group ends exactly on the
                 * last pixel of the line */
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_BGRA_UNALIGNED
                );
                p_y += 16;
                /* 16 pixels consume 8 chroma samples, exactly as in the main
                 * loop; advancing by less would leave p_u/p_v short of the
                 * end of the chroma line and skew every following line */
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }
    else
    {
        /* use slower SSE2 unaligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;
            p_buffer = b_hscale ? p_buffer_start : p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_BGRA_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_BGRA_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }

    /* make sure all SSE2 stores are visible thereafter, as the other
     * converters in this file do at the end of their SSE2 section */
    SSE2_END;

#else
    /* One MMX step converts 8 pixels */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 7;

    for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
    {
        p_pic_start = p_pic;
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
        {
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_BGRA
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_BGRA
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }
        SCALE_WIDTH;
        SCALE_HEIGHT( 420, 4 );

        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }

    /* re-enable FPU registers */
    MMX_END;

#endif
}
/*****************************************************************************
 * I420_A8B8G8R8: convert a planar YUV 4:2:0 picture to 32 bits per pixel
 *****************************************************************************
 * Reads the Y, U and V planes of p_src and writes 32-bit pixels to the first
 * plane of p_dest, using the xxx_UNPACK_32_ABGR macro family for the final
 * packing step.
 *
 * The arithmetic lives in the SSE2_xxx / MMX_xxx macros and the scaling and
 * line-copy logic in SCALE_WIDTH / SCALE_HEIGHT, all defined elsewhere.
 * NOTE(review): those macros presumably pick up the locals below by name
 * (p_y, p_u, p_v, p_buffer, p_pic, p_pic_start, p_offset, i_x, ...), so do
 * not rename or drop them without checking the macro definitions.
 *
 * \param p_filter filter instance: picture dimensions and p_sys scratch data
 * \param p_src    source picture
 * \param p_dest   destination picture (first plane only)
 *****************************************************************************/
VLC_TARGET
void I420_A8B8G8R8( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
{
    /* We got this one from the old arguments */
    uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
    uint8_t  *p_y   = p_src->Y_PIXELS;
    uint8_t  *p_u   = p_src->U_PIXELS;
    uint8_t  *p_v   = p_src->V_PIXELS;

    bool         b_hscale;                        /* horizontal scaling type */
    unsigned int i_vscale;                          /* vertical scaling type */
    unsigned int i_x, i_y;                /* horizontal and vertical indexes */

    int         i_right_margin;
    int         i_rewind;
    int         i_scale_count;                       /* scale modulo counter */
    int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
    uint32_t *  p_pic_start;       /* beginning of the current line for copy */

    /* Conversion buffer pointer */
    uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
    uint32_t *  p_buffer;

    /* Offset array pointer */
    int *       p_offset_start = p_filter->p_sys->p_offset;
    int *       p_offset;

    /* Padding between the visible end of a line and the next line */
    const int i_source_margin = p_src->p[0].i_pitch
                                 - p_src->p[0].i_visible_pitch;
    const int i_source_margin_c = p_src->p[1].i_pitch
                                 - p_src->p[1].i_visible_pitch;

    i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;

    /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
     * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
     * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
    SetOffset( p_filter->fmt_in.video.i_width,
               p_filter->fmt_in.video.i_height,
               p_filter->fmt_out.video.i_width,
               p_filter->fmt_out.video.i_height,
               &b_hscale, &i_vscale, p_offset_start );

    /*
     * Perform conversion
     */
    i_scale_count = ( i_vscale == 1 ) ?
                    p_filter->fmt_out.video.i_height :
                    p_filter->fmt_in.video.i_height;

#ifdef SSE2
    /* One SSE2 step converts 16 pixels; i_rewind is how many pixels the
     * width falls short of a multiple of 16 */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 15;

    /*
    ** SSE2 128 bits fetch/store instructions are faster
    ** if memory access is 16 bytes aligned
    */
    p_buffer = b_hscale ? p_buffer_start : p_pic;

    if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
                    p_dest->p->i_pitch|
                    ((intptr_t)p_y)|
                    ((intptr_t)p_buffer))) )
    {
        /* use faster SSE2 aligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_ALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ABGR_ALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                /* Step back so the last 16-pixel group ends exactly on the
                 * last pixel of the line */
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ABGR_UNALIGNED
                );
                p_y += 16;
                /* 16 pixels consume 8 chroma samples, exactly as in the main
                 * loop; advancing by less would leave p_u/p_v short of the
                 * end of the chroma line and skew every following line */
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }
    else
    {
        /* use slower SSE2 unaligned fetch and store */
        for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
        {
            p_pic_start = p_pic;
            p_buffer = b_hscale ? p_buffer_start : p_pic;

            for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
            {
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ABGR_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
                p_buffer += 16;
            }

            /* Here we do some unaligned reads and duplicate conversions, but
             * at least we have all the pixels */
            if( i_rewind )
            {
                p_y -= i_rewind;
                p_u -= i_rewind >> 1;
                p_v -= i_rewind >> 1;
                p_buffer -= i_rewind;
                SSE2_CALL (
                    SSE2_INIT_32_UNALIGNED
                    SSE2_YUV_MUL
                    SSE2_YUV_ADD
                    SSE2_UNPACK_32_ABGR_UNALIGNED
                );
                p_y += 16;
                p_u += 8;
                p_v += 8;
            }
            SCALE_WIDTH;
            SCALE_HEIGHT( 420, 4 );

            p_y += i_source_margin;
            if( i_y % 2 )
            {
                p_u += i_source_margin_c;
                p_v += i_source_margin_c;
            }
            p_buffer = b_hscale ? p_buffer_start : p_pic;
        }
    }

    /* make sure all SSE2 stores are visible thereafter, as the other
     * converters in this file do at the end of their SSE2 section */
    SSE2_END;

#else
    /* One MMX step converts 8 pixels */
    i_rewind = (-p_filter->fmt_in.video.i_width) & 7;

    for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
    {
        p_pic_start = p_pic;
        p_buffer = b_hscale ? p_buffer_start : p_pic;

        for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
        {
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_ABGR
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }

        /* Here we do some unaligned reads and duplicate conversions, but
         * at least we have all the pixels */
        if( i_rewind )
        {
            p_y -= i_rewind;
            p_u -= i_rewind >> 1;
            p_v -= i_rewind >> 1;
            p_buffer -= i_rewind;
            MMX_CALL (
                MMX_INIT_32
                MMX_YUV_MUL
                MMX_YUV_ADD
                MMX_UNPACK_32_ABGR
            );
            p_y += 8;
            p_u += 4;
            p_v += 4;
            p_buffer += 8;
        }
        SCALE_WIDTH;
        SCALE_HEIGHT( 420, 4 );

        p_y += i_source_margin;
        if( i_y % 2 )
        {
            p_u += i_source_margin_c;
            p_v += i_source_margin_c;
        }
    }

    /* re-enable FPU registers */
    MMX_END;

#endif
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment