Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc
Commits
14ee5e98
Commit
14ee5e98
authored
Sep 14, 2013
by
Rémi Denis-Courmont
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
i420_rgb: split files and clean up
parent
38917a22
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
1613 additions
and
1574 deletions
+1613
-1574
modules/video_chroma/Makefile.am
modules/video_chroma/Makefile.am
+4
-4
modules/video_chroma/i420_rgb.c
modules/video_chroma/i420_rgb.c
+47
-70
modules/video_chroma/i420_rgb.h
modules/video_chroma/i420_rgb.h
+7
-4
modules/video_chroma/i420_rgb16.c
modules/video_chroma/i420_rgb16.c
+88
-1496
modules/video_chroma/i420_rgb16_x86.c
modules/video_chroma/i420_rgb16_x86.c
+1467
-0
No files found.
modules/video_chroma/Makefile.am
View file @
14ee5e98
...
...
@@ -63,8 +63,8 @@ endif
# MMX
libi420_rgb_mmx_plugin_la_SOURCES
=
i420_rgb.c i420_rgb.h
\
i420_rgb16.c i420_rgb_mmx.h
libi420_rgb_mmx_plugin_la_CPPFLAGS
=
$(AM_CPPFLAGS)
i420_rgb16
_x86
.c i420_rgb_mmx.h
libi420_rgb_mmx_plugin_la_CPPFLAGS
=
$(AM_CPPFLAGS)
-DMMX
libi420_yuy2_mmx_plugin_la_SOURCES
=
i420_yuy2.c i420_yuy2.h
libi420_yuy2_mmx_plugin_la_CPPFLAGS
=
$(AM_CPPFLAGS)
...
...
@@ -81,8 +81,8 @@ endif
# SSE2
libi420_rgb_sse2_plugin_la_SOURCES
=
i420_rgb.c i420_rgb.h
\
i420_rgb16.c i420_rgb_sse2.h
libi420_rgb_sse2_plugin_la_CPPFLAGS
=
$(AM_CPPFLAGS)
i420_rgb16
_x86
.c i420_rgb_sse2.h
libi420_rgb_sse2_plugin_la_CPPFLAGS
=
$(AM_CPPFLAGS)
-DSSE2
libi420_yuy2_sse2_plugin_la_SOURCES
=
i420_yuy2.c i420_yuy2.h
libi420_yuy2_sse2_plugin_la_CPPFLAGS
=
$(AM_CPPFLAGS)
...
...
modules/video_chroma/i420_rgb.c
View file @
14ee5e98
...
...
@@ -38,19 +38,22 @@
#include <vlc_cpu.h>
#include "i420_rgb.h"
#if
defined (MODULE_NAME_IS_i420_rgb)
#if
def PLAIN
# include "i420_rgb_c.h"
static
picture_t
*
I420_RGB8_Filter
(
filter_t
*
,
picture_t
*
);
// static picture_t *I420_RGB16_dither_Filter ( filter_t *, picture_t * );
static
picture_t
*
I420_RGB16_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_RGB32_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_RGB8_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_RGB16_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_RGB32_Filter
(
filter_t
*
,
picture_t
*
);
static
void
SetGammaTable
(
int
*
pi_table
,
double
f_gamma
);
static
void
SetYUV
(
filter_t
*
);
static
void
Set8bppPalette
(
filter_t
*
,
uint8_t
*
);
#else
static
picture_t
*
I420_R5G5B5_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_R5G6B5_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_A8R8G8B8_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_R8G8B8A8_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_B8G8R8A8_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_A8B8G8R8_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_R5G5B5_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_R5G6B5_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_A8R8G8B8_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_R8G8B8A8_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_B8G8R8A8_Filter
(
filter_t
*
,
picture_t
*
);
static
picture_t
*
I420_A8B8G8R8_Filter
(
filter_t
*
,
picture_t
*
);
#endif
/*****************************************************************************
...
...
@@ -65,36 +68,27 @@
<< p_filter->fmt_out.video.i_lbshift))
/*****************************************************************************
*
Local and extern prototypes
.
*
Module descriptor
.
*****************************************************************************/
static
int
Activate
(
vlc_object_t
*
);
static
void
Deactivate
(
vlc_object_t
*
);
#if defined (MODULE_NAME_IS_i420_rgb)
static
void
SetGammaTable
(
int
*
pi_table
,
double
f_gamma
);
static
void
SetYUV
(
filter_t
*
);
static
void
Set8bppPalette
(
filter_t
*
,
uint8_t
*
);
#endif
/*****************************************************************************
* Module descriptor.
*****************************************************************************/
vlc_module_begin
()
#if defined (MODULE_NAME_IS_i420_rgb)
set_description
(
N_
(
"I420,IYUV,YV12 to "
"RGB2,RV15,RV16,RV24,RV32 conversions"
)
)
set_capability
(
"video filter2"
,
80
)
# define vlc_CPU_capable() (true)
#elif defined (MODULE_NAME_IS_i420_rgb_mmx)
set_description
(
N_
(
"MMX I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions"
)
)
set_capability
(
"video filter2"
,
100
)
# define vlc_CPU_capable() vlc_CPU_MMX()
#elif defined (MODULE_NAME_IS_i420_rgb_sse2)
#if defined (SSE2)
set_description
(
N_
(
"SSE2 I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions"
)
)
set_capability
(
"video filter2"
,
120
)
# define vlc_CPU_capable() vlc_CPU_SSE2()
#elif defined (MMX)
set_description
(
N_
(
"MMX I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions"
)
)
set_capability
(
"video filter2"
,
100
)
# define vlc_CPU_capable() vlc_CPU_MMX()
#else
set_description
(
N_
(
"I420,IYUV,YV12 to "
"RGB2,RV15,RV16,RV24,RV32 conversions"
)
)
set_capability
(
"video filter2"
,
80
)
# define vlc_CPU_capable() (true)
#endif
set_callbacks
(
Activate
,
Deactivate
)
vlc_module_end
()
...
...
@@ -107,7 +101,7 @@ vlc_module_end ()
static
int
Activate
(
vlc_object_t
*
p_this
)
{
filter_t
*
p_filter
=
(
filter_t
*
)
p_this
;
#if
defined (MODULE_NAME_IS_i420_rgb)
#if
def PLAIN
size_t
i_tables_size
;
#endif
...
...
@@ -125,14 +119,9 @@ static int Activate( vlc_object_t *p_this )
case
VLC_CODEC_I420
:
switch
(
p_filter
->
fmt_out
.
video
.
i_chroma
)
{
#if defined (MODULE_NAME_IS_i420_rgb)
case
VLC_CODEC_RGB8
:
p_filter
->
pf_video_filter
=
I420_RGB8_Filter
;
break
;
#endif
#ifndef PLAIN
case
VLC_CODEC_RGB15
:
case
VLC_CODEC_RGB16
:
#if ! defined (MODULE_NAME_IS_i420_rgb)
/* If we don't have support for the bitmasks, bail out */
if
(
(
p_filter
->
fmt_out
.
video
.
i_rmask
==
0x7c00
&&
p_filter
->
fmt_out
.
video
.
i_gmask
==
0x03e0
...
...
@@ -152,19 +141,8 @@ static int Activate( vlc_object_t *p_this )
}
else
return
VLC_EGENERIC
;
#else
// generic C chroma converter */
p_filter
->
pf_video_filter
=
I420_RGB16_Filter
;
#endif
break
;
#if 0
/* Hmmm, is there only X11 using 32bits per pixel for RV24 ? */
case VLC_CODEC_RGB24:
#endif
case
VLC_CODEC_RGB32
:
#if ! defined (MODULE_NAME_IS_i420_rgb)
/* If we don't have support for the bitmasks, bail out */
if
(
p_filter
->
fmt_out
.
video
.
i_rmask
==
0x00ff0000
&&
p_filter
->
fmt_out
.
video
.
i_gmask
==
0x0000ff00
...
...
@@ -200,12 +178,19 @@ static int Activate( vlc_object_t *p_this )
}
else
return
VLC_EGENERIC
;
break
;
#else
/* generic C chroma converter */
case
VLC_CODEC_RGB8
:
p_filter
->
pf_video_filter
=
I420_RGB8_Filter
;
break
;
case
VLC_CODEC_RGB15
:
case
VLC_CODEC_RGB16
:
p_filter
->
pf_video_filter
=
I420_RGB16_Filter
;
break
;
case
VLC_CODEC_RGB32
:
p_filter
->
pf_video_filter
=
I420_RGB32_Filter
;
#endif
break
;
#endif
default:
return
VLC_EGENERIC
;
}
...
...
@@ -223,22 +208,19 @@ static int Activate( vlc_object_t *p_this )
switch
(
p_filter
->
fmt_out
.
video
.
i_chroma
)
{
#if
defined (MODULE_NAME_IS_i420_rgb)
#if
def PLAIN
case
VLC_CODEC_RGB8
:
p_filter
->
p_sys
->
p_buffer
=
malloc
(
VOUT_MAX_WIDTH
);
break
;
#endif
case
VLC_CODEC_RGB15
:
case
VLC_CODEC_RGB16
:
p_filter
->
p_sys
->
p_buffer
=
malloc
(
VOUT_MAX_WIDTH
*
2
);
break
;
case
VLC_CODEC_RGB24
:
case
VLC_CODEC_RGB32
:
p_filter
->
p_sys
->
p_buffer
=
malloc
(
VOUT_MAX_WIDTH
*
4
);
break
;
default:
p_filter
->
p_sys
->
p_buffer
=
NULL
;
break
;
...
...
@@ -261,7 +243,7 @@ static int Activate( vlc_object_t *p_this )
return
VLC_EGENERIC
;
}
#if
defined (MODULE_NAME_IS_i420_rgb)
#if
def PLAIN
switch
(
p_filter
->
fmt_out
.
video
.
i_chroma
)
{
case
VLC_CODEC_RGB8
:
...
...
@@ -300,7 +282,7 @@ static void Deactivate( vlc_object_t *p_this )
{
filter_t
*
p_filter
=
(
filter_t
*
)
p_this
;
#if
defined (MODULE_NAME_IS_i420_rgb)
#if
def PLAIN
free
(
p_filter
->
p_sys
->
p_base
);
#endif
free
(
p_filter
->
p_sys
->
p_offset
);
...
...
@@ -308,21 +290,18 @@ static void Deactivate( vlc_object_t *p_this )
free
(
p_filter
->
p_sys
);
}
#if defined (MODULE_NAME_IS_i420_rgb)
VIDEO_FILTER_WRAPPER
(
I420_RGB8
)
VIDEO_FILTER_WRAPPER
(
I420_RGB16
)
//VIDEO_FILTER_WRAPPER( I420_RGB16_dither )
VIDEO_FILTER_WRAPPER
(
I420_RGB32
)
#else
#ifndef PLAIN
VIDEO_FILTER_WRAPPER
(
I420_R5G5B5
)
VIDEO_FILTER_WRAPPER
(
I420_R5G6B5
)
VIDEO_FILTER_WRAPPER
(
I420_A8R8G8B8
)
VIDEO_FILTER_WRAPPER
(
I420_R8G8B8A8
)
VIDEO_FILTER_WRAPPER
(
I420_B8G8R8A8
)
VIDEO_FILTER_WRAPPER
(
I420_A8B8G8R8
)
#endif
#else
VIDEO_FILTER_WRAPPER
(
I420_RGB8
)
VIDEO_FILTER_WRAPPER
(
I420_RGB16
)
VIDEO_FILTER_WRAPPER
(
I420_RGB32
)
#if defined (MODULE_NAME_IS_i420_rgb)
/*****************************************************************************
* SetGammaTable: return intensity table transformed by gamma curve.
*****************************************************************************
...
...
@@ -538,6 +517,4 @@ static void Set8bppPalette( filter_t *p_filter, uint8_t *p_rgb8 )
}
}
}
#endif
modules/video_chroma/i420_rgb.h
View file @
14ee5e98
...
...
@@ -21,6 +21,10 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#if !defined (SSE2) && !defined (MMX)
# define PLAIN
#endif
/** Number of entries in RGB palette/colormap */
#define CMAP_RGB2_SIZE 256
...
...
@@ -35,7 +39,7 @@ struct filter_sys_t
uint8_t
*
p_buffer
;
int
*
p_offset
;
#ifdef
MODULE_NAME_IS_i420_rgb
#ifdef
PLAIN
/**< Pre-calculated conversion tables */
void
*
p_base
;
/**< base for all conversion tables */
uint8_t
*
p_rgb8
;
/**< RGB 8 bits table */
...
...
@@ -55,12 +59,11 @@ struct filter_sys_t
/*****************************************************************************
* Prototypes
*****************************************************************************/
#ifdef
MODULE_NAME_IS_i420_rgb
#ifdef
PLAIN
void
I420_RGB8
(
filter_t
*
,
picture_t
*
,
picture_t
*
);
void
I420_RGB16_dither
(
filter_t
*
,
picture_t
*
,
picture_t
*
);
void
I420_RGB16
(
filter_t
*
,
picture_t
*
,
picture_t
*
);
void
I420_RGB32
(
filter_t
*
,
picture_t
*
,
picture_t
*
);
#else
// if defined(MODULE_NAME_IS_i420_rgb_mmx)
#else
void
I420_R5G5B5
(
filter_t
*
,
picture_t
*
,
picture_t
*
);
void
I420_R5G6B5
(
filter_t
*
,
picture_t
*
,
picture_t
*
);
void
I420_A8R8G8B8
(
filter_t
*
,
picture_t
*
,
picture_t
*
);
...
...
modules/video_chroma/i420_rgb16.c
View file @
14ee5e98
...
...
@@ -22,10 +22,6 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
/*****************************************************************************
* Preamble
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
...
...
@@ -35,19 +31,67 @@
#include <vlc_cpu.h>
#include "i420_rgb.h"
#if defined (MODULE_NAME_IS_i420_rgb)
# include "i420_rgb_c.h"
# define VLC_TARGET
#elif defined (MODULE_NAME_IS_i420_rgb_mmx)
# include "i420_rgb_mmx.h"
# define VLC_TARGET VLC_MMX
#elif defined (MODULE_NAME_IS_i420_rgb_sse2)
# include "i420_rgb_sse2.h"
# define VLC_TARGET VLC_SSE
#endif
#include "i420_rgb_c.h"
/*****************************************************************************
* SetOffset: build offset array for conversion functions
*****************************************************************************
* This function will build an offset array used in later conversion functions.
* It will also set horizontal and vertical scaling indicators.
*****************************************************************************/
static
void
SetOffset
(
int
i_width
,
int
i_height
,
int
i_pic_width
,
int
i_pic_height
,
bool
*
pb_hscale
,
unsigned
int
*
pi_vscale
,
int
*
p_offset
)
{
/*
* Prepare horizontal offset array
*/
if
(
i_pic_width
-
i_width
==
0
)
{
/* No horizontal scaling: YUV conversion is done directly to picture */
*
pb_hscale
=
0
;
}
else
if
(
i_pic_width
-
i_width
>
0
)
{
/* Prepare scaling array for horizontal extension */
int
i_scale_count
=
i_pic_width
;
*
pb_hscale
=
1
;
for
(
int
i_x
=
i_width
;
i_x
--
;
)
{
while
(
(
i_scale_count
-=
i_width
)
>
0
)
{
*
p_offset
++
=
0
;
}
*
p_offset
++
=
1
;
i_scale_count
+=
i_pic_width
;
}
}
else
/* if( i_pic_width - i_width < 0 ) */
{
/* Prepare scaling array for horizontal reduction */
int
i_scale_count
=
i_pic_width
;
*
pb_hscale
=
1
;
for
(
int
i_x
=
i_pic_width
;
i_x
--
;
)
{
*
p_offset
=
1
;
while
(
(
i_scale_count
-=
i_pic_width
)
>
0
)
{
*
p_offset
+=
1
;
}
p_offset
++
;
i_scale_count
+=
i_width
;
}
}
static
void
SetOffset
(
int
,
int
,
int
,
int
,
bool
*
,
unsigned
int
*
,
int
*
);
/*
* Set vertical scaling indicator
*/
if
(
i_pic_height
-
i_height
==
0
)
*
pi_vscale
=
0
;
else
if
(
i_pic_height
-
i_height
>
0
)
*
pi_vscale
=
1
;
else
/* if( i_pic_height - i_height < 0 ) */
*
pi_vscale
=
-
1
;
}
/*****************************************************************************
* I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
...
...
@@ -60,8 +104,6 @@ static void SetOffset( int, int, int, int, bool *,
* - output: 1 line
*****************************************************************************/
#if defined (MODULE_NAME_IS_i420_rgb)
void
I420_RGB16
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
...
...
@@ -154,13 +196,21 @@ void I420_RGB16( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
}
}
#else // ! defined (MODULE_NAME_IS_i420_rgb)
/*****************************************************************************
* I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
*****************************************************************************
* Horizontal alignment needed:
* - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
* - output: 1 pixel (2 bytes), margins allowed
* Vertical alignment needed:
* - input: 2 lines (2 Y lines, 1 U/V line)
* - output: 1 line
*****************************************************************************/
VLC_TARGET
void
I420_R5G5B5
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
void
I420_RGB32
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint
16_t
*
p_pic
=
(
uint16
_t
*
)
p_dest
->
p
->
p_pixels
;
uint
32_t
*
p_pic
=
(
uint32
_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
...
...
@@ -173,11 +223,15 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint16_t
*
p_pic_start
;
/* beginning of the current line for copy */
uint32_t
*
p_pic_start
;
/* beginning of the current line for copy */
int
i_uval
,
i_vval
;
/* U and V samples */
int
i_red
,
i_green
,
i_blue
;
/* U and V modified samples */
uint32_t
*
p_yuv
=
p_filter
->
p_sys
->
p_rgb32
;
uint32_t
*
p_ybase
;
/* Y dependant conversion table */
/* Conversion buffer pointer */
uint
16_t
*
p_buffer_start
=
(
uint16
_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint
16
_t
*
p_buffer
;
uint
32_t
*
p_buffer_start
=
(
uint32
_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint
32
_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
...
...
@@ -189,6 +243,7 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
...
...
@@ -199,138 +254,12 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#if defined (MODULE_NAME_IS_i420_rgb_sse2)
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END
;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
...
...
@@ -338,16 +267,10 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
}
/* Here we do some unaligned reads and duplicate conversions, but
...
...
@@ -358,20 +281,13 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
...
...
@@ -380,1328 +296,4 @@ void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
}
VLC_TARGET
void
I420_R5G6B5
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint16_t
*
p_pic
=
(
uint16_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint16_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint16_t
*
p_buffer_start
=
(
uint16_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint16_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#if defined (MODULE_NAME_IS_i420_rgb_sse2)
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END
;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
}
#endif
/*****************************************************************************
* I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
*****************************************************************************
* Horizontal alignment needed:
* - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
* - output: 1 pixel (2 bytes), margins allowed
* Vertical alignment needed:
* - input: 2 lines (2 Y lines, 1 U/V line)
* - output: 1 line
*****************************************************************************/
#if defined (MODULE_NAME_IS_i420_rgb)
void
I420_RGB32
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint32_t
*
p_pic
=
(
uint32_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint32_t
*
p_pic_start
;
/* beginning of the current line for copy */
int
i_uval
,
i_vval
;
/* U and V samples */
int
i_red
,
i_green
,
i_blue
;
/* U and V modified samples */
uint32_t
*
p_yuv
=
p_filter
->
p_sys
->
p_rgb32
;
uint32_t
*
p_ybase
;
/* Y dependant conversion table */
/* Conversion buffer pointer */
uint32_t
*
p_buffer_start
=
(
uint32_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint32_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
CONVERT_YUV_PIXEL
(
4
);
CONVERT_Y_PIXEL
(
4
);
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
}
#else // defined (MODULE_NAME_IS_i420_rgb_mmx) || defined (MODULE_NAME_IS_i420_rgb_sse2)
VLC_TARGET
void
I420_A8R8G8B8
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint32_t
*
p_pic
=
(
uint32_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint32_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint32_t
*
p_buffer_start
=
(
uint32_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint32_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#if defined (MODULE_NAME_IS_i420_rgb_sse2)
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y
+=
16
;
p_u
+=
4
;
p_v
+=
4
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END
;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ARGB
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ARGB
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
}
VLC_TARGET
void
I420_R8G8B8A8
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint32_t
*
p_pic
=
(
uint32_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint32_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint32_t
*
p_buffer_start
=
(
uint32_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint32_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#if defined (MODULE_NAME_IS_i420_rgb_sse2)
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
4
;
p_v
+=
4
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END
;
#else // defined (MODULE_NAME_IS_i420_rgb_mmx)
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_RGBA
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_RGBA
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
}
VLC_TARGET
void
I420_B8G8R8A8
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint32_t
*
p_pic
=
(
uint32_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint32_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint32_t
*
p_buffer_start
=
(
uint32_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint32_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#if defined (MODULE_NAME_IS_i420_rgb_sse2)
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
4
;
p_v
+=
4
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
#else
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_BGRA
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_BGRA
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
}
VLC_TARGET
void
I420_A8B8G8R8
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint32_t
*
p_pic
=
(
uint32_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint32_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint32_t
*
p_buffer_start
=
(
uint32_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint32_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#if defined (MODULE_NAME_IS_i420_rgb_sse2)
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ABGR_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ABGR_UNALIGNED
);
p_y
+=
16
;
p_u
+=
4
;
p_v
+=
4
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ABGR_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ABGR_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
#else
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ABGR
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ABGR
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
}
#endif
/* Following functions are local */
/*****************************************************************************
* SetOffset: build offset array for conversion functions
*****************************************************************************
* This function will build an offset array used in later conversion functions.
* It will also set horizontal and vertical scaling indicators.
*****************************************************************************/
static
void
SetOffset
(
int
i_width
,
int
i_height
,
int
i_pic_width
,
int
i_pic_height
,
bool
*
pb_hscale
,
unsigned
int
*
pi_vscale
,
int
*
p_offset
)
{
int
i_x
;
/* x position in destination */
int
i_scale_count
;
/* modulo counter */
/*
* Prepare horizontal offset array
*/
if
(
i_pic_width
-
i_width
==
0
)
{
/* No horizontal scaling: YUV conversion is done directly to picture */
*
pb_hscale
=
0
;
}
else
if
(
i_pic_width
-
i_width
>
0
)
{
/* Prepare scaling array for horizontal extension */
*
pb_hscale
=
1
;
i_scale_count
=
i_pic_width
;
for
(
i_x
=
i_width
;
i_x
--
;
)
{
while
(
(
i_scale_count
-=
i_width
)
>
0
)
{
*
p_offset
++
=
0
;
}
*
p_offset
++
=
1
;
i_scale_count
+=
i_pic_width
;
}
}
else
/* if( i_pic_width - i_width < 0 ) */
{
/* Prepare scaling array for horizontal reduction */
*
pb_hscale
=
1
;
i_scale_count
=
i_width
;
for
(
i_x
=
i_pic_width
;
i_x
--
;
)
{
*
p_offset
=
1
;
while
(
(
i_scale_count
-=
i_pic_width
)
>
0
)
{
*
p_offset
+=
1
;
}
p_offset
++
;
i_scale_count
+=
i_width
;
}
}
/*
* Set vertical scaling indicator
*/
if
(
i_pic_height
-
i_height
==
0
)
{
*
pi_vscale
=
0
;
}
else
if
(
i_pic_height
-
i_height
>
0
)
{
*
pi_vscale
=
1
;
}
else
/* if( i_pic_height - i_height < 0 ) */
{
*
pi_vscale
=
-
1
;
}
}
modules/video_chroma/i420_rgb16_x86.c
0 → 100644
View file @
14ee5e98
/*****************************************************************************
* i420_rgb16_x86.c : YUV to bitmap RGB conversion module for vlc
*****************************************************************************
* Copyright (C) 2000 VLC authors and VideoLAN
* $Id$
*
* Authors: Samuel Hocevar <sam@zoy.org>
* Damien Fouilleul <damienf@videolan.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <vlc_common.h>
#include <vlc_filter.h>
#include <vlc_cpu.h>
#include "i420_rgb.h"
#ifdef SSE2
# include "i420_rgb_sse2.h"
# define VLC_TARGET VLC_SSE
#else
# include "i420_rgb_mmx.h"
# define VLC_TARGET VLC_MMX
#endif
/*****************************************************************************
* SetOffset: build offset array for conversion functions
*****************************************************************************
* This function will build an offset array used in later conversion functions.
* It will also set horizontal and vertical scaling indicators.
*****************************************************************************/
static
void
SetOffset
(
int
i_width
,
int
i_height
,
int
i_pic_width
,
int
i_pic_height
,
bool
*
pb_hscale
,
unsigned
int
*
pi_vscale
,
int
*
p_offset
)
{
/*
* Prepare horizontal offset array
*/
if
(
i_pic_width
-
i_width
==
0
)
{
/* No horizontal scaling: YUV conversion is done directly to picture */
*
pb_hscale
=
0
;
}
else
if
(
i_pic_width
-
i_width
>
0
)
{
/* Prepare scaling array for horizontal extension */
int
i_scale_count
=
i_pic_width
;
*
pb_hscale
=
1
;
for
(
int
i_x
=
i_width
;
i_x
--
;
)
{
while
(
(
i_scale_count
-=
i_width
)
>
0
)
{
*
p_offset
++
=
0
;
}
*
p_offset
++
=
1
;
i_scale_count
+=
i_pic_width
;
}
}
else
/* if( i_pic_width - i_width < 0 ) */
{
/* Prepare scaling array for horizontal reduction */
int
i_scale_count
=
i_pic_width
;
*
pb_hscale
=
1
;
for
(
int
i_x
=
i_pic_width
;
i_x
--
;
)
{
*
p_offset
=
1
;
while
(
(
i_scale_count
-=
i_pic_width
)
>
0
)
{
*
p_offset
+=
1
;
}
p_offset
++
;
i_scale_count
+=
i_width
;
}
}
/*
* Set vertical scaling indicator
*/
if
(
i_pic_height
-
i_height
==
0
)
*
pi_vscale
=
0
;
else
if
(
i_pic_height
-
i_height
>
0
)
*
pi_vscale
=
1
;
else
/* if( i_pic_height - i_height < 0 ) */
*
pi_vscale
=
-
1
;
}
VLC_TARGET
void
I420_R5G5B5
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint16_t
*
p_pic
=
(
uint16_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint16_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint16_t
*
p_buffer_start
=
(
uint16_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint16_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#ifdef SSE2
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_15_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END
;
#else
/* SSE2 */
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_15
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
/* SSE2 */
}
VLC_TARGET
void
I420_R5G6B5
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint16_t
*
p_pic
=
(
uint16_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint16_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint16_t
*
p_buffer_start
=
(
uint16_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint16_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#ifdef SSE2
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_16_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_16_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_16_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END
;
#else
/* SSE2 */
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_16
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_16
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
2
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
/* SSE2 */
}
VLC_TARGET
void
I420_A8R8G8B8
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint32_t
*
p_pic
=
(
uint32_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint32_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint32_t
*
p_buffer_start
=
(
uint32_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint32_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#ifdef SSE2
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y
+=
16
;
p_u
+=
4
;
p_v
+=
4
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ARGB_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END
;
#else
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ARGB
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ARGB
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
}
VLC_TARGET
void
I420_R8G8B8A8
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint32_t
*
p_pic
=
(
uint32_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint32_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint32_t
*
p_buffer_start
=
(
uint32_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint32_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#ifdef SSE2
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
4
;
p_v
+=
4
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_RGBA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
/* make sure all SSE2 stores are visible thereafter */
SSE2_END
;
#else
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_RGBA
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_RGBA
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
}
VLC_TARGET
void
I420_B8G8R8A8
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint32_t
*
p_pic
=
(
uint32_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint32_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint32_t
*
p_buffer_start
=
(
uint32_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint32_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#ifdef SSE2
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
4
;
p_v
+=
4
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_BGRA_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
#else
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_BGRA
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_BGRA
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
}
VLC_TARGET
void
I420_A8B8G8R8
(
filter_t
*
p_filter
,
picture_t
*
p_src
,
picture_t
*
p_dest
)
{
/* We got this one from the old arguments */
uint32_t
*
p_pic
=
(
uint32_t
*
)
p_dest
->
p
->
p_pixels
;
uint8_t
*
p_y
=
p_src
->
Y_PIXELS
;
uint8_t
*
p_u
=
p_src
->
U_PIXELS
;
uint8_t
*
p_v
=
p_src
->
V_PIXELS
;
bool
b_hscale
;
/* horizontal scaling type */
unsigned
int
i_vscale
;
/* vertical scaling type */
unsigned
int
i_x
,
i_y
;
/* horizontal and vertical indexes */
int
i_right_margin
;
int
i_rewind
;
int
i_scale_count
;
/* scale modulo counter */
int
i_chroma_width
=
p_filter
->
fmt_in
.
video
.
i_width
/
2
;
/* chroma width */
uint32_t
*
p_pic_start
;
/* beginning of the current line for copy */
/* Conversion buffer pointer */
uint32_t
*
p_buffer_start
=
(
uint32_t
*
)
p_filter
->
p_sys
->
p_buffer
;
uint32_t
*
p_buffer
;
/* Offset array pointer */
int
*
p_offset_start
=
p_filter
->
p_sys
->
p_offset
;
int
*
p_offset
;
const
int
i_source_margin
=
p_src
->
p
[
0
].
i_pitch
-
p_src
->
p
[
0
].
i_visible_pitch
;
const
int
i_source_margin_c
=
p_src
->
p
[
1
].
i_pitch
-
p_src
->
p
[
1
].
i_visible_pitch
;
i_right_margin
=
p_dest
->
p
->
i_pitch
-
p_dest
->
p
->
i_visible_pitch
;
/* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
* on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
* then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
SetOffset
(
p_filter
->
fmt_in
.
video
.
i_width
,
p_filter
->
fmt_in
.
video
.
i_height
,
p_filter
->
fmt_out
.
video
.
i_width
,
p_filter
->
fmt_out
.
video
.
i_height
,
&
b_hscale
,
&
i_vscale
,
p_offset_start
);
/*
* Perform conversion
*/
i_scale_count
=
(
i_vscale
==
1
)
?
p_filter
->
fmt_out
.
video
.
i_height
:
p_filter
->
fmt_in
.
video
.
i_height
;
#ifdef SSE2
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
15
;
/*
** SSE2 128 bits fetch/store instructions are faster
** if memory access is 16 bytes aligned
*/
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
if
(
0
==
(
15
&
(
p_src
->
p
[
Y_PLANE
].
i_pitch
|
p_dest
->
p
->
i_pitch
|
((
intptr_t
)
p_y
)
|
((
intptr_t
)
p_buffer
)))
)
{
/* use faster SSE2 aligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_ALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ABGR_ALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ABGR_UNALIGNED
);
p_y
+=
16
;
p_u
+=
4
;
p_v
+=
4
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
else
{
/* use slower SSE2 unaligned fetch and store */
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
16
;
i_x
--
;
)
{
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ABGR_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
p_buffer
+=
16
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
SSE2_CALL
(
SSE2_INIT_32_UNALIGNED
SSE2_YUV_MUL
SSE2_YUV_ADD
SSE2_UNPACK_32_ABGR_UNALIGNED
);
p_y
+=
16
;
p_u
+=
8
;
p_v
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
}
}
#else
i_rewind
=
(
-
p_filter
->
fmt_in
.
video
.
i_width
)
&
7
;
for
(
i_y
=
0
;
i_y
<
p_filter
->
fmt_in
.
video
.
i_height
;
i_y
++
)
{
p_pic_start
=
p_pic
;
p_buffer
=
b_hscale
?
p_buffer_start
:
p_pic
;
for
(
i_x
=
p_filter
->
fmt_in
.
video
.
i_width
/
8
;
i_x
--
;
)
{
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ABGR
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
/* Here we do some unaligned reads and duplicate conversions, but
* at least we have all the pixels */
if
(
i_rewind
)
{
p_y
-=
i_rewind
;
p_u
-=
i_rewind
>>
1
;
p_v
-=
i_rewind
>>
1
;
p_buffer
-=
i_rewind
;
MMX_CALL
(
MMX_INIT_32
MMX_YUV_MUL
MMX_YUV_ADD
MMX_UNPACK_32_ABGR
);
p_y
+=
8
;
p_u
+=
4
;
p_v
+=
4
;
p_buffer
+=
8
;
}
SCALE_WIDTH
;
SCALE_HEIGHT
(
420
,
4
);
p_y
+=
i_source_margin
;
if
(
i_y
%
2
)
{
p_u
+=
i_source_margin_c
;
p_v
+=
i_source_margin_c
;
}
}
/* re-enable FPU registers */
MMX_END
;
#endif
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment