Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc
Commits
a865ced4
Commit
a865ced4
authored
Oct 13, 2012
by
Rémi Denis-Courmont
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
sepia: clobber XMM registers correctly
parent
18d7c971
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
117 additions
and
119 deletions
+117
-119
modules/video_filter/sepia.c
modules/video_filter/sepia.c
+117
-119
No files found.
modules/video_filter/sepia.c
View file @
a865ced4
...
@@ -206,6 +206,7 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
...
@@ -206,6 +206,7 @@ static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
* instructions. It copies those 8 bytes to 128b register and fills the gaps
* instructions. It copies those 8 bytes to 128b register and fills the gaps
* with zeroes and following operations are made with word-operating instructs.
* with zeroes and following operations are made with word-operating instructs.
*****************************************************************************/
*****************************************************************************/
VLC_SSE
static
inline
void
Sepia8ySSE2
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
static
inline
void
Sepia8ySSE2
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
i_intensity_spread
)
int
i_intensity_spread
)
{
{
...
@@ -225,72 +226,51 @@ static inline void Sepia8ySSE2(uint8_t * dst, const uint8_t * src,
...
@@ -225,72 +226,51 @@ static inline void Sepia8ySSE2(uint8_t * dst, const uint8_t * src,
"movq %%xmm1, (%0)
\n
"
// load to dest
"movq %%xmm1, (%0)
\n
"
// load to dest
:
:
:
"r"
(
dst
),
"r"
(
src
),
"r"
(
i_intensity_spread
)
:
"r"
(
dst
),
"r"
(
src
),
"r"
(
i_intensity_spread
)
:
"memory"
);
:
"memory"
,
"xmm1"
,
"xmm2"
,
"xmm3"
);
}
}
#endif
/*****************************************************************************
VLC_SSE
* PlanarI420Sepia: Applies sepia to one frame of the planar I420 video
static
void
PlanarI420SepiaSSE
(
picture_t
*
p_pic
,
picture_t
*
p_outpic
,
*****************************************************************************
* This function applies sepia effect to one frame of the video by iterating
* through video lines. We iterate for every two lines and for every two pixels
* in line to calculate new sepia values for four y components as well for u
* and v components.
*****************************************************************************/
static
void
PlanarI420Sepia
(
picture_t
*
p_pic
,
picture_t
*
p_outpic
,
int
i_intensity
)
int
i_intensity
)
{
{
/
/ prepared values to copy for U and V channels
/
* prepared values to copy for U and V channels */
const
uint8_t
filling_const_8u
=
128
-
i_intensity
/
6
;
const
uint8_t
filling_const_8u
=
128
-
i_intensity
/
6
;
const
uint8_t
filling_const_8v
=
128
+
i_intensity
/
14
;
const
uint8_t
filling_const_8v
=
128
+
i_intensity
/
14
;
#if defined(CAN_COMPILE_SSE2)
if
(
vlc_CPU_SSE2
())
{
/* prepared value for faster broadcasting in xmm register */
/* prepared value for faster broadcasting in xmm register */
int
i_intensity_spread
=
0x10001
*
(
uint8_t
)
i_intensity
;
int
i_intensity_spread
=
0x10001
*
(
uint8_t
)
i_intensity
;
__asm__
volatile
(
__asm__
volatile
(
"pxor %%xmm7, %%xmm7
\n
"
"pxor %%xmm7, %%xmm7
\n
"
::
);
::
:
"xmm7"
);
/* iterate for every two visible line in the frame */
/* iterate for every two visible line in the frame */
for
(
int
y
=
0
;
y
<
p_pic
->
p
[
Y_PLANE
].
i_visible_lines
-
1
;
y
+=
2
)
for
(
int
y
=
0
;
y
<
p_pic
->
p
[
Y_PLANE
].
i_visible_lines
-
1
;
y
+=
2
)
{
{
const
int
i_dy_line1_start
=
y
*
p_outpic
->
p
[
Y_PLANE
].
i_pitch
;
const
int
i_dy_line1_start
=
y
*
p_outpic
->
p
[
Y_PLANE
].
i_pitch
;
const
int
i_dy_line2_start
=
const
int
i_dy_line2_start
=
(
y
+
1
)
*
p_outpic
->
p
[
Y_PLANE
].
i_pitch
;
(
y
+
1
)
*
p_outpic
->
p
[
Y_PLANE
].
i_pitch
;
const
int
i_du_line_start
=
(
y
/
2
)
*
p_outpic
->
p
[
U_PLANE
].
i_pitch
;
const
int
i_du_line_start
=
const
int
i_dv_line_start
=
(
y
/
2
)
*
p_outpic
->
p
[
V_PLANE
].
i_pitch
;
(
y
/
2
)
*
p_outpic
->
p
[
U_PLANE
].
i_pitch
;
const
int
i_dv_line_start
=
(
y
/
2
)
*
p_outpic
->
p
[
V_PLANE
].
i_pitch
;
int
x
=
0
;
int
x
=
0
;
/* iterate for every visible line in the frame (eight values at once) */
/* iterate for every visible line in the frame (eight values at once) */
for
(
;
x
<
p_pic
->
p
[
Y_PLANE
].
i_visible_pitch
-
15
;
x
+=
16
)
for
(
;
x
<
p_pic
->
p
[
Y_PLANE
].
i_visible_pitch
-
15
;
x
+=
16
)
{
{
/* Compute yellow channel values with asm function */
/* Compute yellow channel values with asm function */
Sepia8ySSE2
(
Sepia8ySSE2
(
&
p_outpic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line1_start
+
x
],
&
p_outpic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line1_start
+
x
],
&
p_pic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line1_start
+
x
],
&
p_pic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line1_start
+
x
],
i_intensity_spread
);
i_intensity_spread
);
Sepia8ySSE2
(
Sepia8ySSE2
(
&
p_outpic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line2_start
+
x
],
&
p_outpic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line2_start
+
x
],
&
p_pic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line2_start
+
x
],
&
p_pic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line2_start
+
x
],
i_intensity_spread
);
i_intensity_spread
);
Sepia8ySSE2
(
Sepia8ySSE2
(
&
p_outpic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line1_start
+
x
+
8
],
&
p_outpic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line1_start
+
x
+
8
],
&
p_pic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line1_start
+
x
+
8
],
&
p_pic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line1_start
+
x
+
8
],
i_intensity_spread
);
i_intensity_spread
);
Sepia8ySSE2
(
Sepia8ySSE2
(
&
p_outpic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line2_start
+
x
+
8
],
&
p_outpic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line2_start
+
x
+
8
],
&
p_pic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line2_start
+
x
+
8
],
&
p_pic
->
p
[
Y_PLANE
].
p_pixels
[
i_dy_line2_start
+
x
+
8
],
i_intensity_spread
);
i_intensity_spread
);
/* Copy precomputed values to destination memory location */
/* Copy precomputed values to destination memory location */
memset
(
memset
(
&
p_outpic
->
p
[
U_PLANE
].
p_pixels
[
i_du_line_start
+
(
x
/
2
)],
&
p_outpic
->
p
[
U_PLANE
].
p_pixels
[
i_du_line_start
+
(
x
/
2
)],
filling_const_8u
,
8
);
filling_const_8u
,
8
);
memset
(
memset
(
&
p_outpic
->
p
[
V_PLANE
].
p_pixels
[
i_dv_line_start
+
(
x
/
2
)],
&
p_outpic
->
p
[
V_PLANE
].
p_pixels
[
i_dv_line_start
+
(
x
/
2
)],
filling_const_8v
,
8
);
filling_const_8v
,
8
);
}
}
/* Completing the job, the cycle above takes really big chunks, so
/* Completing the job, the cycle above takes really big chunks, so
...
@@ -322,10 +302,29 @@ static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
...
@@ -322,10 +302,29 @@ static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
filling_const_8v
;
filling_const_8v
;
}
}
}
}
}
}
else
#endif
#endif
{
/*****************************************************************************
* PlanarI420Sepia: Applies sepia to one frame of the planar I420 video
*****************************************************************************
* This function applies sepia effect to one frame of the video by iterating
* through video lines. We iterate for every two lines and for every two pixels
* in line to calculate new sepia values for four y components as well for u
* and v components.
*****************************************************************************/
static
void
PlanarI420Sepia
(
picture_t
*
p_pic
,
picture_t
*
p_outpic
,
int
i_intensity
)
{
#if defined(CAN_COMPILE_SSE2)
if
(
vlc_CPU_SSE2
())
return
PlanarI420SepiaSSE
(
p_pic
,
p_outpic
,
i_intensity
);
#endif
// prepared values to copy for U and V channels
const
uint8_t
filling_const_8u
=
128
-
i_intensity
/
6
;
const
uint8_t
filling_const_8v
=
128
+
i_intensity
/
14
;
/* iterate for every two visible line in the frame */
/* iterate for every two visible line in the frame */
for
(
int
y
=
0
;
y
<
p_pic
->
p
[
Y_PLANE
].
i_visible_lines
-
1
;
y
+=
2
)
for
(
int
y
=
0
;
y
<
p_pic
->
p
[
Y_PLANE
].
i_visible_lines
-
1
;
y
+=
2
)
{
{
...
@@ -366,7 +365,6 @@ static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
...
@@ -366,7 +365,6 @@ static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
filling_const_8v
;
filling_const_8v
;
}
}
}
}
}
}
}
/*****************************************************************************
/*****************************************************************************
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment