Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc-2-2
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc-2-2
Commits
86ca8255
Commit
86ca8255
authored
Dec 17, 2012
by
Rémi Denis-Courmont
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
deinterlace: move Phosphor MMX acceleration to a separate function
parent
4c983898
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
124 additions
and
67 deletions
+124
-67
modules/video_filter/deinterlace/algo_phosphor.c
modules/video_filter/deinterlace/algo_phosphor.c
+124
-67
No files found.
modules/video_filter/deinterlace/algo_phosphor.c
View file @
86ca8255
...
@@ -78,11 +78,6 @@ static void DarkenField( picture_t *p_dst,
...
@@ -78,11 +78,6 @@ static void DarkenField( picture_t *p_dst,
/* Bitwise ANDing with this clears the i_strength highest bits
/* Bitwise ANDing with this clears the i_strength highest bits
of each byte */
of each byte */
#ifdef CAN_COMPILE_MMXEXT
const
bool
mmxext
=
vlc_CPU_MMXEXT
();
uint64_t
i_strength_u64
=
i_strength
;
/* for MMX version (needs to know
number of bits) */
#endif
const
uint8_t
remove_high_u8
=
0xFF
>>
i_strength
;
const
uint8_t
remove_high_u8
=
0xFF
>>
i_strength
;
const
uint64_t
remove_high_u64
=
remove_high_u8
*
const
uint64_t
remove_high_u64
=
remove_high_u8
*
INT64_C
(
0x0101010101010101
);
INT64_C
(
0x0101010101010101
);
...
@@ -92,7 +87,7 @@ static void DarkenField( picture_t *p_dst,
...
@@ -92,7 +87,7 @@ static void DarkenField( picture_t *p_dst,
For luma, the operation is just a shift + bitwise AND, so we vectorize
For luma, the operation is just a shift + bitwise AND, so we vectorize
even in the C version.
even in the C version.
There is an MMX version
,
too, because it performs about twice as fast.
There is an MMX version too, because it performs about twice as fast.
*/
*/
int
i_plane
=
Y_PLANE
;
int
i_plane
=
Y_PLANE
;
uint8_t
*
p_out
,
*
p_out_end
;
uint8_t
*
p_out
,
*
p_out_end
;
...
@@ -112,9 +107,80 @@ static void DarkenField( picture_t *p_dst,
...
@@ -112,9 +107,80 @@ static void DarkenField( picture_t *p_dst,
uint64_t
*
po
=
(
uint64_t
*
)
p_out
;
uint64_t
*
po
=
(
uint64_t
*
)
p_out
;
int
x
=
0
;
int
x
=
0
;
for
(
;
x
<
w8
;
x
+=
8
,
++
po
)
(
*
po
)
=
(
((
*
po
)
>>
i_strength
)
&
remove_high_u64
);
/* handle the width remainder */
uint8_t
*
po_temp
=
(
uint8_t
*
)
po
;
for
(
;
x
<
w
;
++
x
,
++
po_temp
)
(
*
po_temp
)
=
(
((
*
po_temp
)
>>
i_strength
)
&
remove_high_u8
);
}
/* Process chroma if the field chromas are independent.
The origin (black) is at YUV = (0, 128, 128) in the uint8 format.
The chroma processing is a bit more complicated than luma,
and needs MMX for vectorization.
*/
if
(
process_chroma
)
{
for
(
i_plane
++
/* luma already handled */
;
i_plane
<
p_dst
->
i_planes
;
i_plane
++
)
{
int
w
=
p_dst
->
p
[
i_plane
].
i_visible_pitch
;
p_out
=
p_dst
->
p
[
i_plane
].
p_pixels
;
p_out_end
=
p_out
+
p_dst
->
p
[
i_plane
].
i_pitch
*
p_dst
->
p
[
i_plane
].
i_visible_lines
;
/* skip first line for bottom field */
if
(
i_field
==
1
)
p_out
+=
p_dst
->
p
[
i_plane
].
i_pitch
;
for
(
;
p_out
<
p_out_end
;
p_out
+=
2
*
p_dst
->
p
[
i_plane
].
i_pitch
)
{
/* Handle the width remainder */
uint8_t
*
po
=
p_out
;
for
(
int
x
=
0
;
x
<
w
;
++
x
,
++
po
)
(
*
po
)
=
128
+
(
((
*
po
)
-
128
)
/
(
1
<<
i_strength
)
);
}
/* for p_out... */
}
/* for i_plane... */
}
/* if process_chroma */
}
#ifdef CAN_COMPILE_MMXEXT
#ifdef CAN_COMPILE_MMXEXT
if
(
mmxext
)
VLC_MMX
static
void
DarkenFieldMMX
(
picture_t
*
p_dst
,
const
int
i_field
,
const
int
i_strength
,
bool
process_chroma
)
{
assert
(
p_dst
!=
NULL
);
assert
(
i_field
==
0
||
i_field
==
1
);
assert
(
i_strength
>=
1
&&
i_strength
<=
3
);
uint64_t
i_strength_u64
=
i_strength
;
/* needs to know number of bits */
const
uint8_t
remove_high_u8
=
0xFF
>>
i_strength
;
const
uint64_t
remove_high_u64
=
remove_high_u8
*
INT64_C
(
0x0101010101010101
);
int
i_plane
=
Y_PLANE
;
uint8_t
*
p_out
,
*
p_out_end
;
int
w
=
p_dst
->
p
[
i_plane
].
i_visible_pitch
;
p_out
=
p_dst
->
p
[
i_plane
].
p_pixels
;
p_out_end
=
p_out
+
p_dst
->
p
[
i_plane
].
i_pitch
*
p_dst
->
p
[
i_plane
].
i_visible_lines
;
/* skip first line for bottom field */
if
(
i_field
==
1
)
p_out
+=
p_dst
->
p
[
i_plane
].
i_pitch
;
int
wm8
=
w
%
8
;
/* remainder */
int
w8
=
w
-
wm8
;
/* part of width that is divisible by 8 */
for
(
;
p_out
<
p_out_end
;
p_out
+=
2
*
p_dst
->
p
[
i_plane
].
i_pitch
)
{
{
uint64_t
*
po
=
(
uint64_t
*
)
p_out
;
int
x
=
0
;
movq_m2r
(
i_strength_u64
,
mm1
);
movq_m2r
(
i_strength_u64
,
mm1
);
movq_m2r
(
remove_high_u64
,
mm2
);
movq_m2r
(
remove_high_u64
,
mm2
);
for
(
;
x
<
w8
;
x
+=
8
)
for
(
;
x
<
w8
;
x
+=
8
)
...
@@ -126,13 +192,6 @@ static void DarkenField( picture_t *p_dst,
...
@@ -126,13 +192,6 @@ static void DarkenField( picture_t *p_dst,
movq_r2m
(
mm0
,
(
*
po
++
)
);
movq_r2m
(
mm0
,
(
*
po
++
)
);
}
}
}
else
#endif
{
for
(
;
x
<
w8
;
x
+=
8
,
++
po
)
(
*
po
)
=
(
((
*
po
)
>>
i_strength
)
&
remove_high_u64
);
}
/* handle the width remainder */
/* handle the width remainder */
uint8_t
*
po_temp
=
(
uint8_t
*
)
po
;
uint8_t
*
po_temp
=
(
uint8_t
*
)
po
;
...
@@ -148,16 +207,14 @@ static void DarkenField( picture_t *p_dst,
...
@@ -148,16 +207,14 @@ static void DarkenField( picture_t *p_dst,
*/
*/
if
(
process_chroma
)
if
(
process_chroma
)
{
{
for
(
i_plane
=
0
;
i_plane
<
p_dst
->
i_planes
;
i_plane
++
)
for
(
i_plane
++
/* luma already handled */
;
i_plane
<
p_dst
->
i_planes
;
i_plane
++
)
{
{
if
(
i_plane
==
Y_PLANE
)
continue
;
/* luma already handled */
int
w
=
p_dst
->
p
[
i_plane
].
i_visible_pitch
;
int
w
=
p_dst
->
p
[
i_plane
].
i_visible_pitch
;
#ifdef CAN_COMPILE_MMXEXT
int
wm8
=
w
%
8
;
/* remainder */
int
wm8
=
w
%
8
;
/* remainder */
int
w8
=
w
-
wm8
;
/* part of width that is divisible by 8 */
int
w8
=
w
-
wm8
;
/* part of width that is divisible by 8 */
#endif
p_out
=
p_dst
->
p
[
i_plane
].
p_pixels
;
p_out
=
p_dst
->
p
[
i_plane
].
p_pixels
;
p_out_end
=
p_out
+
p_dst
->
p
[
i_plane
].
i_pitch
p_out_end
=
p_out
+
p_dst
->
p
[
i_plane
].
i_pitch
*
p_dst
->
p
[
i_plane
].
i_visible_lines
;
*
p_dst
->
p
[
i_plane
].
i_visible_lines
;
...
@@ -170,19 +227,16 @@ static void DarkenField( picture_t *p_dst,
...
@@ -170,19 +227,16 @@ static void DarkenField( picture_t *p_dst,
{
{
int
x
=
0
;
int
x
=
0
;
#ifdef CAN_COMPILE_MMXEXT
/* See also easy-to-read C version below. */
/* See also easy-to-read C version below. */
if
(
mmxext
)
{
static
const
mmx_t
b128
=
{
.
uq
=
0x8080808080808080ULL
};
static
const
mmx_t
b128
=
{
.
uq
=
0x8080808080808080ULL
};
movq_m2r
(
b128
,
mm5
);
movq_m2r
(
b128
,
mm5
);
movq_m2r
(
i_strength_u64
,
mm6
);
movq_m2r
(
i_strength_u64
,
mm6
);
movq_m2r
(
remove_high_u64
,
mm7
);
movq_m2r
(
remove_high_u64
,
mm7
);
uint64_t
*
po
=
(
uint64_t
*
)
p_out
;
uint64_t
*
po8
=
(
uint64_t
*
)
p_out
;
for
(
;
x
<
w8
;
x
+=
8
)
for
(
;
x
<
w8
;
x
+=
8
)
{
{
movq_m2r
(
(
*
po
),
mm0
);
movq_m2r
(
(
*
po8
),
mm0
);
movq_r2r
(
mm5
,
mm2
);
/* 128 */
movq_r2r
(
mm5
,
mm2
);
/* 128 */
movq_r2r
(
mm0
,
mm1
);
/* copy of data */
movq_r2r
(
mm0
,
mm1
);
/* copy of data */
...
@@ -199,13 +253,10 @@ static void DarkenField( picture_t *p_dst,
...
@@ -199,13 +253,10 @@ static void DarkenField( picture_t *p_dst,
psubb_r2r
(
mm2
,
mm1
);
psubb_r2r
(
mm2
,
mm1
);
paddb_r2r
(
mm5
,
mm1
);
paddb_r2r
(
mm5
,
mm1
);
movq_r2m
(
mm1
,
(
*
po
++
)
);
movq_r2m
(
mm1
,
(
*
po8
++
)
);
}
}
}
#endif
/* C version - handle the width remainder
/* C version - handle the width remainder */
(or everything if no MMX) */
uint8_t
*
po
=
p_out
;
uint8_t
*
po
=
p_out
;
for
(
;
x
<
w
;
++
x
,
++
po
)
for
(
;
x
<
w
;
++
x
,
++
po
)
(
*
po
)
=
128
+
(
((
*
po
)
-
128
)
/
(
1
<<
i_strength
)
);
(
*
po
)
=
128
+
(
((
*
po
)
-
128
)
/
(
1
<<
i_strength
)
);
...
@@ -213,11 +264,9 @@ static void DarkenField( picture_t *p_dst,
...
@@ -213,11 +264,9 @@ static void DarkenField( picture_t *p_dst,
}
/* for i_plane... */
}
/* for i_plane... */
}
/* if process_chroma */
}
/* if process_chroma */
#ifdef CAN_COMPILE_MMXEXT
if
(
mmxext
)
emms
();
emms
();
#endif
}
}
#endif
/*****************************************************************************
/*****************************************************************************
* Public functions
* Public functions
...
@@ -303,9 +352,17 @@ int RenderPhosphor( filter_t *p_filter,
...
@@ -303,9 +352,17 @@ int RenderPhosphor( filter_t *p_filter,
In most use cases the dimmer is used.
In most use cases the dimmer is used.
*/
*/
if
(
p_sys
->
phosphor
.
i_dimmer_strength
>
0
)
if
(
p_sys
->
phosphor
.
i_dimmer_strength
>
0
)
{
#ifdef CAN_COMPILE_MMXEXT
if
(
vlc_CPU_MMXEXT
()
)
DarkenFieldMMX
(
p_dst
,
!
i_field
,
p_sys
->
phosphor
.
i_dimmer_strength
,
p_sys
->
chroma
->
p
[
1
].
h
.
num
==
p_sys
->
chroma
->
p
[
1
].
h
.
den
&&
p_sys
->
chroma
->
p
[
2
].
h
.
num
==
p_sys
->
chroma
->
p
[
2
].
h
.
den
);
else
#endif
DarkenField
(
p_dst
,
!
i_field
,
p_sys
->
phosphor
.
i_dimmer_strength
,
DarkenField
(
p_dst
,
!
i_field
,
p_sys
->
phosphor
.
i_dimmer_strength
,
p_sys
->
chroma
->
p
[
1
].
h
.
num
==
p_sys
->
chroma
->
p
[
1
].
h
.
den
&&
p_sys
->
chroma
->
p
[
1
].
h
.
num
==
p_sys
->
chroma
->
p
[
1
].
h
.
den
&&
p_sys
->
chroma
->
p
[
2
].
h
.
num
==
p_sys
->
chroma
->
p
[
2
].
h
.
den
);
p_sys
->
chroma
->
p
[
2
].
h
.
num
==
p_sys
->
chroma
->
p
[
2
].
h
.
den
);
}
return
VLC_SUCCESS
;
return
VLC_SUCCESS
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment