Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc
Commits
3d64b908
Commit
3d64b908
authored
Aug 04, 2012
by
Rémi Denis-Courmont
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
deinterlace: ARM optimizations for 16-bits merge
parent
95eb7971
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
71 additions
and
4 deletions
+71
-4
modules/video_filter/deinterlace/deinterlace.c
modules/video_filter/deinterlace/deinterlace.c
+6
-4
modules/video_filter/deinterlace/merge.h
modules/video_filter/deinterlace/merge.h
+2
-0
modules/video_filter/deinterlace/merge_arm.S
modules/video_filter/deinterlace/merge_arm.S
+63
-0
No files found.
modules/video_filter/deinterlace/deinterlace.c
View file @
3d64b908
...
...
@@ -657,11 +657,13 @@ int Open( vlc_object_t *p_this )
else
#endif
#if defined(__arm__)
if
(
chroma
->
pixel_size
==
1
&&
vlc_CPU_ARM_NEON
()
)
p_sys
->
pf_merge
=
merge8_arm_neon
;
if
(
vlc_CPU_ARM_NEON
()
)
p_sys
->
pf_merge
=
(
chroma
->
pixel_size
==
1
)
?
merge8_arm_neon
:
merge16_arm_neon
;
else
if
(
chroma
->
pixel_size
==
1
&&
vlc_CPU_ARMv6
()
)
p_sys
->
pf_merge
=
merge8_armv6
;
if
(
vlc_CPU_ARMv6
()
)
p_sys
->
pf_merge
=
(
chroma
->
pixel_size
==
1
)
?
merge8_armv6
:
merge16_armv6
;
else
#endif
{
...
...
modules/video_filter/deinterlace/merge.h
View file @
3d64b908
...
...
@@ -163,11 +163,13 @@ void Merge16BitSSE2( void *, const void *, const void *, size_t );
* ARM NEON routine to blend pixels from two picture lines.
*/
void
merge8_arm_neon
(
void
*
,
const
void
*
,
const
void
*
,
size_t
);
void
merge16_arm_neon
(
void
*
,
const
void
*
,
const
void
*
,
size_t
);
/**
* ARMv6 SIMD routine to blend pixels from two picture lines.
*/
void
merge8_armv6
(
void
*
,
const
void
*
,
const
void
*
,
size_t
);
void
merge16_armv6
(
void
*
,
const
void
*
,
const
void
*
,
size_t
);
#endif
/*****************************************************************************
...
...
modules/video_filter/deinterlace/merge_arm.S
View file @
3d64b908
...
...
@@ -71,6 +71,47 @@ merge8_arm_neon:
vst1.u8
{
q0
},
[
DEST
,:
128
]!
bx
lr
.
align
2
.
global
merge16_arm_neon
.
type
merge16_arm_neon
,
%
function
merge16_arm_neon
:
cmp
SIZE
,
#
64
blo
2
f
1
:
pld
[
SRC1
,
#
64
]
vld1.u16
{
q0
-
q1
},
[
SRC1
,:
128
]!
pld
[
SRC2
,
#
64
]
vld1.u16
{
q8
-
q9
},
[
SRC2
,:
128
]!
vhadd.u16
q0
,
q0
,
q8
sub
SIZE
,
SIZE
,
#
64
vld1.u16
{
q2
-
q3
},
[
SRC1
,:
128
]!
vhadd.u16
q1
,
q1
,
q9
vld1.u16
{
q10
-
q11
},
[
SRC2
,:
128
]!
vhadd.u16
q2
,
q2
,
q10
cmp
SIZE
,
#
64
vhadd.u16
q3
,
q3
,
q11
vst1.u16
{
q0
-
q1
},
[
DEST
,:
128
]!
vst1.u16
{
q2
-
q3
},
[
DEST
,:
128
]!
bhs
1
b
2
:
cmp
SIZE
,
#
32
blo
3
f
vld1.u16
{
q0
-
q1
},
[
SRC1
,:
128
]!
sub
SIZE
,
SIZE
,
#
32
vld1.u16
{
q8
-
q9
},
[
SRC2
,:
128
]!
vhadd.u16
q0
,
q0
,
q8
vhadd.u16
q1
,
q1
,
q9
vst1.u16
{
q0
-
q1
},
[
DEST
,:
128
]!
3
:
cmp
SIZE
,
#
16
bxlo
lr
vld1.u16
{
q0
},
[
SRC1
,:
128
]!
sub
SIZE
,
SIZE
,
#
16
vld1.u16
{
q8
},
[
SRC2
,:
128
]!
vhadd.u16
q0
,
q0
,
q8
vst1.u16
{
q0
},
[
DEST
,:
128
]!
bx
lr
.
align
2
.
global
merge8_armv6
.
type
merge8_armv6
,
%
function
...
...
@@ -92,3 +133,25 @@ merge8_armv6:
stm
DEST
!,
{
r6
-
r7
}
popeq
{
r4
-
r9
,
pc
}
b
1
b
.
align
2
.
global
merge16_armv6
.
type
merge16_armv6
,
%
function
merge16_armv6
:
push
{
r4
-
r9
,
lr
}
1
:
pld
[
SRC1
,
#
64
]
ldm
SRC1
!,
{
r4
-
r5
}
pld
[
SRC2
,
#
64
]
ldm
SRC2
!,
{
r8
-
r9
}
subs
SIZE
,
SIZE
,
#
16
uhadd16
r4
,
r4
,
r8
ldm
SRC1
!,
{
r6
-
r7
}
uhadd16
r5
,
r5
,
r9
ldm
SRC2
!,
{
ip
,
lr
}
uhadd16
r6
,
r6
,
ip
stm
DEST
!,
{
r4
-
r5
}
uhadd16
r7
,
r7
,
lr
stm
DEST
!,
{
r6
-
r7
}
popeq
{
r4
-
r9
,
pc
}
b
1
b
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment