Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc-gpu
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc-gpu
Commits
0e770b17
Commit
0e770b17
authored
Sep 20, 2009
by
Rémi Denis-Courmont
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
i420->YUYV NEON: rewrite using VZIP
This is over twice faster. Thanks to Måns Rullgård for the hint.
parent
af4dd740
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
45 additions
and
48 deletions
+45
-48
modules/video_chroma/i420_yuyv_neon.S
modules/video_chroma/i420_yuyv_neon.S
+45
-48
No files found.
modules/video_chroma/i420_yuyv_neon.S
View file @
0e770b17
@***************************************************************************
*
@****************************************************************************
*
@
i420_yuyv_neon
.
S
:
ARM
NEONv1
I420
to
YUYV
chroma
conversion
@****************************************************************************
*
@
Copyright
(
C
)
2009
R
é
mi
Denis
-
Courmont
...
...
@@ -14,8 +14,8 @@
@
GNU
General
Public
License
for
more
details
.
@
@
You
should
have
received
a
copy
of
the
GNU
General
Public
License
@
along
with
this
program
; if not, write to the Free Software
@
Foundation
,
Inc
.
,
51
Franklin
Street
,
Fifth
Floor
,
Boston
MA
02110
-
1301
,
USA
.
@
along
with
this
program
; if not, write to the Free Software
Foundation,
@
Inc
.
,
51
Franklin
Street
,
Fifth
Floor
,
Boston
MA
02110
-
1301
,
USA
.
@****************************************************************************/
.
fpu
neon
...
...
@@ -32,41 +32,26 @@
#define END_O1 r12
.
align
.
global
i420_uyvy_neon
.
type
i420_uyvy_neon
,
%
function
i420_uyvy_neon
:
adr
r12
,
indexes
+
64
b
i420_pack_neon
.
global
i420_yuyv_neon
.
type
i420_yuyv_neon
,
%
function
i420_yuyv_neon
:
adr
r12
,
indexes
.
hidden
i420_pack_neon
i420_pack_neon
:
push
{
r4
-
r7
,
lr
}
vld1.u8
{
d24
-
d27
},
[
r12
]!
ldmia
r1
,
{
Y1
,
U
,
V
}
vld1.u8
{
d28
-
d31
},
[
r12
]
add
O2
,
O1
,
PITCH
,
lsl
#
1
add
Y2
,
Y1
,
PITCH
1
:
mov
END_O1
,
O2
2
:
vld1.u8
{
d0
-
d1
},
[
Y1
,:
128
]!
vld1.u8
{
d2
},
[
U
,:
64
]!
vld1.u8
{
d3
},
[
V
,:
64
]!
vld1.u8
{
d4
-
d5
},
[
Y2
,:
128
]!
vtbl.u8
d16
,
{
d0
-
d3
},
d24
vtbl.u8
d17
,
{
d0
-
d3
},
d25
vtbl.u8
d18
,
{
d0
-
d3
},
d26
vtbl.u8
d19
,
{
d0
-
d3
},
d27
vtbl.u8
d20
,
{
d2
-
d5
},
d28
vtbl.u8
d21
,
{
d2
-
d5
},
d29
vtbl.u8
d22
,
{
d2
-
d5
},
d30
vtbl.u8
d23
,
{
d2
-
d5
},
d31
vst1.u8
{
d16
-
d19
},
[
O1
,:
128
]!
vst1.u8
{
d20
-
d23
},
[
O2
,:
128
]!
vzip.u8
d2
,
d3
vld1.u8
{
q0
},
[
Y1
,:
128
]!
vmov
q3
,
q1
vzip.u8
q0
,
q1
vld1.u8
{
q2
},
[
Y2
,:
128
]!
vzip.u8
q2
,
q3
vst1.u8
{
q0
-
q1
},
[
O1
,:
128
]!
vst1.u8
{
q2
-
q3
},
[
O2
,:
128
]!
cmp
O1
,
END_O1
bne
2
b
...
...
@@ -82,25 +67,37 @@ i420_pack_neon:
pop
{
r4
-
r7
,
pc
}
.
hidden
indexes
indexes
:
@
YUYV1
.
byte
0x00
,
0x10
,
0x01
,
0x18
,
0x02
,
0x11
,
0x03
,
0x19
.
byte
0x04
,
0x12
,
0x05
,
0x1A
,
0x06
,
0x13
,
0x07
,
0x1B
.
byte
0x08
,
0x14
,
0x09
,
0x1C
,
0x0A
,
0x15
,
0x0B
,
0x1D
.
byte
0x0C
,
0x16
,
0x0D
,
0x1E
,
0x0E
,
0x17
,
0x0F
,
0x1F
@
YUYV2
.
byte
0x10
,
0x00
,
0x11
,
0x08
,
0x12
,
0x01
,
0x13
,
0x09
.
byte
0x14
,
0x02
,
0x15
,
0x0A
,
0x16
,
0x03
,
0x17
,
0x0B
.
byte
0x18
,
0x04
,
0x19
,
0x0C
,
0x1A
,
0x05
,
0x1B
,
0x0D
.
byte
0x1C
,
0x06
,
0x1D
,
0x0E
,
0x1E
,
0x07
,
0x1F
,
0x0F
@
UYVY1
.
byte
0x10
,
0x00
,
0x18
,
0x01
,
0x11
,
0x02
,
0x19
,
0x03
.
byte
0x12
,
0x04
,
0x1A
,
0x05
,
0x13
,
0x06
,
0x1B
,
0x07
.
byte
0x14
,
0x08
,
0x1C
,
0x09
,
0x15
,
0x0A
,
0x1D
,
0x0B
.
byte
0x16
,
0x0C
,
0x1E
,
0x0D
,
0x17
,
0x0E
,
0x1F
,
0x0F
@
UYVY2
.
byte
0x00
,
0x10
,
0x08
,
0x11
,
0x01
,
0x12
,
0x09
,
0x13
.
byte
0x02
,
0x14
,
0x0A
,
0x15
,
0x03
,
0x16
,
0x0B
,
0x17
.
byte
0x04
,
0x18
,
0x0C
,
0x19
,
0x05
,
0x1A
,
0x0D
,
0x1B
.
byte
0x06
,
0x1C
,
0x0E
,
0x1D
,
0x07
,
0x1E
,
0x0F
,
0x1F
.
global
i420_uyvy_neon
.
type
i420_uyvy_neon
,
%
function
i420_uyvy_neon
:
push
{
r4
-
r7
,
lr
}
ldmia
r1
,
{
Y1
,
U
,
V
}
add
O2
,
O1
,
PITCH
,
lsl
#
1
add
Y2
,
Y1
,
PITCH
1
:
mov
END_O1
,
O2
2
:
vld1.u8
{
d0
},
[
U
,:
64
]!
vld1.u8
{
d1
},
[
V
,:
64
]!
vzip.u8
d0
,
d1
vld1.u8
{
q1
},
[
Y1
,:
128
]!
vmov
q2
,
q0
vzip.u8
q0
,
q1
vld1.u8
{
q3
},
[
Y2
,:
128
]!
vzip.u8
q2
,
q3
vst1.u8
{
q0
-
q1
},
[
O1
,:
128
]!
vst1.u8
{
q2
-
q3
},
[
O2
,:
128
]!
cmp
O1
,
END_O1
bne
2
b
sub
HEIGHT
,
#
2
mov
O1
,
O2
add
O2
,
PITCH
,
lsl
#
1
mov
Y1
,
Y2
add
Y2
,
PITCH
cmp
HEIGHT
,
#
0
bne
1
b
pop
{
r4
-
r7
,
pc
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment