Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc-2-2
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc-2-2
Commits
afff7f0a
Commit
afff7f0a
authored
Mar 12, 2013
by
Sébastien Toque
Committed by
Jean-Baptiste Kempf
Mar 18, 2013
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
i420->rv32 neon: improve scheduling & registers usage
Signed-off-by:
Jean-Baptiste Kempf
<
jb@videolan.org
>
parent
7ad605f9
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
58 additions
and
54 deletions
+58
-54
modules/arm_neon/i420_rgb.S
modules/arm_neon/i420_rgb.S
+58
-54
No files found.
modules/arm_neon/i420_rgb.S
View file @
afff7f0a
...
...
@@ -50,16 +50,20 @@
#define u D24
#define v D25
#define y1 D
2
8
#define y2 D
2
9
#define y1 D
1
8
#define y2 D
1
9
#define chro_r Q6
#define chro_g Q7
#define chro_b Q8
#define red Q9
#define green Q10
#define blue Q11
#define lumi Q15
#define lumi1 Q15
#define lumi2 Q10
#define red16_1 Q9
#define green16_1 Q10
#define blue16_1 Q11
#define red16_2 Q12
#define green16_2 Q13
#define blue16_2 Q14
#define red1 D24
#define green1 D25
...
...
@@ -123,69 +127,69 @@ loop_col:
vld1.u8
{
u
},
[
U
,:
64
]!
vld1.u8
{
v
},
[
V
,:
64
]!
vmull.u8
chro_r
,
v
,
coefRV
vmull.u8
chro_g
,
u
,
coefGU
vmlal.u8
chro_g
,
v
,
coefGV
vmull.u8
chro_b
,
u
,
coefBU
/
*
Y
Top
Row
*/
vld2.u8
{
y1
,
y2
},
[
Y1
,:
128
]!
vadd.s16
chro_r
,
Rc
,
chro_r
vsub.s16
chro_g
,
Gc
,
chro_g
vadd.s16
chro_b
,
Bc
,
chro_b
vmull.u8
Q14
,
v
,
coefRV
vmull.u8
Q11
,
u
,
coefGU
vmull.u8
Q13
,
u
,
coefBU
vmlal.u8
Q11
,
v
,
coefGV
vmull.u8
lumi2
,
y2
,
coefY
vmull.u8
lumi1
,
y1
,
coefY
vadd.s16
chro_r
,
Rc
,
Q14
vadd.s16
chro_b
,
Bc
,
Q13
vsub.s16
chro_g
,
Gc
,
Q11
pld
[
U
]
pld
[
V
]
/
*
Y
Top
Row
*/
vld2.u8
{
y1
,
y2
},
[
Y1
,:
128
]!
/
*
y1
:
chrominance
+
luminance
,
then
clamp
(
divide
by
64
)
*/
vmull.u8
lumi
,
y1
,
coefY
vqadd.s16
red
,
lumi
,
chro_r
vqadd.s16
green
,
lumi
,
chro_g
vqadd.s16
blue
,
lumi
,
chro_b
vqrshrun.s16
red1
,
red
,
#
6
vqrshrun.s16
green1
,
green
,
#
6
vqrshrun.s16
blue1
,
blue
,
#
6
/
*
y2
:
chrominance
+
luminance
,
then
clamp
(
divide
by
64
)
*/
vmull.u8
lumi
,
y2
,
coefY
vqadd.s16
red
,
lumi
,
chro_r
vqadd.s16
green
,
lumi
,
chro_g
vqadd.s16
blue
,
lumi
,
chro_b
vqrshrun.s16
red2
,
red
,
#
6
vqrshrun.s16
green2
,
green
,
#
6
vqrshrun.s16
blue2
,
blue
,
#
6
/
*
chrominance
+
luminance
*/
vqadd.s16
red16_2
,
lumi2
,
chro_r
vqadd.s16
blue16_2
,
lumi2
,
chro_b
vqadd.s16
green16_2
,
lumi2
,
chro_g
vqadd.s16
red16_1
,
lumi1
,
chro_r
vqadd.s16
green16_1
,
lumi1
,
chro_g
vqadd.s16
blue16_1
,
lumi1
,
chro_b
/
*
clamp
(
divide
by
64
)
*/
vqrshrun.s16
blue2
,
blue16_2
,
#
6
vqrshrun.s16
red2
,
red16_2
,
#
6
vqrshrun.s16
green2
,
green16_2
,
#
6
vqrshrun.s16
red1
,
red16_1
,
#
6
vqrshrun.s16
green1
,
green16_1
,
#
6
vqrshrun.s16
blue1
,
blue16_1
,
#
6
pld
[
Y1
]
vmov.u8
alpha2
,
#
255
/
*
Y
Bottom
Row
*/
vld2.u8
{
y1
,
y2
},
[
Y2
,:
128
]!
vmov.u8
alpha1
,
#
255
vzip.u8
red1
,
red2
vzip.u8
green1
,
green2
vzip.u8
blue1
,
blue2
vmull.u8
lumi2
,
y2
,
coefY
vst4.u8
{
red1
,
green1
,
blue1
,
alpha1
},
[
O1
,:
128
]!
vst4.u8
{
red2
,
green2
,
blue2
,
alpha2
},
[
O1
,:
128
]!
/
*
Y
Bottom
Row
*/
vld2.u8
{
y1
,
y2
},
[
Y2
,:
128
]!
/
*
y1
:
chrominance
+
luminance
,
then
clamp
(
divide
by
64
)
*/
vmull.u8
lumi
,
y1
,
coefY
vqadd.s16
red
,
lumi
,
chro_r
vqadd.s16
green
,
lumi
,
chro_g
vqadd.s16
blue
,
lumi
,
chro_b
vqrshrun.s16
red1
,
red
,
#
6
vqrshrun.s16
green1
,
green
,
#
6
vqrshrun.s16
blue1
,
blue
,
#
6
/
*
y2
:
chrominance
+
luminance
,
then
clamp
(
divide
by
64
)
*/
vmull.u8
lumi
,
y2
,
coefY
vqadd.s16
red
,
lumi
,
chro_r
vqadd.s16
green
,
lumi
,
chro_g
vqadd.s16
blue
,
lumi
,
chro_b
vqrshrun.s16
red2
,
red
,
#
6
vqrshrun.s16
green2
,
green
,
#
6
vqrshrun.s16
blue2
,
blue
,
#
6
/
*
chrominance
+
luminance
*/
vmull.u8
lumi1
,
y1
,
coefY
vqadd.s16
red16_2
,
lumi2
,
chro_r
vqadd.s16
green16_2
,
lumi2
,
chro_g
vqadd.s16
blue16_2
,
lumi2
,
chro_b
vqadd.s16
red16_1
,
lumi1
,
chro_r
vqadd.s16
green16_1
,
lumi1
,
chro_g
vqadd.s16
blue16_1
,
lumi1
,
chro_b
/
*
clamp
(
divide
by
64
)
*/
vqrshrun.s16
blue2
,
blue16_2
,
#
6
vqrshrun.s16
red2
,
red16_2
,
#
6
vqrshrun.s16
green2
,
green16_2
,
#
6
vqrshrun.s16
red1
,
red16_1
,
#
6
vqrshrun.s16
green1
,
green16_1
,
#
6
vqrshrun.s16
blue1
,
blue16_1
,
#
6
pld
[
Y2
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment