Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc-gpu
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc-gpu
Commits
6c50d21c
Commit
6c50d21c
authored
Mar 26, 2008
by
Rafaël Carré
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Let GCC choose how to reference memory addresses in the i420_rgb MMX asm
Patch by Alexis Ballier; fixes #1383
parent
df95113d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
41 additions
and
42 deletions
+41
-42
modules/video_chroma/i420_rgb_mmx.h
modules/video_chroma/i420_rgb_mmx.h
+41
-42
No files found.
modules/video_chroma/i420_rgb_mmx.h
View file @
6c50d21c
...
...
@@ -34,18 +34,19 @@
#define USED_U64(foo) \
static const uint64_t foo __asm__ (#foo) __attribute__((unused))
#endif
USED_U64
(
mmx_80w
)
=
0x0080008000800080ULL
;
USED_U64
(
mmx_10w
)
=
0x1010101010101010ULL
;
USED_U64
(
mmx_00ffw
)
=
0x00ff00ff00ff00ffULL
;
USED_U64
(
mmx_Y_coeff
)
=
0x253f253f253f253fULL
;
USED_U64
(
mmx_U_green
)
=
0xf37df37df37df37dULL
;
USED_U64
(
mmx_U_blue
)
=
0x4093409340934093ULL
;
USED_U64
(
mmx_V_red
)
=
0x3312331233123312ULL
;
USED_U64
(
mmx_V_green
)
=
0xe5fce5fce5fce5fcULL
;
USED_U64
(
mmx_mask_f8
)
=
0xf8f8f8f8f8f8f8f8ULL
;
USED_U64
(
mmx_mask_fc
)
=
0xfcfcfcfcfcfcfcfcULL
;
USED_U64
(
mmx_80w
)
=
0x0080008000800080ULL
;
/* Will be referenced as %4
* in inline asm */
USED_U64
(
mmx_10w
)
=
0x1010101010101010ULL
;
/* -- as %5 */
USED_U64
(
mmx_00ffw
)
=
0x00ff00ff00ff00ffULL
;
/* -- as %6 */
USED_U64
(
mmx_Y_coeff
)
=
0x253f253f253f253fULL
;
/* -- as %7 */
USED_U64
(
mmx_U_green
)
=
0xf37df37df37df37dULL
;
/* -- as %8 */
USED_U64
(
mmx_U_blue
)
=
0x4093409340934093ULL
;
/* -- as %9 */
USED_U64
(
mmx_V_red
)
=
0x3312331233123312ULL
;
/* -- as %10 */
USED_U64
(
mmx_V_green
)
=
0xe5fce5fce5fce5fcULL
;
/* -- as %11 */
USED_U64
(
mmx_mask_f8
)
=
0xf8f8f8f8f8f8f8f8ULL
;
/* -- as %12 */
USED_U64
(
mmx_mask_fc
)
=
0xfcfcfcfcfcfcfcfcULL
;
/* -- as %13 */
#undef USED_U64
#if defined(CAN_COMPILE_MMX)
...
...
@@ -59,18 +60,16 @@ USED_U64(mmx_mask_fc) = 0xfcfcfcfcfcfcfcfcULL;
MMX_INSTRUCTIONS \
: \
: "r" (p_y), "r" (p_u), \
"r" (p_v), "r" (p_buffer) ); \
"r" (p_v), "r" (p_buffer), \
"m" (mmx_80w), "m" (mmx_10w), \
"m" (mmx_00ffw), "m" (mmx_Y_coeff), \
"m" (mmx_U_green), "m" (mmx_U_blue), \
"m" (mmx_V_red), "m" (mmx_V_green), \
"m" (mmx_mask_f8), "m" (mmx_mask_fc) ); \
} while(0)
#define MMX_END __asm__ __volatile__ ( "emms" )
/* Use RIP-relative code in PIC mode on amd64 */
#if defined(__x86_64__) && defined(__PIC__)
# define G "(%%rip)"
#else
# define G
#endif
#define MMX_INIT_16 " \n\
movd (%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0 \n\
movd (%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0 \n\
...
...
@@ -103,27 +102,27 @@ movq (%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
# convert the chroma part \n\
punpcklbw %%mm4, %%mm0 # scatter 4 Cb 00 u3 00 u2 00 u1 00 u0 \n\
punpcklbw %%mm4, %%mm1 # scatter 4 Cr 00 v3 00 v2 00 v1 00 v0 \n\
psubsw
mmx_80w"G", %%mm0
# Cb -= 128 \n\
psubsw
mmx_80w"G", %%mm1
# Cr -= 128 \n\
psubsw
%4, %%mm0
# Cb -= 128 \n\
psubsw
%4, %%mm1
# Cr -= 128 \n\
psllw $3, %%mm0 # Promote precision \n\
psllw $3, %%mm1 # Promote precision \n\
movq %%mm0, %%mm2 # Copy 4 Cb 00 u3 00 u2 00 u1 00 u0 \n\
movq %%mm1, %%mm3 # Copy 4 Cr 00 v3 00 v2 00 v1 00 v0 \n\
pmulhw
mmx_U_green"G", %%mm2
# Mul Cb with green coeff -> Cb green \n\
pmulhw
mmx_V_green"G", %%mm3
# Mul Cr with green coeff -> Cr green \n\
pmulhw
mmx_U_blue"G", %%mm0
# Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0 \n\
pmulhw
mmx_V_red"G", %%mm1
# Mul Cr -> Cred 00 r3 00 r2 00 r1 00 r0 \n\
pmulhw
%8, %%mm2
# Mul Cb with green coeff -> Cb green \n\
pmulhw
%11, %%mm3
# Mul Cr with green coeff -> Cr green \n\
pmulhw
%9, %%mm0
# Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0 \n\
pmulhw
%10, %%mm1
# Mul Cr -> Cred 00 r3 00 r2 00 r1 00 r0 \n\
paddsw %%mm3, %%mm2 # Cb green + Cr green -> Cgreen \n\
\n\
# convert the luma part \n\
psubusb
mmx_10w"G", %%mm6
# Y -= 16 \n\
psubusb
%5, %%mm6
# Y -= 16 \n\
movq %%mm6, %%mm7 # Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
pand
mmx_00ffw"G", %%mm6
# get Y even 00 Y6 00 Y4 00 Y2 00 Y0 \n\
pand
%6, %%mm6
# get Y even 00 Y6 00 Y4 00 Y2 00 Y0 \n\
psrlw $8, %%mm7 # get Y odd 00 Y7 00 Y5 00 Y3 00 Y1 \n\
psllw $3, %%mm6 # Promote precision \n\
psllw $3, %%mm7 # Promote precision \n\
pmulhw
mmx_Y_coeff"G", %%mm6
# Mul 4 Y even 00 y6 00 y4 00 y2 00 y0 \n\
pmulhw
mmx_Y_coeff"G", %%mm7
# Mul 4 Y odd 00 y7 00 y5 00 y3 00 y1 \n\
pmulhw
%7, %%mm6
# Mul 4 Y even 00 y6 00 y4 00 y2 00 y0 \n\
pmulhw
%7, %%mm7
# Mul 4 Y odd 00 y7 00 y5 00 y3 00 y1 \n\
"
/*
...
...
@@ -168,14 +167,14 @@ punpcklbw %%mm5, %%mm2 # G7 G6 G5 G4 G3 G2 G1 G0 \n\
#define MMX_YUV_GRAY " \n\
# convert the luma part \n\
psubusb
mmx_10w"G", %%mm6
\n\
psubusb
%5, %%mm6
\n\
movq %%mm6, %%mm7 \n\
pand
mmx_00ffw"G", %%mm6
\n\
pand
%6, %%mm6
\n\
psrlw $8, %%mm7 \n\
psllw $3, %%mm6 \n\
psllw $3, %%mm7 \n\
pmulhw
mmx_Y_coeff"G", %%mm6
\n\
pmulhw
mmx_Y_coeff"G", %%mm7
\n\
pmulhw
%7, %%mm6
\n\
pmulhw
%7, %%mm7
\n\
packuswb %%mm6, %%mm6 \n\
packuswb %%mm7, %%mm7 \n\
punpcklbw %%mm7, %%mm6 \n\
...
...
@@ -183,8 +182,8 @@ punpcklbw %%mm7, %%mm6 \n\
#define MMX_UNPACK_16_GRAY " \n\
movq %%mm6, %%mm5 \n\
pand
mmx_mask_f8"G", %%mm6
\n\
pand
mmx_mask_fc"G", %%mm5
\n\
pand
%12, %%mm6
\n\
pand
%13, %%mm5
\n\
movq %%mm6, %%mm7 \n\
psrlw $3, %%mm7 \n\
pxor %%mm3, %%mm3 \n\
...
...
@@ -213,10 +212,10 @@ movq %%mm2, 8(%3) \n\
#define MMX_UNPACK_15 " \n\
# mask unneeded bits off \n\
pand
mmx_mask_f8"G", %%mm0
# b7b6b5b4 b3______ b7b6b5b4 b3______ \n\
pand
%12, %%mm0
# b7b6b5b4 b3______ b7b6b5b4 b3______ \n\
psrlw $3,%%mm0 # ______b7 b6b5b4b3 ______b7 b6b5b4b3 \n\
pand
mmx_mask_f8"G", %%mm2
# g7g6g5g4 g3______ g7g6g5g4 g3______ \n\
pand
mmx_mask_f8"G", %%mm1
# r7r6r5r4 r3______ r7r6r5r4 r3______ \n\
pand
%12, %%mm2
# g7g6g5g4 g3______ g7g6g5g4 g3______ \n\
pand
%12, %%mm1
# r7r6r5r4 r3______ r7r6r5r4 r3______ \n\
psrlw $1,%%mm1 # __r7r6r5 r4r3____ __r7r6r5 r4r3____ \n\
pxor %%mm4, %%mm4 # zero mm4 \n\
movq %%mm0, %%mm5 # Copy B7-B0 \n\
...
...
@@ -249,9 +248,9 @@ movq %%mm5, 8(%3) # store pixel 4-7 \n\
#define MMX_UNPACK_16 " \n\
# mask unneeded bits off \n\
pand
mmx_mask_f8"G", %%mm0
# b7b6b5b4 b3______ b7b6b5b4 b3______ \n\
pand
mmx_mask_fc"G", %%mm2
# g7g6g5g4 g3g2____ g7g6g5g4 g3g2____ \n\
pand
mmx_mask_f8"G", %%mm1
# r7r6r5r4 r3______ r7r6r5r4 r3______ \n\
pand
%12, %%mm0
# b7b6b5b4 b3______ b7b6b5b4 b3______ \n\
pand
%13, %%mm2
# g7g6g5g4 g3g2____ g7g6g5g4 g3g2____ \n\
pand
%12, %%mm1
# r7r6r5r4 r3______ r7r6r5r4 r3______ \n\
psrlw $3,%%mm0 # ______b7 b6b5b4b3 ______b7 b6b5b4b3 \n\
pxor %%mm4, %%mm4 # zero mm4 \n\
movq %%mm0, %%mm5 # Copy B7-B0 \n\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment