Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc-gpu
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc-gpu
Commits
9acaa4b2
Commit
9acaa4b2
authored
Jun 15, 2007
by
Damien Fouilleul
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- video_chromas: more SSE2 and MMX support and optimization, added SSE2 i420 -> RGB acceleration
parent
7b8ea9c3
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
2046 additions
and
297 deletions
+2046
-297
configure.ac
configure.ac
+2
-2
modules/video_chroma/Modules.am
modules/video_chroma/Modules.am
+7
-0
modules/video_chroma/i420_rgb.c
modules/video_chroma/i420_rgb.c
+47
-16
modules/video_chroma/i420_rgb.h
modules/video_chroma/i420_rgb.h
+6
-1
modules/video_chroma/i420_rgb16.c
modules/video_chroma/i420_rgb16.c
+1295
-189
modules/video_chroma/i420_rgb_mmx.h
modules/video_chroma/i420_rgb_mmx.h
+661
-73
modules/video_chroma/i420_yuy2.c
modules/video_chroma/i420_yuy2.c
+16
-4
modules/video_chroma/i420_yuy2.h
modules/video_chroma/i420_yuy2.h
+12
-12
No files found.
configure.ac
View file @
9acaa4b2
...
@@ -1274,7 +1274,7 @@ MMXEXT_MODULES="memcpymmxext"
...
@@ -1274,7 +1274,7 @@ MMXEXT_MODULES="memcpymmxext"
#MMXEXT_MODULES="${MMXEXT_MODULES} idctmmxext motionmmxext"
#MMXEXT_MODULES="${MMXEXT_MODULES} idctmmxext motionmmxext"
THREEDNOW_MODULES="memcpy3dn"
THREEDNOW_MODULES="memcpy3dn"
SSE_MODULES=""
SSE_MODULES=""
SSE2_MODULES="i420_yuy2_sse2"
SSE2_MODULES="i420_
rgb_sse2 i420_
yuy2_sse2"
ALTIVEC_MODULES="memcpyaltivec i420_yuy2_altivec"
ALTIVEC_MODULES="memcpyaltivec i420_yuy2_altivec"
#ALTIVEC_MODULES="${ALTIVEC_MODULES} idctaltivec motionaltivec"
#ALTIVEC_MODULES="${ALTIVEC_MODULES} idctaltivec motionaltivec"
...
@@ -1325,7 +1325,7 @@ AC_CACHE_CHECK([if \$CC groks SSE2 intrinsics],
...
@@ -1325,7 +1325,7 @@ AC_CACHE_CHECK([if \$CC groks SSE2 intrinsics],
[ac_cv_c_sse2_intrinsics=no])])
[ac_cv_c_sse2_intrinsics=no])])
if test "${ac_cv_c_sse2_intrinsics}" != "no"; then
if test "${ac_cv_c_sse2_intrinsics}" != "no"; then
AC_DEFINE(HAVE_SSE2_INTRINSICS, 1, Define if SSE2 intrinsics are available.)
AC_DEFINE(HAVE_SSE2_INTRINSICS, 1, Define if SSE2 intrinsics are available.)
dnl
VLC_ADD_CFLAGS([i420_rgb_sse2],[-msse2])
VLC_ADD_CFLAGS([i420_rgb_sse2],[-msse2])
fi
fi
AC_CACHE_CHECK([if \$CC groks MMX inline assembly],
AC_CACHE_CHECK([if \$CC groks MMX inline assembly],
...
...
modules/video_chroma/Modules.am
View file @
9acaa4b2
...
@@ -13,6 +13,13 @@ SOURCES_i420_rgb_mmx = \
...
@@ -13,6 +13,13 @@ SOURCES_i420_rgb_mmx = \
i420_rgb_mmx.h \
i420_rgb_mmx.h \
$(NULL)
$(NULL)
SOURCES_i420_rgb_sse2 = \
i420_rgb.c \
i420_rgb.h \
i420_rgb16.c \
i420_rgb_mmx.h \
$(NULL)
SOURCES_i420_yuy2 = \
SOURCES_i420_yuy2 = \
i420_yuy2.c \
i420_yuy2.c \
i420_yuy2.h \
i420_yuy2.h \
...
...
modules/video_chroma/i420_rgb.c
View file @
9acaa4b2
...
@@ -4,7 +4,8 @@
...
@@ -4,7 +4,8 @@
* Copyright (C) 2000, 2001, 2004 the VideoLAN team
* Copyright (C) 2000, 2001, 2004 the VideoLAN team
* $Id$
* $Id$
*
*
* Author: Sam Hocevar <sam@zoy.org>
* Authors: Sam Hocevar <sam@zoy.org>
* Damien Fouilleul <damienf@videolan.org>
*
*
* This program is free software; you can redistribute it and/or modify
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* it under the terms of the GNU General Public License as published by
...
@@ -72,6 +73,11 @@ vlc_module_begin();
...
@@ -72,6 +73,11 @@ vlc_module_begin();
"RV15,RV16,RV24,RV32 conversions"
)
);
"RV15,RV16,RV24,RV32 conversions"
)
);
set_capability
(
"chroma"
,
100
);
set_capability
(
"chroma"
,
100
);
add_requirement
(
MMX
);
add_requirement
(
MMX
);
#elif defined (MODULE_NAME_IS_i420_rgb_sse2)
set_description
(
_
(
"SSE2 I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions"
)
);
set_capability
(
"chroma"
,
120
);
add_requirement
(
SSE2
);
#endif
#endif
set_callbacks
(
Activate
,
Deactivate
);
set_callbacks
(
Activate
,
Deactivate
);
vlc_module_end
();
vlc_module_end
();
...
@@ -107,19 +113,30 @@ static int Activate( vlc_object_t *p_this )
...
@@ -107,19 +113,30 @@ static int Activate( vlc_object_t *p_this )
#endif
#endif
case
VLC_FOURCC
(
'R'
,
'V'
,
'1'
,
'5'
):
case
VLC_FOURCC
(
'R'
,
'V'
,
'1'
,
'5'
):
case
VLC_FOURCC
(
'R'
,
'V'
,
'1'
,
'6'
):
case
VLC_FOURCC
(
'R'
,
'V'
,
'1'
,
'6'
):
#if
defined (MODULE_NAME_IS_i420_rgb_mmx
)
#if
! defined (MODULE_NAME_IS_i420_rgb
)
/* If we don't have support for the bitmasks, bail out */
/* If we don't have support for the bitmasks, bail out */
if
(
(
p_vout
->
output
.
i_rmask
!=
0x7c00
if
(
(
p_vout
->
output
.
i_rmask
==
0x7c00
||
p_vout
->
output
.
i_gmask
!=
0x03e0
&&
p_vout
->
output
.
i_gmask
==
0x03e0
||
p_vout
->
output
.
i_bmask
!=
0x001f
)
&&
p_vout
->
output
.
i_bmask
==
0x001f
)
)
&&
(
p_vout
->
output
.
i_rmask
!=
0xf800
||
p_vout
->
output
.
i_gmask
!=
0x07e0
||
p_vout
->
output
.
i_bmask
!=
0x001f
)
)
{
{
return
-
1
;
/* R5G5B5 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is R5G5B5"
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_R5G5B5
);
}
}
#endif
else
if
(
(
p_vout
->
output
.
i_rmask
==
0xf800
&&
p_vout
->
output
.
i_gmask
==
0x07e0
&&
p_vout
->
output
.
i_bmask
==
0x001f
)
)
{
/* R5G6B5 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is R5G6B5"
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_R5G6B5
);
}
else
return
-
1
;
#else
// generic C chroma converter
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_RGB16
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_RGB16
);
#endif
break
;
break
;
#if 0
#if 0
...
@@ -128,16 +145,30 @@ static int Activate( vlc_object_t *p_this )
...
@@ -128,16 +145,30 @@ static int Activate( vlc_object_t *p_this )
#endif
#endif
case
VLC_FOURCC
(
'R'
,
'V'
,
'3'
,
'2'
):
case
VLC_FOURCC
(
'R'
,
'V'
,
'3'
,
'2'
):
#if
defined (MODULE_NAME_IS_i420_rgb_mmx
)
#if
! defined (MODULE_NAME_IS_i420_rgb
)
/* If we don't have support for the bitmasks, bail out */
/* If we don't have support for the bitmasks, bail out */
if
(
p_vout
->
output
.
i_rmask
!
=
0x00ff0000
if
(
p_vout
->
output
.
i_rmask
=
=
0x00ff0000
||
p_vout
->
output
.
i_gmask
!
=
0x0000ff00
&&
p_vout
->
output
.
i_gmask
=
=
0x0000ff00
||
p_vout
->
output
.
i_bmask
!
=
0x000000ff
)
&&
p_vout
->
output
.
i_bmask
=
=
0x000000ff
)
{
{
return
-
1
;
/* A8R8G8B8 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is A8R8G8B8"
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_A8R8G8B8
);
}
}
#endif
else
if
(
p_vout
->
output
.
i_rmask
==
0x0000ff00
&&
p_vout
->
output
.
i_gmask
==
0x00ff0000
&&
p_vout
->
output
.
i_bmask
==
0xff000000
)
{
/* B8G8R8A8 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is B8G8R8A8"
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_B8G8R8A8
);
}
else
return
-
1
;
#else
// generic C chroma converter
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_RGB32
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_RGB32
);
#endif
break
;
break
;
default:
default:
...
...
modules/video_chroma/i420_rgb.h
View file @
9acaa4b2
...
@@ -58,9 +58,14 @@ struct chroma_sys_t
...
@@ -58,9 +58,14 @@ struct chroma_sys_t
#ifdef MODULE_NAME_IS_i420_rgb
#ifdef MODULE_NAME_IS_i420_rgb
void
E_
(
I420_RGB8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_RGB8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_RGB16_dither
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_RGB16_dither
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
#endif
void
E_
(
I420_RGB16
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_RGB16
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_RGB32
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_RGB32
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
#else // if defined(MODULE_NAME_IS_i420_rgb_mmx)
void
E_
(
I420_R5G5B5
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_R5G6B5
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_A8R8G8B8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_B8G8R8A8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
#endif
/*****************************************************************************
/*****************************************************************************
* CONVERT_*_PIXEL: pixel conversion macros
* CONVERT_*_PIXEL: pixel conversion macros
...
...
modules/video_chroma/i420_rgb16.c
View file @
9acaa4b2
This diff is collapsed.
Click to expand it.
modules/video_chroma/i420_rgb_mmx.h
View file @
9acaa4b2
This diff is collapsed.
Click to expand it.
modules/video_chroma/i420_yuy2.c
View file @
9acaa4b2
...
@@ -306,7 +306,8 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
...
@@ -306,7 +306,8 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
}
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__
__volatile__
(
"emms"
::
);
/* re-enable FPU registers */
__asm__
__volatile__
(
"emms"
);
#endif
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
...
@@ -347,6 +348,8 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
...
@@ -347,6 +348,8 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
p_line1
+=
i_dest_margin
;
p_line1
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
}
}
/* make sure all SSE2 stores are visible thereafter */
__asm__
__volatile__
(
"sfence"
);
}
}
else
else
{
{
...
@@ -514,7 +517,8 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
...
@@ -514,7 +517,8 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
}
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__
__volatile__
(
"emms"
::
);
/* re-enable FPU registers */
__asm__
__volatile__
(
"emms"
);
#endif
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
...
@@ -554,6 +558,8 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
...
@@ -554,6 +558,8 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
p_line1
+=
i_dest_margin
;
p_line1
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
}
}
/* make sure all SSE2 stores are visible thereafter */
__asm__
__volatile__
(
"sfence"
);
}
}
else
else
{
{
...
@@ -720,7 +726,8 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
...
@@ -720,7 +726,8 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
}
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__
__volatile__
(
"emms"
::
);
/* re-enable FPU registers */
__asm__
__volatile__
(
"emms"
);
#endif
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
...
@@ -760,6 +767,8 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
...
@@ -760,6 +767,8 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
p_line1
+=
i_dest_margin
;
p_line1
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
}
}
/* make sure all SSE2 stores are visible thereafter */
__asm__
__volatile__
(
"sfence"
);
}
}
else
else
{
{
...
@@ -861,7 +870,8 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
...
@@ -861,7 +870,8 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
}
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__
__volatile__
(
"emms"
::
);
/* re-enable FPU registers */
__asm__
__volatile__
(
"emms"
);
#endif
#endif
#else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
#else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
...
@@ -897,6 +907,8 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
...
@@ -897,6 +907,8 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
p_line1
+=
i_dest_margin
;
p_line1
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
}
}
/* make sure all SSE2 stores are visible thereafter */
__asm__
__volatile__
(
"sfence"
);
}
}
else
else
{
{
...
...
modules/video_chroma/i420_yuy2.h
View file @
9acaa4b2
...
@@ -136,14 +136,14 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
...
@@ -136,14 +136,14 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm2, %%xmm1 # v7 u7 v6 u6 .. u1 v0 u0 \n\
punpcklbw %%xmm2, %%xmm1 # v7 u7 v6 u6 .. u1 v0 u0 \n\
movdqa %%xmm0, %%xmm2 # y15 y14 y13 .. y2 y1 y0 \n\
movdqa %%xmm0, %%xmm2 # y15 y14 y13 .. y2 y1 y0 \n\
punpcklbw %%xmm1, %%xmm2 # v3 y7 u3 .. v0 y1 u0 y0 \n\
punpcklbw %%xmm1, %%xmm2 # v3 y7 u3 .. v0 y1 u0 y0 \n\
mov
dqa
%%xmm2, (%0) # Store low YUYV \n\
mov
ntdq
%%xmm2, (%0) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm0 # v3 y7 u3 y6 v2 y5 u2 y4 \n\
punpckhbw %%xmm1, %%xmm0 # v3 y7 u3 y6 v2 y5 u2 y4 \n\
mov
dqa
%%xmm0, 16(%0) # Store high YUYV \n\
mov
ntdq
%%xmm0, 16(%0) # Store high YUYV \n\
movdqa %%xmm3, %%xmm4 # Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
movdqa %%xmm3, %%xmm4 # Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm1, %%xmm4 # v1 Y3 u1 Y2 v0 Y1 u0 Y0 \n\
punpcklbw %%xmm1, %%xmm4 # v1 Y3 u1 Y2 v0 Y1 u0 Y0 \n\
mov
dqa
%%xmm4, (%1) # Store low YUYV \n\
mov
ntdq
%%xmm4, (%1) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm3 # v3 Y7 u3 Y6 v2 Y5 u2 Y4 \n\
punpckhbw %%xmm1, %%xmm3 # v3 Y7 u3 Y6 v2 Y5 u2 Y4 \n\
mov
dqa
%%xmm3, 16(%1) # Store high YUYV \n\
mov
ntdq
%%xmm3, 16(%1) # Store high YUYV \n\
"
"
#define SSE2_YUV420_YUYV_UNALIGNED " \n\
#define SSE2_YUV420_YUYV_UNALIGNED " \n\
...
@@ -172,14 +172,14 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
...
@@ -172,14 +172,14 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm2, %%xmm1 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
punpcklbw %%xmm2, %%xmm1 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
movdqa %%xmm0, %%xmm2 # y7 y6 y5 y4 y3 y2 y1 y0 \n\
movdqa %%xmm0, %%xmm2 # y7 y6 y5 y4 y3 y2 y1 y0 \n\
punpcklbw %%xmm1, %%xmm2 # u1 y3 v1 y2 u0 y1 v0 y0 \n\
punpcklbw %%xmm1, %%xmm2 # u1 y3 v1 y2 u0 y1 v0 y0 \n\
mov
dqa
%%xmm2, (%0) # Store low YUYV \n\
mov
ntdq
%%xmm2, (%0) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm0 # u3 y7 v3 y6 u2 y5 v2 y4 \n\
punpckhbw %%xmm1, %%xmm0 # u3 y7 v3 y6 u2 y5 v2 y4 \n\
mov
dqa
%%xmm0, 16(%0) # Store high YUYV \n\
mov
ntdq
%%xmm0, 16(%0) # Store high YUYV \n\
movdqa %%xmm3, %%xmm4 # Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
movdqa %%xmm3, %%xmm4 # Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm1, %%xmm4 # u1 Y3 v1 Y2 u0 Y1 v0 Y0 \n\
punpcklbw %%xmm1, %%xmm4 # u1 Y3 v1 Y2 u0 Y1 v0 Y0 \n\
mov
dqa
%%xmm4, (%1) # Store low YUYV \n\
mov
ntdq
%%xmm4, (%1) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm3 # u3 Y7 v3 Y6 u2 Y5 v2 Y4 \n\
punpckhbw %%xmm1, %%xmm3 # u3 Y7 v3 Y6 u2 Y5 v2 Y4 \n\
mov
dqa
%%xmm3, 16(%1) # Store high YUYV \n\
mov
ntdq
%%xmm3, 16(%1) # Store high YUYV \n\
"
"
#define SSE2_YUV420_YVYU_UNALIGNED " \n\
#define SSE2_YUV420_YVYU_UNALIGNED " \n\
...
@@ -208,15 +208,15 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
...
@@ -208,15 +208,15 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm2, %%xmm1 # v3 u3 v2 u2 v1 u1 v0 u0 \n\
punpcklbw %%xmm2, %%xmm1 # v3 u3 v2 u2 v1 u1 v0 u0 \n\
movdqa %%xmm1, %%xmm2 # v3 u3 v2 u2 v1 u1 v0 u0 \n\
movdqa %%xmm1, %%xmm2 # v3 u3 v2 u2 v1 u1 v0 u0 \n\
punpcklbw %%xmm0, %%xmm2 # y3 v1 y2 u1 y1 v0 y0 u0 \n\
punpcklbw %%xmm0, %%xmm2 # y3 v1 y2 u1 y1 v0 y0 u0 \n\
mov
dqa
%%xmm2, (%0) # Store low UYVY \n\
mov
ntdq
%%xmm2, (%0) # Store low UYVY \n\
movdqa %%xmm1, %%xmm2 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
movdqa %%xmm1, %%xmm2 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
punpckhbw %%xmm0, %%xmm2 # y7 v3 y6 u3 y5 v2 y4 u2 \n\
punpckhbw %%xmm0, %%xmm2 # y7 v3 y6 u3 y5 v2 y4 u2 \n\
mov
dqa
%%xmm2, 16(%0) # Store high UYVY \n\
mov
ntdq
%%xmm2, 16(%0) # Store high UYVY \n\
movdqa %%xmm1, %%xmm4 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
movdqa %%xmm1, %%xmm4 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
punpcklbw %%xmm3, %%xmm4 # Y3 v1 Y2 u1 Y1 v0 Y0 u0 \n\
punpcklbw %%xmm3, %%xmm4 # Y3 v1 Y2 u1 Y1 v0 Y0 u0 \n\
mov
dqa
%%xmm4, (%1) # Store low UYVY \n\
mov
ntdq
%%xmm4, (%1) # Store low UYVY \n\
punpckhbw %%xmm3, %%xmm1 # Y7 v3 Y6 u3 Y5 v2 Y4 u2 \n\
punpckhbw %%xmm3, %%xmm1 # Y7 v3 Y6 u3 Y5 v2 Y4 u2 \n\
mov
dqa
%%xmm1, 16(%1) # Store high UYVY \n\
mov
ntdq
%%xmm1, 16(%1) # Store high UYVY \n\
"
"
#define SSE2_YUV420_UYVY_UNALIGNED " \n\
#define SSE2_YUV420_UYVY_UNALIGNED " \n\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment