Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
V
vlc
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Redmine
Redmine
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Metrics
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
videolan
vlc
Commits
9acaa4b2
Commit
9acaa4b2
authored
Jun 15, 2007
by
Damien Fouilleul
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- video_chromas: more SSE2 and MMX support and optimization, added SSE2 i420 -> RGB acceleration
parent
7b8ea9c3
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
2046 additions
and
297 deletions
+2046
-297
configure.ac
configure.ac
+2
-2
modules/video_chroma/Modules.am
modules/video_chroma/Modules.am
+7
-0
modules/video_chroma/i420_rgb.c
modules/video_chroma/i420_rgb.c
+47
-16
modules/video_chroma/i420_rgb.h
modules/video_chroma/i420_rgb.h
+6
-1
modules/video_chroma/i420_rgb16.c
modules/video_chroma/i420_rgb16.c
+1295
-189
modules/video_chroma/i420_rgb_mmx.h
modules/video_chroma/i420_rgb_mmx.h
+661
-73
modules/video_chroma/i420_yuy2.c
modules/video_chroma/i420_yuy2.c
+16
-4
modules/video_chroma/i420_yuy2.h
modules/video_chroma/i420_yuy2.h
+12
-12
No files found.
configure.ac
View file @
9acaa4b2
...
...
@@ -1274,7 +1274,7 @@ MMXEXT_MODULES="memcpymmxext"
#MMXEXT_MODULES="${MMXEXT_MODULES} idctmmxext motionmmxext"
THREEDNOW_MODULES="memcpy3dn"
SSE_MODULES=""
SSE2_MODULES="i420_yuy2_sse2"
SSE2_MODULES="i420_
rgb_sse2 i420_
yuy2_sse2"
ALTIVEC_MODULES="memcpyaltivec i420_yuy2_altivec"
#ALTIVEC_MODULES="${ALTIVEC_MODULES} idctaltivec motionaltivec"
...
...
@@ -1325,7 +1325,7 @@ AC_CACHE_CHECK([if \$CC groks SSE2 intrinsics],
[ac_cv_c_sse2_intrinsics=no])])
if test "${ac_cv_c_sse2_intrinsics}" != "no"; then
AC_DEFINE(HAVE_SSE2_INTRINSICS, 1, Define if SSE2 intrinsics are available.)
dnl
VLC_ADD_CFLAGS([i420_rgb_sse2],[-msse2])
VLC_ADD_CFLAGS([i420_rgb_sse2],[-msse2])
fi
AC_CACHE_CHECK([if \$CC groks MMX inline assembly],
...
...
modules/video_chroma/Modules.am
View file @
9acaa4b2
...
...
@@ -13,6 +13,13 @@ SOURCES_i420_rgb_mmx = \
i420_rgb_mmx.h \
$(NULL)
SOURCES_i420_rgb_sse2 = \
i420_rgb.c \
i420_rgb.h \
i420_rgb16.c \
i420_rgb_mmx.h \
$(NULL)
SOURCES_i420_yuy2 = \
i420_yuy2.c \
i420_yuy2.h \
...
...
modules/video_chroma/i420_rgb.c
View file @
9acaa4b2
...
...
@@ -4,7 +4,8 @@
* Copyright (C) 2000, 2001, 2004 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
* Authors: Sam Hocevar <sam@zoy.org>
* Damien Fouilleul <damienf@videolan.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
...
...
@@ -72,6 +73,11 @@ vlc_module_begin();
"RV15,RV16,RV24,RV32 conversions"
)
);
set_capability
(
"chroma"
,
100
);
add_requirement
(
MMX
);
#elif defined (MODULE_NAME_IS_i420_rgb_sse2)
set_description
(
_
(
"SSE2 I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions"
)
);
set_capability
(
"chroma"
,
120
);
add_requirement
(
SSE2
);
#endif
set_callbacks
(
Activate
,
Deactivate
);
vlc_module_end
();
...
...
@@ -107,19 +113,30 @@ static int Activate( vlc_object_t *p_this )
#endif
case
VLC_FOURCC
(
'R'
,
'V'
,
'1'
,
'5'
):
case
VLC_FOURCC
(
'R'
,
'V'
,
'1'
,
'6'
):
#if
defined (MODULE_NAME_IS_i420_rgb_mmx
)
#if
! defined (MODULE_NAME_IS_i420_rgb
)
/* If we don't have support for the bitmasks, bail out */
if
(
(
p_vout
->
output
.
i_rmask
!=
0x7c00
||
p_vout
->
output
.
i_gmask
!=
0x03e0
||
p_vout
->
output
.
i_bmask
!=
0x001f
)
&&
(
p_vout
->
output
.
i_rmask
!=
0xf800
||
p_vout
->
output
.
i_gmask
!=
0x07e0
||
p_vout
->
output
.
i_bmask
!=
0x001f
)
)
if
(
(
p_vout
->
output
.
i_rmask
==
0x7c00
&&
p_vout
->
output
.
i_gmask
==
0x03e0
&&
p_vout
->
output
.
i_bmask
==
0x001f
)
)
{
return
-
1
;
/* R5G5B5 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is R5G5B5"
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_R5G5B5
);
}
#endif
else
if
(
(
p_vout
->
output
.
i_rmask
==
0xf800
&&
p_vout
->
output
.
i_gmask
==
0x07e0
&&
p_vout
->
output
.
i_bmask
==
0x001f
)
)
{
/* R5G6B5 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is R5G6B5"
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_R5G6B5
);
}
else
return
-
1
;
#else
/* generic C chroma converter */
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_RGB16
);
#endif
break
;
#if 0
...
...
@@ -128,16 +145,30 @@ static int Activate( vlc_object_t *p_this )
#endif
case
VLC_FOURCC
(
'R'
,
'V'
,
'3'
,
'2'
):
#if
defined (MODULE_NAME_IS_i420_rgb_mmx
)
#if
! defined (MODULE_NAME_IS_i420_rgb
)
/* If we don't have support for the bitmasks, bail out */
if
(
p_vout
->
output
.
i_rmask
!
=
0x00ff0000
||
p_vout
->
output
.
i_gmask
!
=
0x0000ff00
||
p_vout
->
output
.
i_bmask
!
=
0x000000ff
)
if
(
p_vout
->
output
.
i_rmask
=
=
0x00ff0000
&&
p_vout
->
output
.
i_gmask
=
=
0x0000ff00
&&
p_vout
->
output
.
i_bmask
=
=
0x000000ff
)
{
return
-
1
;
/* A8R8G8B8 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is A8R8G8B8"
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_A8R8G8B8
);
}
#endif
else
if
(
p_vout
->
output
.
i_rmask
==
0x0000ff00
&&
p_vout
->
output
.
i_gmask
==
0x00ff0000
&&
p_vout
->
output
.
i_bmask
==
0xff000000
)
{
/* B8G8R8A8 pixel format */
msg_Dbg
(
p_this
,
"RGB pixel format is B8G8R8A8"
);
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_B8G8R8A8
);
}
else
return
-
1
;
#else
/* generic C chroma converter */
p_vout
->
chroma
.
pf_convert
=
E_
(
I420_RGB32
);
#endif
break
;
default:
...
...
modules/video_chroma/i420_rgb.h
View file @
9acaa4b2
...
...
@@ -58,9 +58,14 @@ struct chroma_sys_t
#ifdef MODULE_NAME_IS_i420_rgb
void
E_
(
I420_RGB8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_RGB16_dither
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
#endif
void
E_
(
I420_RGB16
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_RGB32
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
#else // if defined(MODULE_NAME_IS_i420_rgb_mmx)
void
E_
(
I420_R5G5B5
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_R5G6B5
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_A8R8G8B8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
void
E_
(
I420_B8G8R8A8
)
(
vout_thread_t
*
,
picture_t
*
,
picture_t
*
);
#endif
/*****************************************************************************
* CONVERT_*_PIXEL: pixel conversion macros
...
...
modules/video_chroma/i420_rgb16.c
View file @
9acaa4b2
This diff is collapsed.
Click to expand it.
modules/video_chroma/i420_rgb_mmx.h
View file @
9acaa4b2
This diff is collapsed.
Click to expand it.
modules/video_chroma/i420_yuy2.c
View file @
9acaa4b2
...
...
@@ -306,7 +306,8 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__
__volatile__
(
"emms"
::
);
/* re-enable FPU registers */
__asm__
__volatile__
(
"emms"
);
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
...
...
@@ -347,6 +348,8 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
p_line1
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
}
/* make sure all SSE2 stores are visible thereafter */
__asm__
__volatile__
(
"sfence"
);
}
else
{
...
...
@@ -514,7 +517,8 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__
__volatile__
(
"emms"
::
);
/* re-enable FPU registers */
__asm__
__volatile__
(
"emms"
);
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
...
...
@@ -554,6 +558,8 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
p_line1
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
}
/* make sure all SSE2 stores are visible thereafter */
__asm__
__volatile__
(
"sfence"
);
}
else
{
...
...
@@ -720,7 +726,8 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__
__volatile__
(
"emms"
::
);
/* re-enable FPU registers */
__asm__
__volatile__
(
"emms"
);
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
...
...
@@ -760,6 +767,8 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
p_line1
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
}
/* make sure all SSE2 stores are visible thereafter */
__asm__
__volatile__
(
"sfence"
);
}
else
{
...
...
@@ -861,7 +870,8 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__
__volatile__
(
"emms"
::
);
/* re-enable FPU registers */
__asm__
__volatile__
(
"emms"
);
#endif
#else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
...
...
@@ -897,6 +907,8 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
p_line1
+=
i_dest_margin
;
p_line2
+=
i_dest_margin
;
}
/* make sure all SSE2 stores are visible thereafter */
__asm__
__volatile__
(
"sfence"
);
}
else
{
...
...
modules/video_chroma/i420_yuy2.h
View file @
9acaa4b2
...
...
@@ -136,14 +136,14 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm2, %%xmm1 # v7 u7 v6 u6 .. u1 v0 u0 \n\
movdqa %%xmm0, %%xmm2 # y15 y14 y13 .. y2 y1 y0 \n\
punpcklbw %%xmm1, %%xmm2 # v3 y7 u3 .. v0 y1 u0 y0 \n\
mov
dqa
%%xmm2, (%0) # Store low YUYV \n\
mov
ntdq
%%xmm2, (%0) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm0 # v3 y7 u3 y6 v2 y5 u2 y4 \n\
mov
dqa
%%xmm0, 16(%0) # Store high YUYV \n\
mov
ntdq
%%xmm0, 16(%0) # Store high YUYV \n\
movdqa %%xmm3, %%xmm4 # Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm1, %%xmm4 # v1 Y3 u1 Y2 v0 Y1 u0 Y0 \n\
mov
dqa
%%xmm4, (%1) # Store low YUYV \n\
mov
ntdq
%%xmm4, (%1) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm3 # v3 Y7 u3 Y6 v2 Y5 u2 Y4 \n\
mov
dqa
%%xmm3, 16(%1) # Store high YUYV \n\
mov
ntdq
%%xmm3, 16(%1) # Store high YUYV \n\
"
#define SSE2_YUV420_YUYV_UNALIGNED " \n\
...
...
@@ -172,14 +172,14 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm2, %%xmm1 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
movdqa %%xmm0, %%xmm2 # y7 y6 y5 y4 y3 y2 y1 y0 \n\
punpcklbw %%xmm1, %%xmm2 # u1 y3 v1 y2 u0 y1 v0 y0 \n\
mov
dqa
%%xmm2, (%0) # Store low YUYV \n\
mov
ntdq
%%xmm2, (%0) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm0 # u3 y7 v3 y6 u2 y5 v2 y4 \n\
mov
dqa
%%xmm0, 16(%0) # Store high YUYV \n\
mov
ntdq
%%xmm0, 16(%0) # Store high YUYV \n\
movdqa %%xmm3, %%xmm4 # Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm1, %%xmm4 # u1 Y3 v1 Y2 u0 Y1 v0 Y0 \n\
mov
dqa
%%xmm4, (%1) # Store low YUYV \n\
mov
ntdq
%%xmm4, (%1) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm3 # u3 Y7 v3 Y6 u2 Y5 v2 Y4 \n\
mov
dqa
%%xmm3, 16(%1) # Store high YUYV \n\
mov
ntdq
%%xmm3, 16(%1) # Store high YUYV \n\
"
#define SSE2_YUV420_YVYU_UNALIGNED " \n\
...
...
@@ -208,15 +208,15 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm2, %%xmm1 # v3 u3 v2 u2 v1 u1 v0 u0 \n\
movdqa %%xmm1, %%xmm2 # v3 u3 v2 u2 v1 u1 v0 u0 \n\
punpcklbw %%xmm0, %%xmm2 # y3 v1 y2 u1 y1 v0 y0 u0 \n\
mov
dqa
%%xmm2, (%0) # Store low UYVY \n\
mov
ntdq
%%xmm2, (%0) # Store low UYVY \n\
movdqa %%xmm1, %%xmm2 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
punpckhbw %%xmm0, %%xmm2 # y3 v1 y2 u1 y1 v0 y0 u0 \n\
mov
dqa
%%xmm2, 16(%0) # Store high UYVY \n\
mov
ntdq
%%xmm2, 16(%0) # Store high UYVY \n\
movdqa %%xmm1, %%xmm4 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
punpcklbw %%xmm3, %%xmm4 # Y3 v1 Y2 u1 Y1 v0 Y0 u0 \n\
mov
dqa
%%xmm4, (%1) # Store low UYVY \n\
mov
ntdq
%%xmm4, (%1) # Store low UYVY \n\
punpckhbw %%xmm3, %%xmm1 # Y7 v3 Y6 u3 Y5 v2 Y4 u2 \n\
mov
dqa
%%xmm1, 16(%1) # Store high UYVY \n\
mov
ntdq
%%xmm1, 16(%1) # Store high UYVY \n\
"
#define SSE2_YUV420_UYVY_UNALIGNED " \n\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment