Commit 9acaa4b2 authored by Damien Fouilleul

- video_chromas: more SSE2 and MMX support and optimization, added SSE2 i420 -> RGB acceleration

parent 7b8ea9c3
......@@ -1274,7 +1274,7 @@ MMXEXT_MODULES="memcpymmxext"
#MMXEXT_MODULES="${MMXEXT_MODULES} idctmmxext motionmmxext"
THREEDNOW_MODULES="memcpy3dn"
SSE_MODULES=""
SSE2_MODULES="i420_yuy2_sse2"
SSE2_MODULES="i420_rgb_sse2 i420_yuy2_sse2"
ALTIVEC_MODULES="memcpyaltivec i420_yuy2_altivec"
#ALTIVEC_MODULES="${ALTIVEC_MODULES} idctaltivec motionaltivec"
......@@ -1325,7 +1325,7 @@ AC_CACHE_CHECK([if \$CC groks SSE2 intrinsics],
[ac_cv_c_sse2_intrinsics=no])])
if test "${ac_cv_c_sse2_intrinsics}" != "no"; then
AC_DEFINE(HAVE_SSE2_INTRINSICS, 1, Define if SSE2 intrinsics are available.)
dnl VLC_ADD_CFLAGS([i420_rgb_sse2],[-msse2])
VLC_ADD_CFLAGS([i420_rgb_sse2],[-msse2])
fi
AC_CACHE_CHECK([if \$CC groks MMX inline assembly],
......
......@@ -13,6 +13,13 @@ SOURCES_i420_rgb_mmx = \
i420_rgb_mmx.h \
$(NULL)
SOURCES_i420_rgb_sse2 = \
i420_rgb.c \
i420_rgb.h \
i420_rgb16.c \
i420_rgb_mmx.h \
$(NULL)
SOURCES_i420_yuy2 = \
i420_yuy2.c \
i420_yuy2.h \
......
......@@ -4,7 +4,8 @@
* Copyright (C) 2000, 2001, 2004 the VideoLAN team
* $Id$
*
* Author: Sam Hocevar <sam@zoy.org>
* Authors: Sam Hocevar <sam@zoy.org>
* Damien Fouilleul <damienf@videolan.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -72,6 +73,11 @@ vlc_module_begin();
"RV15,RV16,RV24,RV32 conversions") );
set_capability( "chroma", 100 );
add_requirement( MMX );
#elif defined (MODULE_NAME_IS_i420_rgb_sse2)
set_description( _( "SSE2 I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions") );
set_capability( "chroma", 120 );
add_requirement( SSE2 );
#endif
set_callbacks( Activate, Deactivate );
vlc_module_end();
......@@ -107,19 +113,30 @@ static int Activate( vlc_object_t *p_this )
#endif
case VLC_FOURCC('R','V','1','5'):
case VLC_FOURCC('R','V','1','6'):
#if defined (MODULE_NAME_IS_i420_rgb_mmx)
#if ! defined (MODULE_NAME_IS_i420_rgb)
/* If we don't have support for the bitmasks, bail out */
if( ( p_vout->output.i_rmask != 0x7c00
|| p_vout->output.i_gmask != 0x03e0
|| p_vout->output.i_bmask != 0x001f )
&& ( p_vout->output.i_rmask != 0xf800
|| p_vout->output.i_gmask != 0x07e0
|| p_vout->output.i_bmask != 0x001f ) )
if( ( p_vout->output.i_rmask == 0x7c00
&& p_vout->output.i_gmask == 0x03e0
&& p_vout->output.i_bmask == 0x001f ) )
{
return -1;
/* R5G5B5 pixel format */
msg_Dbg(p_this, "RGB pixel format is R5G5B5");
p_vout->chroma.pf_convert = E_(I420_R5G5B5);
}
#endif
else if( ( p_vout->output.i_rmask == 0xf800
&& p_vout->output.i_gmask == 0x07e0
&& p_vout->output.i_bmask == 0x001f ) )
{
/* R5G6B5 pixel format */
msg_Dbg(p_this, "RGB pixel format is R5G6B5");
p_vout->chroma.pf_convert = E_(I420_R5G6B5);
}
else
return -1;
#else
/* generic C chroma converter */
p_vout->chroma.pf_convert = E_(I420_RGB16);
#endif
break;
#if 0
......@@ -128,16 +145,30 @@ static int Activate( vlc_object_t *p_this )
#endif
case VLC_FOURCC('R','V','3','2'):
#if defined (MODULE_NAME_IS_i420_rgb_mmx)
#if ! defined (MODULE_NAME_IS_i420_rgb)
/* If we don't have support for the bitmasks, bail out */
if( p_vout->output.i_rmask != 0x00ff0000
|| p_vout->output.i_gmask != 0x0000ff00
|| p_vout->output.i_bmask != 0x000000ff )
if( p_vout->output.i_rmask == 0x00ff0000
&& p_vout->output.i_gmask == 0x0000ff00
&& p_vout->output.i_bmask == 0x000000ff )
{
return -1;
/* A8R8G8B8 pixel format */
msg_Dbg(p_this, "RGB pixel format is A8R8G8B8");
p_vout->chroma.pf_convert = E_(I420_A8R8G8B8);
}
#endif
else if( p_vout->output.i_rmask == 0x0000ff00
&& p_vout->output.i_gmask == 0x00ff0000
&& p_vout->output.i_bmask == 0xff000000 )
{
/* B8G8R8A8 pixel format */
msg_Dbg(p_this, "RGB pixel format is B8G8R8A8");
p_vout->chroma.pf_convert = E_(I420_B8G8R8A8);
}
else
return -1;
#else
/* generic C chroma converter */
p_vout->chroma.pf_convert = E_(I420_RGB32);
#endif
break;
default:
......
......@@ -58,9 +58,14 @@ struct chroma_sys_t
#ifdef MODULE_NAME_IS_i420_rgb
void E_(I420_RGB8) ( vout_thread_t *, picture_t *, picture_t * );
void E_(I420_RGB16_dither) ( vout_thread_t *, picture_t *, picture_t * );
#endif
void E_(I420_RGB16) ( vout_thread_t *, picture_t *, picture_t * );
void E_(I420_RGB32) ( vout_thread_t *, picture_t *, picture_t * );
#else // if defined(MODULE_NAME_IS_i420_rgb_mmx)
void E_(I420_R5G5B5) ( vout_thread_t *, picture_t *, picture_t * );
void E_(I420_R5G6B5) ( vout_thread_t *, picture_t *, picture_t * );
void E_(I420_A8R8G8B8) ( vout_thread_t *, picture_t *, picture_t * );
void E_(I420_B8G8R8A8) ( vout_thread_t *, picture_t *, picture_t * );
#endif
/*****************************************************************************
* CONVERT_*_PIXEL: pixel conversion macros
......
This diff is collapsed.
This diff is collapsed.
......@@ -306,7 +306,8 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__ __volatile__("emms" :: );
/* re-enable FPU registers */
__asm__ __volatile__ ( "emms" );
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
......@@ -347,6 +348,8 @@ static void I420_YUY2( vout_thread_t *p_vout, picture_t *p_source,
p_line1 += i_dest_margin;
p_line2 += i_dest_margin;
}
/* make sure all SSE2 stores are visible thereafter */
__asm__ __volatile__ ( "sfence" );
}
else
{
......@@ -514,7 +517,8 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__ __volatile__("emms" :: );
/* re-enable FPU registers */
__asm__ __volatile__ ( "emms" );
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
......@@ -554,6 +558,8 @@ static void I420_YVYU( vout_thread_t *p_vout, picture_t *p_source,
p_line1 += i_dest_margin;
p_line2 += i_dest_margin;
}
/* make sure all SSE2 stores are visible thereafter */
__asm__ __volatile__ ( "sfence" );
}
else
{
......@@ -720,7 +726,8 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__ __volatile__("emms" :: );
/* re-enable FPU registers */
__asm__ __volatile__ ( "emms" );
#endif
#if defined (MODULE_NAME_IS_i420_yuy2_altivec)
......@@ -760,6 +767,8 @@ static void I420_UYVY( vout_thread_t *p_vout, picture_t *p_source,
p_line1 += i_dest_margin;
p_line2 += i_dest_margin;
}
/* make sure all SSE2 stores are visible thereafter */
__asm__ __volatile__ ( "sfence" );
}
else
{
......@@ -861,7 +870,8 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
}
#if defined (MODULE_NAME_IS_i420_yuy2_mmx)
__asm__ __volatile__("emms" :: );
/* re-enable FPU registers */
__asm__ __volatile__ ( "emms" );
#endif
#else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
......@@ -897,6 +907,8 @@ static void I420_cyuv( vout_thread_t *p_vout, picture_t *p_source,
p_line1 += i_dest_margin;
p_line2 += i_dest_margin;
}
/* make sure all SSE2 stores are visible thereafter */
__asm__ __volatile__ ( "sfence" );
}
else
{
......
......@@ -136,14 +136,14 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm2, %%xmm1 # v7 u7 v6 u6 .. u1 v0 u0 \n\
movdqa %%xmm0, %%xmm2 # y15 y14 y13 .. y2 y1 y0 \n\
punpcklbw %%xmm1, %%xmm2 # v3 y7 u3 .. v0 y1 u0 y0 \n\
movdqa %%xmm2, (%0) # Store low YUYV \n\
movntdq %%xmm2, (%0) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm0 # v3 y7 u3 y6 v2 y5 u2 y4 \n\
movdqa %%xmm0, 16(%0) # Store high YUYV \n\
movntdq %%xmm0, 16(%0) # Store high YUYV \n\
movdqa %%xmm3, %%xmm4 # Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm1, %%xmm4 # v1 Y3 u1 Y2 v0 Y1 u0 Y0 \n\
movdqa %%xmm4, (%1) # Store low YUYV \n\
movntdq %%xmm4, (%1) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm3 # v3 Y7 u3 Y6 v2 Y5 u2 Y4 \n\
movdqa %%xmm3, 16(%1) # Store high YUYV \n\
movntdq %%xmm3, 16(%1) # Store high YUYV \n\
"
#define SSE2_YUV420_YUYV_UNALIGNED " \n\
......@@ -172,14 +172,14 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm2, %%xmm1 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
movdqa %%xmm0, %%xmm2 # y7 y6 y5 y4 y3 y2 y1 y0 \n\
punpcklbw %%xmm1, %%xmm2 # u1 y3 v1 y2 u0 y1 v0 y0 \n\
movdqa %%xmm2, (%0) # Store low YUYV \n\
movntdq %%xmm2, (%0) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm0 # u3 y7 v3 y6 u2 y5 v2 y4 \n\
movdqa %%xmm0, 16(%0) # Store high YUYV \n\
movntdq %%xmm0, 16(%0) # Store high YUYV \n\
movdqa %%xmm3, %%xmm4 # Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm1, %%xmm4 # u1 Y3 v1 Y2 u0 Y1 v0 Y0 \n\
movdqa %%xmm4, (%1) # Store low YUYV \n\
movntdq %%xmm4, (%1) # Store low YUYV \n\
punpckhbw %%xmm1, %%xmm3 # u3 Y7 v3 Y6 u2 Y5 v2 Y4 \n\
movdqa %%xmm3, 16(%1) # Store high YUYV \n\
movntdq %%xmm3, 16(%1) # Store high YUYV \n\
"
#define SSE2_YUV420_YVYU_UNALIGNED " \n\
......@@ -208,15 +208,15 @@ movdqa (%3), %%xmm3 # Load 16 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 \n\
punpcklbw %%xmm2, %%xmm1 # v3 u3 v2 u2 v1 u1 v0 u0 \n\
movdqa %%xmm1, %%xmm2 # v3 u3 v2 u2 v1 u1 v0 u0 \n\
punpcklbw %%xmm0, %%xmm2 # y3 v1 y2 u1 y1 v0 y0 u0 \n\
movdqa %%xmm2, (%0) # Store low UYVY \n\
movntdq %%xmm2, (%0) # Store low UYVY \n\
movdqa %%xmm1, %%xmm2 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
punpckhbw %%xmm0, %%xmm2 # y3 v1 y2 u1 y1 v0 y0 u0 \n\
movdqa %%xmm2, 16(%0) # Store high UYVY \n\
movntdq %%xmm2, 16(%0) # Store high UYVY \n\
movdqa %%xmm1, %%xmm4 # u3 v3 u2 v2 u1 v1 u0 v0 \n\
punpcklbw %%xmm3, %%xmm4 # Y3 v1 Y2 u1 Y1 v0 Y0 u0 \n\
movdqa %%xmm4, (%1) # Store low UYVY \n\
movntdq %%xmm4, (%1) # Store low UYVY \n\
punpckhbw %%xmm3, %%xmm1 # Y7 v3 Y6 u3 Y5 v2 Y4 u2 \n\
movdqa %%xmm1, 16(%1) # Store high UYVY \n\
movntdq %%xmm1, 16(%1) # Store high UYVY \n\
"
#define SSE2_YUV420_UYVY_UNALIGNED " \n\
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment