Commit edfb6e4a authored by Rémi Denis-Courmont

Check for SSE2 at build-time if possible

parent 615a016b
......@@ -34,7 +34,7 @@ VLC_API unsigned vlc_CPU(void);
# define CPU_CAPABILITY_3DNOW (1<<4)
# define VLC_CPU_MMXEXT 32
# define VLC_CPU_SSE 64
# define CPU_CAPABILITY_SSE2 (1<<7)
# define VLC_CPU_SSE2 128
# define CPU_CAPABILITY_SSE3 (1<<8)
# define CPU_CAPABILITY_SSSE3 (1<<9)
# define CPU_CAPABILITY_SSE4_1 (1<<10)
......@@ -67,6 +67,12 @@ VLC_API unsigned vlc_CPU(void);
# endif
# endif
/* SSE2 availability test: a constant 1 when the compiler itself targets
 * SSE2 (__SSE2__ is defined), otherwise a check of the VLC_CPU_SSE2 bit
 * in the value returned by vlc_CPU(). */
# if !defined (__SSE2__)
#  define vlc_CPU_SSE2() (0 != (vlc_CPU() & VLC_CPU_SSE2))
# else
#  define vlc_CPU_SSE2() (1)
# endif
# elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
# define HAVE_FPU 1
# define VLC_CPU_ALTIVEC 2
......
......@@ -340,7 +340,7 @@ static int OpenDecoder( vlc_object_t *p_this )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) )
if( !vlc_CPU_SSE2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) )
......
......@@ -49,12 +49,18 @@
/* Execute the instruction op only if SSE2 is supported. */
#ifdef CAN_COMPILE_SSE2
# define ASM_SSE2(cpu, op) do { \
if (cpu & CPU_CAPABILITY_SSE2) \
asm volatile (op); \
# ifdef __SSE2__
# define ASM_SSE2(cpu, op) asm volatile (op)
# else
# define ASM_SSE2(cpu, op) do { \
if (cpu & VLC_CPU_SSE2) \
asm volatile (op); \
} while (0)
# undef vlc_CPU_SSE2
# define vlc_CPU_SSE2() ((cpu & VLC_CPU_SSE2) != 0)
# endif
#else
# define ASM_SSE2(cpu, op)
# define ASM_SSE2(cpu, op)
#endif
/* Optimized copy from "Uncacheable Speculative Write Combining" memory
......@@ -88,7 +94,7 @@ static void CopyFromUswc(uint8_t *dst, size_t dst_pitch,
} else
#endif
#ifdef CAN_COMPILE_SSE2
if (cpu & CPU_CAPABILITY_SSE2) {
if (vlc_CPU_SSE2()) {
if (!unaligned) {
for (; x+63 < width; x += 64)
COPY64(&dst[x], &src[x], "movdqa", "movdqa");
......@@ -121,7 +127,7 @@ static void Copy2d(uint8_t *dst, size_t dst_pitch,
bool unaligned = ((intptr_t)dst & 0x0f) != 0;
#ifdef CAN_COMPILE_SSE2
if (cpu & CPU_CAPABILITY_SSE2) {
if (vlc_CPU_SSE2()) {
if (!unaligned) {
for (; x+63 < width; x += 64)
COPY64(&dst[x], &src[x], "movdqa", "movntdq");
......@@ -189,7 +195,7 @@ static void SplitUV(uint8_t *dstu, size_t dstu_pitch,
} else
#endif
#ifdef CAN_COMPILE_SSE2
if (cpu & CPU_CAPABILITY_SSE2) {
if (vlc_CPU_SSE2()) {
for (x = 0; x < (width & ~31); x += 32) {
asm volatile (
"movdqu (%[mask]), %%xmm7\n"
......
......@@ -334,7 +334,7 @@ int OpenEncoder( vlc_object_t *p_this )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) )
if( !vlc_CPU_SSE2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) )
......
......@@ -1266,7 +1266,7 @@ static int Open ( vlc_object_t *p_this )
p_sys->param.cpu &= ~X264_CPU_MMXEXT;
if( !vlc_CPU_SSE() )
p_sys->param.cpu &= ~X264_CPU_SSE;
if( !(vlc_CPU() & CPU_CAPABILITY_SSE2) )
if( !vlc_CPU_SSE2() )
p_sys->param.cpu &= ~X264_CPU_SSE2;
#endif
......
......@@ -389,7 +389,7 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) )
if( !vlc_CPU_SSE2() ) /* no SSE2: set the SSE2 bit in dsp_mask */
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) )
......@@ -810,7 +810,7 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) )
if( !vlc_CPU_SSE2() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) )
......
......@@ -94,7 +94,7 @@ vlc_module_begin ()
set_description( N_( "SSE2 I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions") )
set_capability( "video filter2", 120 )
# define vlc_CPU_capable() ((vlc_CPU() & CPU_CAPABILITY_SSE2) != 0)
# define vlc_CPU_capable() vlc_CPU_SSE2()
#endif
set_callbacks( Activate, Deactivate )
vlc_module_end ()
......
......@@ -96,12 +96,12 @@ vlc_module_begin ()
#elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
set_capability( "video filter2", 250 )
# define vlc_CPU_capable() (vlc_CPU() & CPU_CAPABILITY_SSE2)
# define vlc_CPU_capable() vlc_CPU_SSE2()
#elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
set_description(
_("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
set_capability( "video filter2", 250 )
# define vlc_CPU_capable() (vlc_CPU_ALTIVEC())
# define vlc_CPU_capable() vlc_CPU_ALTIVEC()
#endif
set_callbacks( Activate, NULL )
vlc_module_end ()
......
......@@ -81,7 +81,7 @@ vlc_module_begin ()
#elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
set_capability( "video filter2", 120 )
# define vlc_CPU_capable() ((vlc_CPU() & CPU_CAPABILITY_SSE2) != 0)
# define vlc_CPU_capable() vlc_CPU_SSE2()
# define VLC_TARGET VLC_SSE
#endif
set_callbacks( Activate, NULL )
......
......@@ -108,19 +108,22 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
void (*filter)(uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next,
int w, int prefs, int mrefs, int parity, int mode);
filter = yadif_filter_line_c;
#if defined(HAVE_YADIF_MMX)
if( vlc_CPU_MMX() )
filter = yadif_filter_line_mmx;
#if defined(HAVE_YADIF_SSSE3)
if( vlc_CPU() & CPU_CAPABILITY_SSSE3 )
filter = yadif_filter_line_ssse3;
else
#endif
#if defined(HAVE_YADIF_SSE2)
if( vlc_CPU() & CPU_CAPABILITY_SSE2 )
if( vlc_CPU_SSE2() )
filter = yadif_filter_line_sse2;
else
#endif
#if defined(HAVE_YADIF_SSSE3)
if( vlc_CPU() & CPU_CAPABILITY_SSSE3 )
filter = yadif_filter_line_ssse3;
#if defined(HAVE_YADIF_MMX)
if( vlc_CPU_MMX() )
filter = yadif_filter_line_mmx;
else
#endif
filter = yadif_filter_line_c;
for( int n = 0; n < p_dst->i_planes; n++ )
{
......
......@@ -632,8 +632,8 @@ int Open( vlc_object_t *p_this )
p_sys->pf_merge = MergeAltivec;
else
#endif
#if defined(CAN_COMPILE_SSE)
if( (vlc_CPU() & CPU_CAPABILITY_SSE2) )
#if defined(CAN_COMPILE_SSE2)
if( vlc_CPU_SSE2() )
{
p_sys->pf_merge = chroma->pixel_size == 1 ? Merge8BitSSE2 : Merge16BitSSE2;
p_sys->pf_end_merge = EndMMX;
......
......@@ -135,7 +135,7 @@ static int Open(vlc_object_t *object)
cfg->buf = NULL;
#if HAVE_SSE2 && HAVE_6REGS
if (vlc_CPU() & CPU_CAPABILITY_SSE2)
if (vlc_CPU_SSE2())
cfg->blur_line = blur_line_sse2;
else
#endif
......
......@@ -409,7 +409,7 @@ static int Open(vlc_object_t *object)
sys->blend = BlockBlendC;
sys->emms = NULL;
#if defined(CAN_COMPILE_SSE2) && 1
if (vlc_CPU() & CPU_CAPABILITY_SSE2) {
if (vlc_CPU_SSE2()) {
sys->blend = BlockBlendSse2;
sys->emms = Emms;
}
......
......@@ -245,7 +245,7 @@ static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
const uint8_t filling_const_8v = 128 + i_intensity / 14;
#if defined(CAN_COMPILE_SSE2)
if (vlc_CPU() & CPU_CAPABILITY_SSE2)
if (vlc_CPU_SSE2())
{
/* prepared value for faster broadcasting in xmm register */
int i_intensity_spread = 0x10001 * (uint8_t) i_intensity;
......
......@@ -232,11 +232,9 @@ void vlc_CPU_init (void)
# endif
}
# if defined (__SSE2__)
i_capabilities |= CPU_CAPABILITY_SSE2;
# elif defined (CAN_COMPILE_SSE2)
# if defined (CAN_COMPILE_SSE2)
if ((i_edx & 0x04000000) && vlc_CPU_check ("SSE2", SSE2_test))
i_capabilities |= CPU_CAPABILITY_SSE2;
i_capabilities |= VLC_CPU_SSE2;
# endif
# if defined (__SSE3__)
......@@ -348,7 +346,7 @@ void vlc_CPU_dump (vlc_object_t *obj)
if (vlc_CPU_MMX()) p += sprintf (p, "MMX ");
if (vlc_CPU_MMXEXT()) p += sprintf (p, "MMXEXT ");
if (vlc_CPU_SSE()) p += sprintf (p, "SSE ");;
PRINT_CAPABILITY(CPU_CAPABILITY_SSE2, "SSE2");
if (vlc_CPU_SSE2()) p += sprintf (p, "SSE2 ");;
PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3");
PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3");
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
......
......@@ -73,10 +73,8 @@ static void vlc_CPU_init (void)
core_caps |= VLC_CPU_SSE | VLC_CPU_MMXEXT;
if (!strcmp (cap, "mmxext"))
core_caps |= VLC_CPU_MMXEXT;
# ifndef __SSE2__
if (!strcmp (cap, "sse2"))
core_caps |= CPU_CAPABILITY_SSE2;
# endif
core_caps |= VLC_CPU_SSE2;
# ifndef __SSE3__
if (!strcmp (cap, "pni"))
core_caps |= CPU_CAPABILITY_SSE3;
......@@ -117,9 +115,6 @@ static void vlc_CPU_init (void)
/* Always enable capabilities that were forced during compilation */
#if defined (__i386__) || defined (__x86_64__)
# ifdef __SSE2__
all_caps |= CPU_CAPABILITY_SSE2;
# endif
# ifdef __SSE3__
all_caps |= CPU_CAPABILITY_SSE3;
# endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment