Commit 0ed12fd6 authored by Rémi Denis-Courmont

Check for SSE4 at build time where possible

parent 58dd2253
...@@ -37,9 +37,9 @@ VLC_API unsigned vlc_CPU(void); ...@@ -37,9 +37,9 @@ VLC_API unsigned vlc_CPU(void);
# define VLC_CPU_SSE2 128 # define VLC_CPU_SSE2 128
# define VLC_CPU_SSE3 256 # define VLC_CPU_SSE3 256
# define VLC_CPU_SSSE3 512 # define VLC_CPU_SSSE3 512
# define CPU_CAPABILITY_SSE4_1 (1<<10) # define VLC_CPU_SSE4_1 1024
# define CPU_CAPABILITY_SSE4_2 (1<<11) # define VLC_CPU_SSE4_2 2048
# define CPU_CAPABILITY_SSE4A (1<<12) # define VLC_CPU_SSE4A 4096
# if defined (__MMX__) # if defined (__MMX__)
# define vlc_CPU_MMX() (1) # define vlc_CPU_MMX() (1)
...@@ -85,6 +85,24 @@ VLC_API unsigned vlc_CPU(void); ...@@ -85,6 +85,24 @@ VLC_API unsigned vlc_CPU(void);
# define vlc_CPU_SSSE3() ((vlc_CPU() & VLC_CPU_SSSE3) != 0) # define vlc_CPU_SSSE3() ((vlc_CPU() & VLC_CPU_SSSE3) != 0)
# endif # endif
/* SSE4.1 check: a constant 1 when __SSE4_1__ is defined at build time,
 * otherwise a test of the VLC_CPU_SSE4_1 bit in the value of vlc_CPU(). */
# if !defined (__SSE4_1__)
# define vlc_CPU_SSE4_1() ((vlc_CPU() & VLC_CPU_SSE4_1) != 0)
# else
# define vlc_CPU_SSE4_1() (1)
# endif
/* SSE4.2 check: same scheme, keyed on __SSE4_2__ / VLC_CPU_SSE4_2. */
# if !defined (__SSE4_2__)
# define vlc_CPU_SSE4_2() ((vlc_CPU() & VLC_CPU_SSE4_2) != 0)
# else
# define vlc_CPU_SSE4_2() (1)
# endif
/* SSE4A check: same scheme, keyed on __SSE4A__ / VLC_CPU_SSE4A. */
# if !defined (__SSE4A__)
# define vlc_CPU_SSE4A() ((vlc_CPU() & VLC_CPU_SSE4A) != 0)
# else
# define vlc_CPU_SSE4A() (1)
# endif
# elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__) # elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
# define HAVE_FPU 1 # define HAVE_FPU 1
# define VLC_CPU_ALTIVEC 2 # define VLC_CPU_ALTIVEC 2
......
...@@ -331,12 +331,11 @@ static int OpenDecoder( vlc_object_t *p_this ) ...@@ -331,12 +331,11 @@ static int OpenDecoder( vlc_object_t *p_this )
/* Set CPU capabilities */ /* Set CPU capabilities */
p_context->dsp_mask = 0; p_context->dsp_mask = 0;
#if defined (__i386__) || defined (__x86_64__) #if defined (__i386__) || defined (__x86_64__)
unsigned i_cpu = vlc_CPU();
if( !vlc_CPU_MMX() ) if( !vlc_CPU_MMX() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX; p_context->dsp_mask |= AV_CPU_FLAG_MMX;
if( !vlc_CPU_MMXEXT() ) if( !vlc_CPU_MMXEXT() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX2; p_context->dsp_mask |= AV_CPU_FLAG_MMX2;
if( !(i_cpu & CPU_CAPABILITY_3DNOW) ) if( !(vlc_CPU() & CPU_CAPABILITY_3DNOW) )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW; p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() ) if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE; p_context->dsp_mask |= AV_CPU_FLAG_SSE;
...@@ -351,11 +350,11 @@ static int OpenDecoder( vlc_object_t *p_this ) ...@@ -351,11 +350,11 @@ static int OpenDecoder( vlc_object_t *p_this )
p_context->dsp_mask |= AV_CPU_FLAG_SSSE3; p_context->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif # endif
# ifdef AV_CPU_FLAG_SSE4 # ifdef AV_CPU_FLAG_SSE4
if( !(i_cpu & CPU_CAPABILITY_SSE4_1) ) if( !vlc_CPU_SSE4_1() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE4; p_context->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif # endif
# ifdef AV_CPU_FLAG_SSE42 # ifdef AV_CPU_FLAG_SSE42
if( !(i_cpu & CPU_CAPABILITY_SSE4_2) ) if( !vlc_CPU_SSE4_2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE42; p_context->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif # endif
#endif #endif
......
...@@ -47,6 +47,11 @@ ...@@ -47,6 +47,11 @@
store " %%xmm4, 48(%[dst])\n" \ store " %%xmm4, 48(%[dst])\n" \
: : [dst]"r"(dstp), [src]"r"(srcp) : "memory") : : [dst]"r"(dstp), [src]"r"(srcp) : "memory")
/* Without build-time SSE4A, replace the vlc_CPU_SSE4A() definition so that it
 * tests the VLC_CPU_SSE4A bit of the `cpu` variable in scope at the call
 * site. */
#if !defined (__SSE4A__)
# undef vlc_CPU_SSE4A
# define vlc_CPU_SSE4A() ((cpu & VLC_CPU_SSE4A) != 0)
#endif
#ifndef __SSSE3__ #ifndef __SSSE3__
# undef vlc_CPU_SSSE3 # undef vlc_CPU_SSSE3
# define vlc_CPU_SSSE3() ((cpu & VLC_CPU_SSSE3) != 0) # define vlc_CPU_SSSE3() ((cpu & VLC_CPU_SSSE3) != 0)
...@@ -88,7 +93,7 @@ static void CopyFromUswc(uint8_t *dst, size_t dst_pitch, ...@@ -88,7 +93,7 @@ static void CopyFromUswc(uint8_t *dst, size_t dst_pitch,
dst[x] = src[x]; dst[x] = src[x];
#ifdef CAN_COMPILE_SSE4_1 #ifdef CAN_COMPILE_SSE4_1
if (cpu & CPU_CAPABILITY_SSE4_1) { if (vlc_CPU_SSE4_1()) {
if (!unaligned) { if (!unaligned) {
for (; x+63 < width; x += 64) for (; x+63 < width; x += 64)
COPY64(&dst[x], &src[x], "movntdqa", "movdqa"); COPY64(&dst[x], &src[x], "movntdqa", "movdqa");
......
...@@ -325,12 +325,11 @@ int OpenEncoder( vlc_object_t *p_this ) ...@@ -325,12 +325,11 @@ int OpenEncoder( vlc_object_t *p_this )
/* Set CPU capabilities */ /* Set CPU capabilities */
p_context->dsp_mask = 0; p_context->dsp_mask = 0;
#if defined (__i386__) || defined (__x86_64__) #if defined (__i386__) || defined (__x86_64__)
unsigned i_cpu = vlc_CPU();
if( !vlc_CPU_MMX() ) if( !vlc_CPU_MMX() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX; p_context->dsp_mask |= AV_CPU_FLAG_MMX;
if( !vlc_CPU_MMXEXT() ) if( !vlc_CPU_MMXEXT() )
p_context->dsp_mask |= AV_CPU_FLAG_MMX2; p_context->dsp_mask |= AV_CPU_FLAG_MMX2;
if( !(i_cpu & CPU_CAPABILITY_3DNOW) ) if( !(vlc_CPU() & CPU_CAPABILITY_3DNOW) )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW; p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() ) if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE; p_context->dsp_mask |= AV_CPU_FLAG_SSE;
...@@ -345,11 +344,11 @@ int OpenEncoder( vlc_object_t *p_this ) ...@@ -345,11 +344,11 @@ int OpenEncoder( vlc_object_t *p_this )
p_context->dsp_mask |= AV_CPU_FLAG_SSSE3; p_context->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif # endif
# ifdef AV_CPU_FLAG_SSE4 # ifdef AV_CPU_FLAG_SSE4
if( !(i_cpu & CPU_CAPABILITY_SSE4_1) ) if( !vlc_CPU_SSE4_1() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE4; p_context->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif # endif
# ifdef AV_CPU_FLAG_SSE42 # ifdef AV_CPU_FLAG_SSE42
if( !(i_cpu & CPU_CAPABILITY_SSE4_2) ) if( !vlc_CPU_SSE4_2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE42; p_context->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif # endif
#endif #endif
......
...@@ -400,11 +400,11 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt ) ...@@ -400,11 +400,11 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif # endif
# ifdef AV_CPU_FLAG_SSE4 # ifdef AV_CPU_FLAG_SSE4
if( !(i_cpu & CPU_CAPABILITY_SSE4_1) ) if( !vlc_CPU_SSE4_1() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif # endif
# ifdef AV_CPU_FLAG_SSE42 # ifdef AV_CPU_FLAG_SSE42
if( !(i_cpu & CPU_CAPABILITY_SSE4_2) ) if( !vlc_CPU_SSE4_2() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif # endif
#endif #endif
...@@ -821,11 +821,11 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id ) ...@@ -821,11 +821,11 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSSE3;
# endif # endif
# ifdef AV_CPU_FLAG_SSE4 # ifdef AV_CPU_FLAG_SSE4
if( !(i_cpu & CPU_CAPABILITY_SSE4_1) ) if( !vlc_CPU_SSE4_1() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE4;
# endif # endif
# ifdef AV_CPU_FLAG_SSE42 # ifdef AV_CPU_FLAG_SSE42
if( !(i_cpu & CPU_CAPABILITY_SSE4_2) ) if( !vlc_CPU_SSE4_2() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE42;
# endif # endif
#endif #endif
......
...@@ -247,18 +247,14 @@ void vlc_CPU_init (void) ...@@ -247,18 +247,14 @@ void vlc_CPU_init (void)
i_capabilities |= VLC_CPU_SSSE3; i_capabilities |= VLC_CPU_SSSE3;
# endif # endif
# if defined (__SSE4_1__) # if defined (CAN_COMPILE_SSE4_1)
i_capabilities |= CPU_CAPABILITY_SSE4_1;
# elif defined (CAN_COMPILE_SSE4_1)
if ((i_ecx & 0x00080000) && vlc_CPU_check ("SSE4.1", SSE4_1_test)) if ((i_ecx & 0x00080000) && vlc_CPU_check ("SSE4.1", SSE4_1_test))
i_capabilities |= CPU_CAPABILITY_SSE4_1; i_capabilities |= VLC_CPU_SSE4_1;
# endif # endif
# if defined (__SSE4_2__) # if defined (CAN_COMPILE_SSE4_2)
i_capabilities |= CPU_CAPABILITY_SSE4_2;
# elif defined (CAN_COMPILE_SSE4_2)
if ((i_ecx & 0x00100000) && vlc_CPU_check ("SSE4.2", SSE4_2_test)) if ((i_ecx & 0x00100000) && vlc_CPU_check ("SSE4.2", SSE4_2_test))
i_capabilities |= CPU_CAPABILITY_SSE4_2; i_capabilities |= VLC_CPU_SSE4_2;
# endif # endif
/* test for additional capabilities */ /* test for additional capabilities */
...@@ -345,9 +341,9 @@ void vlc_CPU_dump (vlc_object_t *obj) ...@@ -345,9 +341,9 @@ void vlc_CPU_dump (vlc_object_t *obj)
if (vlc_CPU_SSE2()) p += sprintf (p, "SSE2 ");; if (vlc_CPU_SSE2()) p += sprintf (p, "SSE2 ");;
if (vlc_CPU_SSE3()) p += sprintf (p, "SSE2 ");; if (vlc_CPU_SSE3()) p += sprintf (p, "SSE2 ");;
if (vlc_CPU_SSSE3()) p += sprintf (p, "SSSE3 ");; if (vlc_CPU_SSSE3()) p += sprintf (p, "SSSE3 ");;
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1"); if (vlc_CPU_SSE4_1()) p += sprintf (p, "SSE4.1 ");;
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_2, "SSE4.2"); if (vlc_CPU_SSE4_2()) p += sprintf (p, "SSE4.2 ");;
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4A, "SSE4A"); if (vlc_CPU_SSE4A()) p += sprintf (p, "SSE4A ");;
PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!"); PRINT_CAPABILITY(CPU_CAPABILITY_3DNOW, "3DNow!");
#elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__) #elif defined (__powerpc__) || defined (__ppc__) || defined (__ppc64__)
......
...@@ -79,16 +79,12 @@ static void vlc_CPU_init (void) ...@@ -79,16 +79,12 @@ static void vlc_CPU_init (void)
core_caps |= VLC_CPU_SSE3; core_caps |= VLC_CPU_SSE3;
if (!strcmp (cap, "ssse3")) if (!strcmp (cap, "ssse3"))
core_caps |= VLC_CPU_SSSE3; core_caps |= VLC_CPU_SSSE3;
# ifndef __SSE4_1__
if (!strcmp (cap, "sse4_1")) if (!strcmp (cap, "sse4_1"))
core_caps |= CPU_CAPABILITY_SSE4_1; core_caps |= VLC_CPU_SSE4_1;
# endif
# ifndef __SSE4_2__
if (!strcmp (cap, "sse4_2")) if (!strcmp (cap, "sse4_2"))
core_caps |= CPU_CAPABILITY_SSE4_1; core_caps |= VLC_CPU_SSE4_1;
# endif
if (!strcmp (cap, "sse4a")) if (!strcmp (cap, "sse4a"))
core_caps |= CPU_CAPABILITY_SSE4A; core_caps |= VLC_CPU_SSE4A;
# ifndef __3dNOW__ # ifndef __3dNOW__
if (!strcmp (cap, "3dnow")) if (!strcmp (cap, "3dnow"))
core_caps |= CPU_CAPABILITY_3DNOW; core_caps |= CPU_CAPABILITY_3DNOW;
...@@ -111,12 +107,6 @@ static void vlc_CPU_init (void) ...@@ -111,12 +107,6 @@ static void vlc_CPU_init (void)
/* Always enable capabilities that were forced during compilation */ /* Always enable capabilities that were forced during compilation */
#if defined (__i386__) || defined (__x86_64__) #if defined (__i386__) || defined (__x86_64__)
# ifdef __SSE4_1__
all_caps |= CPU_CAPABILITY_SSE4_1;
# endif
# ifdef __SSE4_2__
all_caps |= CPU_CAPABILITY_SSE4_2;
# endif
# ifdef __3dNOW__ # ifdef __3dNOW__
all_caps |= CPU_CAPABILITY_3DNOW; all_caps |= CPU_CAPABILITY_3DNOW;
# endif # endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment