Commit edfb6e4a authored by Rémi Denis-Courmont

Check for SSE2 at build-time if possible

parent 615a016b
...@@ -34,7 +34,7 @@ VLC_API unsigned vlc_CPU(void); ...@@ -34,7 +34,7 @@ VLC_API unsigned vlc_CPU(void);
# define CPU_CAPABILITY_3DNOW (1<<4) # define CPU_CAPABILITY_3DNOW (1<<4)
# define VLC_CPU_MMXEXT 32 # define VLC_CPU_MMXEXT 32
# define VLC_CPU_SSE 64 # define VLC_CPU_SSE 64
# define CPU_CAPABILITY_SSE2 (1<<7) # define VLC_CPU_SSE2 128
# define CPU_CAPABILITY_SSE3 (1<<8) # define CPU_CAPABILITY_SSE3 (1<<8)
# define CPU_CAPABILITY_SSSE3 (1<<9) # define CPU_CAPABILITY_SSSE3 (1<<9)
# define CPU_CAPABILITY_SSE4_1 (1<<10) # define CPU_CAPABILITY_SSE4_1 (1<<10)
...@@ -67,6 +67,12 @@ VLC_API unsigned vlc_CPU(void); ...@@ -67,6 +67,12 @@ VLC_API unsigned vlc_CPU(void);
# endif # endif
# endif # endif
# ifdef __SSE2__
# define vlc_CPU_SSE2() (1)
# else
# define vlc_CPU_SSE2() ((vlc_CPU() & VLC_CPU_SSE2) != 0)
# endif
# elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__) # elif defined (__ppc__) || defined (__ppc64__) || defined (__powerpc__)
# define HAVE_FPU 1 # define HAVE_FPU 1
# define VLC_CPU_ALTIVEC 2 # define VLC_CPU_ALTIVEC 2
......
...@@ -340,7 +340,7 @@ static int OpenDecoder( vlc_object_t *p_this ) ...@@ -340,7 +340,7 @@ static int OpenDecoder( vlc_object_t *p_this )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW; p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() ) if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE; p_context->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) ) if( !vlc_CPU_SSE2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE2; p_context->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3 # ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) ) if( !(i_cpu & CPU_CAPABILITY_SSE3) )
......
...@@ -49,12 +49,18 @@ ...@@ -49,12 +49,18 @@
/* Execute the instruction op only if SSE2 is supported. */ /* Execute the instruction op only if SSE2 is supported. */
#ifdef CAN_COMPILE_SSE2 #ifdef CAN_COMPILE_SSE2
# define ASM_SSE2(cpu, op) do { \ # ifdef __SSE2__
if (cpu & CPU_CAPABILITY_SSE2) \ # define ASM_SSE2(cpu, op) asm volatile (op)
asm volatile (op); \ # else
# define ASM_SSE2(cpu, op) do { \
if (cpu & VLC_CPU_SSE2) \
asm volatile (op); \
} while (0) } while (0)
# undef vlc_CPU_SSE2
# define vlc_CPU_SSE2() ((cpu & VLC_CPU_SSE2) != 0)
# endif
#else #else
# define ASM_SSE2(cpu, op) # define ASM_SSE2(cpu, op)
#endif #endif
/* Optimized copy from "Uncacheable Speculative Write Combining" memory /* Optimized copy from "Uncacheable Speculative Write Combining" memory
...@@ -88,7 +94,7 @@ static void CopyFromUswc(uint8_t *dst, size_t dst_pitch, ...@@ -88,7 +94,7 @@ static void CopyFromUswc(uint8_t *dst, size_t dst_pitch,
} else } else
#endif #endif
#ifdef CAN_COMPILE_SSE2 #ifdef CAN_COMPILE_SSE2
if (cpu & CPU_CAPABILITY_SSE2) { if (vlc_CPU_SSE2()) {
if (!unaligned) { if (!unaligned) {
for (; x+63 < width; x += 64) for (; x+63 < width; x += 64)
COPY64(&dst[x], &src[x], "movdqa", "movdqa"); COPY64(&dst[x], &src[x], "movdqa", "movdqa");
...@@ -121,7 +127,7 @@ static void Copy2d(uint8_t *dst, size_t dst_pitch, ...@@ -121,7 +127,7 @@ static void Copy2d(uint8_t *dst, size_t dst_pitch,
bool unaligned = ((intptr_t)dst & 0x0f) != 0; bool unaligned = ((intptr_t)dst & 0x0f) != 0;
#ifdef CAN_COMPILE_SSE2 #ifdef CAN_COMPILE_SSE2
if (cpu & CPU_CAPABILITY_SSE2) { if (vlc_CPU_SSE2()) {
if (!unaligned) { if (!unaligned) {
for (; x+63 < width; x += 64) for (; x+63 < width; x += 64)
COPY64(&dst[x], &src[x], "movdqa", "movntdq"); COPY64(&dst[x], &src[x], "movdqa", "movntdq");
...@@ -189,7 +195,7 @@ static void SplitUV(uint8_t *dstu, size_t dstu_pitch, ...@@ -189,7 +195,7 @@ static void SplitUV(uint8_t *dstu, size_t dstu_pitch,
} else } else
#endif #endif
#ifdef CAN_COMPILE_SSE2 #ifdef CAN_COMPILE_SSE2
if (cpu & CPU_CAPABILITY_SSE2) { if (vlc_CPU_SSE2()) {
for (x = 0; x < (width & ~31); x += 32) { for (x = 0; x < (width & ~31); x += 32) {
asm volatile ( asm volatile (
"movdqu (%[mask]), %%xmm7\n" "movdqu (%[mask]), %%xmm7\n"
......
...@@ -334,7 +334,7 @@ int OpenEncoder( vlc_object_t *p_this ) ...@@ -334,7 +334,7 @@ int OpenEncoder( vlc_object_t *p_this )
p_context->dsp_mask |= AV_CPU_FLAG_3DNOW; p_context->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() ) if( !vlc_CPU_SSE() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE; p_context->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) ) if( !vlc_CPU_SSE2() )
p_context->dsp_mask |= AV_CPU_FLAG_SSE2; p_context->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3 # ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) ) if( !(i_cpu & CPU_CAPABILITY_SSE3) )
......
...@@ -1266,7 +1266,7 @@ static int Open ( vlc_object_t *p_this ) ...@@ -1266,7 +1266,7 @@ static int Open ( vlc_object_t *p_this )
p_sys->param.cpu &= ~X264_CPU_MMXEXT; p_sys->param.cpu &= ~X264_CPU_MMXEXT;
if( !vlc_CPU_SSE() ) if( !vlc_CPU_SSE() )
p_sys->param.cpu &= ~X264_CPU_SSE; p_sys->param.cpu &= ~X264_CPU_SSE;
if( !(vlc_CPU() & CPU_CAPABILITY_SSE2) ) if( !vlc_CPU_SSE2() )
p_sys->param.cpu &= ~X264_CPU_SSE2; p_sys->param.cpu &= ~X264_CPU_SSE2;
#endif #endif
......
...@@ -389,7 +389,7 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt ) ...@@ -389,7 +389,7 @@ static sout_stream_id_t *Add( sout_stream_t *p_stream, es_format_t *p_fmt )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() ) if( !vlc_CPU_SSE() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) ) if( !vlc_CPU_SSE2() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3 # ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) ) if( !(i_cpu & CPU_CAPABILITY_SSE3) )
...@@ -810,7 +810,7 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id ) ...@@ -810,7 +810,7 @@ static mtime_t VideoCommand( sout_stream_t *p_stream, sout_stream_id_t *id )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_3DNOW;
if( !vlc_CPU_SSE() ) if( !vlc_CPU_SSE() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE;
if( !(i_cpu & CPU_CAPABILITY_SSE2) ) if( !vlc_CPU_SSE2() )
id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2; id->ff_enc_c->dsp_mask |= AV_CPU_FLAG_SSE2;
# ifdef AV_CPU_FLAG_SSE3 # ifdef AV_CPU_FLAG_SSE3
if( !(i_cpu & CPU_CAPABILITY_SSE3) ) if( !(i_cpu & CPU_CAPABILITY_SSE3) )
......
...@@ -94,7 +94,7 @@ vlc_module_begin () ...@@ -94,7 +94,7 @@ vlc_module_begin ()
set_description( N_( "SSE2 I420,IYUV,YV12 to " set_description( N_( "SSE2 I420,IYUV,YV12 to "
"RV15,RV16,RV24,RV32 conversions") ) "RV15,RV16,RV24,RV32 conversions") )
set_capability( "video filter2", 120 ) set_capability( "video filter2", 120 )
# define vlc_CPU_capable() ((vlc_CPU() & CPU_CAPABILITY_SSE2) != 0) # define vlc_CPU_capable() vlc_CPU_SSE2()
#endif #endif
set_callbacks( Activate, Deactivate ) set_callbacks( Activate, Deactivate )
vlc_module_end () vlc_module_end ()
......
...@@ -96,12 +96,12 @@ vlc_module_begin () ...@@ -96,12 +96,12 @@ vlc_module_begin ()
#elif defined (MODULE_NAME_IS_i420_yuy2_sse2) #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) ) set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
set_capability( "video filter2", 250 ) set_capability( "video filter2", 250 )
# define vlc_CPU_capable() (vlc_CPU() & CPU_CAPABILITY_SSE2) # define vlc_CPU_capable() vlc_CPU_SSE2()
#elif defined (MODULE_NAME_IS_i420_yuy2_altivec) #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
set_description( set_description(
_("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) ); _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
set_capability( "video filter2", 250 ) set_capability( "video filter2", 250 )
# define vlc_CPU_capable() (vlc_CPU_ALTIVEC()) # define vlc_CPU_capable() vlc_CPU_ALTIVEC()
#endif #endif
set_callbacks( Activate, NULL ) set_callbacks( Activate, NULL )
vlc_module_end () vlc_module_end ()
......
...@@ -81,7 +81,7 @@ vlc_module_begin () ...@@ -81,7 +81,7 @@ vlc_module_begin ()
#elif defined (MODULE_NAME_IS_i422_yuy2_sse2) #elif defined (MODULE_NAME_IS_i422_yuy2_sse2)
set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) ) set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
set_capability( "video filter2", 120 ) set_capability( "video filter2", 120 )
# define vlc_CPU_capable() ((vlc_CPU() & CPU_CAPABILITY_SSE2) != 0) # define vlc_CPU_capable() vlc_CPU_SSE2()
# define VLC_TARGET VLC_SSE # define VLC_TARGET VLC_SSE
#endif #endif
set_callbacks( Activate, NULL ) set_callbacks( Activate, NULL )
......
...@@ -108,19 +108,22 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src, ...@@ -108,19 +108,22 @@ int RenderYadif( filter_t *p_filter, picture_t *p_dst, picture_t *p_src,
void (*filter)(uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, void (*filter)(uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next,
int w, int prefs, int mrefs, int parity, int mode); int w, int prefs, int mrefs, int parity, int mode);
filter = yadif_filter_line_c; #if defined(HAVE_YADIF_SSSE3)
#if defined(HAVE_YADIF_MMX) if( vlc_CPU() & CPU_CAPABILITY_SSSE3 )
if( vlc_CPU_MMX() ) filter = yadif_filter_line_ssse3;
filter = yadif_filter_line_mmx; else
#endif #endif
#if defined(HAVE_YADIF_SSE2) #if defined(HAVE_YADIF_SSE2)
if( vlc_CPU() & CPU_CAPABILITY_SSE2 ) if( vlc_CPU_SSE2() )
filter = yadif_filter_line_sse2; filter = yadif_filter_line_sse2;
else
#endif #endif
#if defined(HAVE_YADIF_SSSE3) #if defined(HAVE_YADIF_MMX)
if( vlc_CPU() & CPU_CAPABILITY_SSSE3 ) if( vlc_CPU_MMX() )
filter = yadif_filter_line_ssse3; filter = yadif_filter_line_mmx;
else
#endif #endif
filter = yadif_filter_line_c;
for( int n = 0; n < p_dst->i_planes; n++ ) for( int n = 0; n < p_dst->i_planes; n++ )
{ {
......
...@@ -632,8 +632,8 @@ int Open( vlc_object_t *p_this ) ...@@ -632,8 +632,8 @@ int Open( vlc_object_t *p_this )
p_sys->pf_merge = MergeAltivec; p_sys->pf_merge = MergeAltivec;
else else
#endif #endif
#if defined(CAN_COMPILE_SSE) #if defined(CAN_COMPILE_SSE2)
if( (vlc_CPU() & CPU_CAPABILITY_SSE2) ) if( vlc_CPU_SSE2() )
{ {
p_sys->pf_merge = chroma->pixel_size == 1 ? Merge8BitSSE2 : Merge16BitSSE2; p_sys->pf_merge = chroma->pixel_size == 1 ? Merge8BitSSE2 : Merge16BitSSE2;
p_sys->pf_end_merge = EndMMX; p_sys->pf_end_merge = EndMMX;
......
...@@ -135,7 +135,7 @@ static int Open(vlc_object_t *object) ...@@ -135,7 +135,7 @@ static int Open(vlc_object_t *object)
cfg->buf = NULL; cfg->buf = NULL;
#if HAVE_SSE2 && HAVE_6REGS #if HAVE_SSE2 && HAVE_6REGS
if (vlc_CPU() & CPU_CAPABILITY_SSE2) if (vlc_CPU_SSE2())
cfg->blur_line = blur_line_sse2; cfg->blur_line = blur_line_sse2;
else else
#endif #endif
......
...@@ -409,7 +409,7 @@ static int Open(vlc_object_t *object) ...@@ -409,7 +409,7 @@ static int Open(vlc_object_t *object)
sys->blend = BlockBlendC; sys->blend = BlockBlendC;
sys->emms = NULL; sys->emms = NULL;
#if defined(CAN_COMPILE_SSE2) && 1 #if defined(CAN_COMPILE_SSE2) && 1
if (vlc_CPU() & CPU_CAPABILITY_SSE2) { if (vlc_CPU_SSE2()) {
sys->blend = BlockBlendSse2; sys->blend = BlockBlendSse2;
sys->emms = Emms; sys->emms = Emms;
} }
......
...@@ -245,7 +245,7 @@ static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic, ...@@ -245,7 +245,7 @@ static void PlanarI420Sepia( picture_t *p_pic, picture_t *p_outpic,
const uint8_t filling_const_8v = 128 + i_intensity / 14; const uint8_t filling_const_8v = 128 + i_intensity / 14;
#if defined(CAN_COMPILE_SSE2) #if defined(CAN_COMPILE_SSE2)
if (vlc_CPU() & CPU_CAPABILITY_SSE2) if (vlc_CPU_SSE2())
{ {
/* prepared value for faster broadcasting in xmm register */ /* prepared value for faster broadcasting in xmm register */
int i_intensity_spread = 0x10001 * (uint8_t) i_intensity; int i_intensity_spread = 0x10001 * (uint8_t) i_intensity;
......
...@@ -232,11 +232,9 @@ void vlc_CPU_init (void) ...@@ -232,11 +232,9 @@ void vlc_CPU_init (void)
# endif # endif
} }
# if defined (__SSE2__) # if defined (CAN_COMPILE_SSE2)
i_capabilities |= CPU_CAPABILITY_SSE2;
# elif defined (CAN_COMPILE_SSE2)
if ((i_edx & 0x04000000) && vlc_CPU_check ("SSE2", SSE2_test)) if ((i_edx & 0x04000000) && vlc_CPU_check ("SSE2", SSE2_test))
i_capabilities |= CPU_CAPABILITY_SSE2; i_capabilities |= VLC_CPU_SSE2;
# endif # endif
# if defined (__SSE3__) # if defined (__SSE3__)
...@@ -348,7 +346,7 @@ void vlc_CPU_dump (vlc_object_t *obj) ...@@ -348,7 +346,7 @@ void vlc_CPU_dump (vlc_object_t *obj)
if (vlc_CPU_MMX()) p += sprintf (p, "MMX "); if (vlc_CPU_MMX()) p += sprintf (p, "MMX ");
if (vlc_CPU_MMXEXT()) p += sprintf (p, "MMXEXT "); if (vlc_CPU_MMXEXT()) p += sprintf (p, "MMXEXT ");
if (vlc_CPU_SSE()) p += sprintf (p, "SSE ");; if (vlc_CPU_SSE()) p += sprintf (p, "SSE ");;
PRINT_CAPABILITY(CPU_CAPABILITY_SSE2, "SSE2"); if (vlc_CPU_SSE2()) p += sprintf (p, "SSE2 ");;
PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3"); PRINT_CAPABILITY(CPU_CAPABILITY_SSE3, "SSE3");
PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3"); PRINT_CAPABILITY(CPU_CAPABILITY_SSSE3, "SSSE3");
PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1"); PRINT_CAPABILITY(CPU_CAPABILITY_SSE4_1, "SSE4.1");
......
...@@ -73,10 +73,8 @@ static void vlc_CPU_init (void) ...@@ -73,10 +73,8 @@ static void vlc_CPU_init (void)
core_caps |= VLC_CPU_SSE | VLC_CPU_MMXEXT; core_caps |= VLC_CPU_SSE | VLC_CPU_MMXEXT;
if (!strcmp (cap, "mmxext")) if (!strcmp (cap, "mmxext"))
core_caps |= VLC_CPU_MMXEXT; core_caps |= VLC_CPU_MMXEXT;
# ifndef __SSE2__
if (!strcmp (cap, "sse2")) if (!strcmp (cap, "sse2"))
core_caps |= CPU_CAPABILITY_SSE2; core_caps |= VLC_CPU_SSE2;
# endif
# ifndef __SSE3__ # ifndef __SSE3__
if (!strcmp (cap, "pni")) if (!strcmp (cap, "pni"))
core_caps |= CPU_CAPABILITY_SSE3; core_caps |= CPU_CAPABILITY_SSE3;
...@@ -117,9 +115,6 @@ static void vlc_CPU_init (void) ...@@ -117,9 +115,6 @@ static void vlc_CPU_init (void)
/* Always enable capabilities that were forced during compilation */ /* Always enable capabilities that were forced during compilation */
#if defined (__i386__) || defined (__x86_64__) #if defined (__i386__) || defined (__x86_64__)
# ifdef __SSE2__
all_caps |= CPU_CAPABILITY_SSE2;
# endif
# ifdef __SSE3__ # ifdef __SSE3__
all_caps |= CPU_CAPABILITY_SSE3; all_caps |= CPU_CAPABILITY_SSE3;
# endif # endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment