/*****************************************************************************
 * cpu.c: CPU detection code
 *****************************************************************************
 * Copyright (C) 1998-2004 the VideoLAN team
 * $Id$
 *
 * Authors: Samuel Hocevar <sam@zoy.org>
 *          Christophe Massiot <massiot@via.ecp.fr>
 *          Eugenio Jarosiewicz <ej0@cise.ufl.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/

/*****************************************************************************
 * Preamble
 *****************************************************************************/
29 30 31 32
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include <vlc_common.h>
#include <vlc_cpu.h>

#include <errno.h>
#include <sys/types.h>
#ifndef WIN32
#include <unistd.h>
#include <sys/wait.h>
#endif

#include "libvlc.h"

#if defined(__APPLE__) && (defined(__ppc__) || defined(__ppc64__))
#include <sys/sysctl.h>
#endif

48 49
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( __powerpc__ ) \
 || defined( __ppc__ ) || defined( __ppc64__ ) || defined( __powerpc64__ )
50 51 52 53
/**
 * Collects a forked probe process and tells whether it survived executing
 * an instruction from a given CPU extension.
 *
 * @param psz_capability name of the extension; only used in the warning
 *                       printed when the probe did not exit cleanly
 * @param pid value returned by fork() for the probe child (-1 if the fork
 *            itself failed)
 * @return true if the child exited normally with status 0; false if the
 *         fork failed, the child could not be waited for, or it terminated
 *         in any other way
 */
static bool check_OS_capability( const char *psz_capability, pid_t pid )
{
#ifndef WIN32
    int status;

    if( pid == -1 )
        return false; /* fail safe :-/ */

    /* Retry only when the wait was interrupted by a signal. Any other
     * failure (e.g. ECHILD) is permanent: retrying would spin forever. */
    while( waitpid( pid, &status, 0 ) == -1 )
    {
        if( errno != EINTR )
            return false; /* child status unknown: fail safe */
    }

    if( WIFEXITED( status ) && WEXITSTATUS( status ) == 0 )
        return true;

    fprintf( stderr, "warning: your CPU has %s instructions, but not your "
                     "operating system.\n", psz_capability );
    fprintf( stderr, "         some optimizations will be disabled unless "
                     "you upgrade your OS\n" );
    return false;
#else
# warning FIXME!
    /* No fork() here: make later fork() calls in this file evaluate to -1. */
# define fork() (errno = ENOSYS, -1)
    (void)pid;
    (void)psz_capability;
    return true;
#endif
}
Sam Hocevar's avatar
Sam Hocevar committed
76
#endif
77 78

/*****************************************************************************
79
 * CPUCapabilities: get the CPU capabilities
80
 *****************************************************************************
81 82
 * This function is called to list extensions the CPU may have.
 *****************************************************************************/
Christophe Mutricy's avatar
Christophe Mutricy committed
83
uint32_t CPUCapabilities( void )
84
{
Rémi Denis-Courmont's avatar
Rémi Denis-Courmont committed
85
    uint32_t i_capabilities = 0;
86

87
#if defined( __i386__ ) || defined( __x86_64__ )
88 89
     unsigned int i_eax, i_ebx, i_ecx, i_edx;
     bool b_amd;
90 91

    /* Needed for x86 CPU capabilities detection */
92 93
#   if defined( __x86_64__ )
#       define cpuid( reg )                    \
94
            asm volatile ( "cpuid\n\t"         \
95 96
                           "movl %%ebx,%1\n\t" \
                         : "=a" ( i_eax ),     \
Sam Hocevar's avatar
Sam Hocevar committed
97
                           "=b" ( i_ebx ),     \
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
                           "=c" ( i_ecx ),     \
                           "=d" ( i_edx )      \
                         : "a"  ( reg )        \
                         : "cc" );
#   else
#       define cpuid( reg )                    \
            asm volatile ( "push %%ebx\n\t"    \
                           "cpuid\n\t"         \
                           "movl %%ebx,%1\n\t" \
                           "pop %%ebx\n\t"     \
                         : "=a" ( i_eax ),     \
                           "=r" ( i_ebx ),     \
                           "=c" ( i_ecx ),     \
                           "=d" ( i_edx )      \
                         : "a"  ( reg )        \
                         : "cc" );
#   endif
115

116 117 118
# if defined (__i386__) && !defined (__i486__) && !defined (__i586__) \
  && !defined (__i686__) && !defined (__pentium4__) \
  && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
119
    /* check if cpuid instruction is supported */
Christophe Massiot's avatar
Christophe Massiot committed
120 121 122
    asm volatile ( "push %%ebx\n\t"
                   "pushf\n\t"
                   "pop %%eax\n\t"
123 124
                   "movl %%eax, %%ebx\n\t"
                   "xorl $0x200000, %%eax\n\t"
Christophe Massiot's avatar
Christophe Massiot committed
125 126 127 128
                   "push %%eax\n\t"
                   "popf\n\t"
                   "pushf\n\t"
                   "pop %%eax\n\t"
129
                   "movl %%ebx,%1\n\t"
Christophe Massiot's avatar
Christophe Massiot committed
130
                   "pop %%ebx\n\t"
131 132 133 134 135 136
                 : "=a" ( i_eax ),
                   "=r" ( i_ebx )
                 :
                 : "cc" );

    if( i_eax == i_ebx )
137
        goto out;
138
# endif
139 140 141 142

    /* the CPU supports the CPUID instruction - get its level */
    cpuid( 0x00000000 );

143 144 145
# if defined (__i386__) && !defined (__i586__) \
  && !defined (__i686__) && !defined (__pentium4__) \
  && !defined (__k6__) && !defined (__athlon__) && !defined (__k8__)
146
    if( !i_eax )
147
        goto out;
148
#endif
149 150 151 152 153 154 155

    /* borrowed from mpeg2dec */
    b_amd = ( i_ebx == 0x68747541 ) && ( i_ecx == 0x444d4163 )
                    && ( i_edx == 0x69746e65 );

    /* test for the MMX flag */
    cpuid( 0x00000001 );
156
# if !defined (__MMX__)
157
    if( ! (i_edx & 0x00800000) )
158
        goto out;
159
# endif
160 161
    i_capabilities |= CPU_CAPABILITY_MMX;

162 163 164
# if defined (__SSE__)
    i_capabilities |= CPU_CAPABILITY_MMXEXT | CPU_CAPABILITY_SSE;
# else
165 166 167 168 169 170
    if( i_edx & 0x02000000 )
    {
        i_capabilities |= CPU_CAPABILITY_MMXEXT;

#   ifdef CAN_COMPILE_SSE
        /* We test if OS supports the SSE instructions */
171 172
        pid_t pid = fork();
        if( pid == 0 )
173 174 175
        {
            /* Test a SSE instruction */
            __asm__ __volatile__ ( "xorps %%xmm0,%%xmm0\n" : : );
176
            exit(0);
177
        }
178
        if( check_OS_capability( "SSE", pid ) )
179 180 181
            i_capabilities |= CPU_CAPABILITY_SSE;
#   endif
    }
182
# endif
183

184 185 186
# if defined (__SSE2__)
    i_capabilities |= CPU_CAPABILITY_SSE2;
# elif defined (CAN_COMPILE_SSE)
Sigmund Augdal Helberg's avatar
Sigmund Augdal Helberg committed
187 188
    if( i_edx & 0x04000000 )
    {
189 190 191
        /* We test if OS supports the SSE2 instructions */
        pid_t pid = fork();
        if( pid == 0 )
Sigmund Augdal Helberg's avatar
Sigmund Augdal Helberg committed
192 193 194
        {
            /* Test a SSE2 instruction */
            __asm__ __volatile__ ( "movupd %%xmm0, %%xmm0\n" : : );
195
            exit(0);
Sigmund Augdal Helberg's avatar
Sigmund Augdal Helberg committed
196
        }
197
        if( check_OS_capability( "SSE2", pid ) )
Sigmund Augdal Helberg's avatar
Sigmund Augdal Helberg committed
198 199
            i_capabilities |= CPU_CAPABILITY_SSE2;
    }
200
# endif
201

202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
# if defined (__SSE3__)
    i_capabilities |= CPU_CAPABILITY_SSE3;
# elif defined (CAN_COMPILE_SSE3)
    if( i_ecx & 0x00000001 )
    {
        /* We test if OS supports the SSE3 instructions */
        pid_t pid = fork();
        if( pid == 0 )
        {
            /* Test a SSE3 instruction */
            __asm__ __volatile__ ( "movsldup %%xmm1, %%xmm0\n" : : );
            exit(0);
        }
        if( check_OS_capability( "SSE3", pid ) )
            i_capabilities |= CPU_CAPABILITY_SSE3;
    }
# endif

220 221 222 223
    /* test for additional capabilities */
    cpuid( 0x80000000 );

    if( i_eax < 0x80000001 )
224
        goto out;
225 226 227 228

    /* list these additional capabilities */
    cpuid( 0x80000001 );

229 230 231
# if defined (__3dNOW__)
    i_capabilities |= CPU_CAPABILITY_3DNOW;
# elif defined (CAN_COMPILE_3DNOW)
232 233
    if( i_edx & 0x80000000 )
    {
234 235
        pid_t pid = fork();
        if( pid == 0 )
236 237 238
        {
            /* Test a 3D Now! instruction */
            __asm__ __volatile__ ( "pfadd %%mm0,%%mm0\n" "femms\n" : : );
239
            exit(0);
240
        }
241
        if( check_OS_capability( "3D Now!", pid ) )
242 243
            i_capabilities |= CPU_CAPABILITY_3DNOW;
    }
244
# endif
245 246 247 248 249

    if( b_amd && ( i_edx & 0x00400000 ) )
    {
        i_capabilities |= CPU_CAPABILITY_MMXEXT;
    }
250
out:
251

252
#elif defined( __arm__ )
253 254 255
#   if defined( __ARM_NEON__ )
    i_capabilities |= CPU_CAPABILITY_NEON;
#   endif
256

257 258
#elif defined( __powerpc__ ) || defined( __ppc__ ) || defined( __powerpc64__ ) \
    || defined( __ppc64__ )
259

260 261 262 263 264 265 266 267 268 269
#   if defined(__APPLE__)
    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
    int i_has_altivec = 0;
    size_t i_length = sizeof( i_has_altivec );
    int i_error = sysctl( selectors, 2, &i_has_altivec, &i_length, NULL, 0);

    if( i_error == 0 && i_has_altivec != 0 )
        i_capabilities |= CPU_CAPABILITY_ALTIVEC;

#   elif defined( CAN_COMPILE_ALTIVEC )
270 271
    pid_t pid = fork();
    if( pid == 0 )
272 273 274 275 276
    {
        asm volatile ("mtspr 256, %0\n\t"
                      "vand %%v0, %%v0, %%v0"
                      :
                      : "r" (-1));
277
        exit(0);
278 279
    }

280
    if( check_OS_capability( "Altivec", pid ) )
281 282 283 284 285
        i_capabilities |= CPU_CAPABILITY_ALTIVEC;

#   endif

#endif
286
    return i_capabilities;
287 288
}

Rémi Denis-Courmont's avatar
Rémi Denis-Courmont committed
289
/* CPU capability flags handed out by vlc_CPU(). Starts at 0 and has external
 * linkage. NOTE(review): presumably assigned from CPUCapabilities() by
 * start-up code outside this file -- confirm. */
uint32_t cpu_flags = 0;
290 291 292 293 294 295 296 297 298 299


/*****************************************************************************
 * vlc_CPU: get pre-computed CPU capability flags
 ****************************************************************************/
unsigned vlc_CPU (void)
{
    return cpu_flags;
}

300
static vlc_memcpy_t pf_vlc_memcpy = memcpy;
Rémi Denis-Courmont's avatar
Rémi Denis-Courmont committed
301
static vlc_memset_t pf_vlc_memset = memset;
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325

/**
 * vlc_fastmem_register: install replacement memcpy/memset routines
 *
 * Each argument is optional: a NULL pointer leaves the routine currently
 * in use for that operation untouched.
 */
void vlc_fastmem_register (vlc_memcpy_t cpy, vlc_memset_t set)
{
    if (cpy != NULL)
    {
        pf_vlc_memcpy = cpy;
    }

    if (set != NULL)
    {
        pf_vlc_memset = set;
    }
}

/**
 * vlc_memcpy: fast CPU-dependent memcpy
 */
void *vlc_memcpy (void *tgt, const void *src, size_t n)
{
    return pf_vlc_memcpy (tgt, src, n);
}

/**
 * vlc_memset: fast CPU-dependent memset
 */
void *vlc_memset (void *tgt, int c, size_t n)
{
    return pf_vlc_memset (tgt, c, n);
}