cpu.c 7.04 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/*****************************************************************************
 * cpu.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: cpu.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

24 25 26 27
#if defined(HAVE_PTHREAD) && defined(SYS_LINUX)
#define _GNU_SOURCE
#include <sched.h>
#endif
28 29 30
#ifdef SYS_BEOS
#include <kernel/OS.h>
#endif
31
#if defined(SYS_MACOSX) || defined(SYS_FREEBSD)
32 33 34
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
35

Loren Merritt's avatar
Loren Merritt committed
36
#include "common.h"
Laurent Aimar's avatar
Laurent Aimar committed
37

38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
/*
 * Table associating a short, printable CPU feature name with the
 * X264_CPU_* flag mask it stands for.  Several names are aliases for the
 * same mask (MMX2/MMXEXT, SSE/SSE1).  The x86 SIMD entries are cumulative:
 * each mask also contains the flags of the weaker levels listed before it.
 * The final entry has an empty name and a zero mask (presumably the end
 * marker that readers of the table scan for).
 */
const struct {
    const char name[8];
    int flags;
} x264_cpu_names[] = {
    { .name = "MMX",     .flags = X264_CPU_MMX },
    { .name = "MMX2",    .flags = X264_CPU_MMX|X264_CPU_MMXEXT },
    { .name = "MMXEXT",  .flags = X264_CPU_MMX|X264_CPU_MMXEXT },
    { .name = "SSE",     .flags = X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE },
    { .name = "SSE1",    .flags = X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE },
    { .name = "SSE2",    .flags = X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE|X264_CPU_SSE2 },
    { .name = "SSE3",    .flags = X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE|X264_CPU_SSE2|X264_CPU_SSE3 },
    { .name = "SSSE3",   .flags = X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE|X264_CPU_SSE2|X264_CPU_SSE3|X264_CPU_SSSE3 },
    { .name = "3DNow",   .flags = X264_CPU_3DNOW },
    { .name = "Altivec", .flags = X264_CPU_ALTIVEC },
    { .name = "Cache32", .flags = X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_32 },
    { .name = "Cache64", .flags = X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_64 },
    { .name = "",        .flags = 0 },
};

Loren Merritt's avatar
Loren Merritt committed
57
#ifdef HAVE_MMX
Laurent Aimar's avatar
Laurent Aimar committed
58 59 60 61 62 63 64
/* Defined outside this file (presumably in the x86 assembly sources -- TODO
 * confirm).  x264_cpu_detect() treats a zero result as "CPUID is not usable"
 * and then reports no capabilities; it is only consulted on non-x86_64 builds. */
extern int  x264_cpu_cpuid_test( void );
/* Defined outside this file.  Called with a CPUID leaf number in `op`; the
 * four pointers receive the resulting EAX/EBX/ECX/EDX register values.
 * NOTE(review): callers in this file ignore the uint32_t return value; its
 * meaning is not visible from here. */
extern uint32_t  x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );

uint32_t x264_cpu_detect( void )
{
    uint32_t cpu = 0;
    uint32_t eax, ebx, ecx, edx;
65 66 67
    uint32_t vendor[4] = {0};
    int max_extended_cap;
    int cache;
Laurent Aimar's avatar
Laurent Aimar committed
68

69
#ifndef ARCH_X86_64
Laurent Aimar's avatar
Laurent Aimar committed
70 71
    if( !x264_cpu_cpuid_test() )
        return 0;
72
#endif
Laurent Aimar's avatar
Laurent Aimar committed
73

74
    x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
Laurent Aimar's avatar
Laurent Aimar committed
75 76 77 78
    if( eax == 0 )
        return 0;

    x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
79 80 81
    if( edx&0x00800000 )
        cpu |= X264_CPU_MMX;
    else
Laurent Aimar's avatar
Laurent Aimar committed
82
        return 0;
83
    if( edx&0x02000000 )
Laurent Aimar's avatar
Laurent Aimar committed
84
        cpu |= X264_CPU_MMXEXT|X264_CPU_SSE;
85
    if( edx&0x04000000 )
Laurent Aimar's avatar
Laurent Aimar committed
86
        cpu |= X264_CPU_SSE2;
87
#ifdef HAVE_SSE3
88
    if( ecx&0x00000001 )
89
        cpu |= X264_CPU_SSE3;
90
    if( ecx&0x00000200 )
91 92
        cpu |= X264_CPU_SSSE3;
#endif
Laurent Aimar's avatar
Laurent Aimar committed
93 94

    x264_cpu_cpuid( 0x80000000, &eax, &ebx, &ecx, &edx );
95 96 97
    max_extended_cap = eax;

    if( !strcmp((char*)vendor, "AuthenticAMD") && max_extended_cap >= 0x80000001 )
Laurent Aimar's avatar
Laurent Aimar committed
98
    {
99 100 101 102 103
        x264_cpu_cpuid( 0x80000001, &eax, &ebx, &ecx, &edx );
        if( edx&0x80000000 )
            cpu |= X264_CPU_3DNOW;
        if( edx&0x00400000 )
            cpu |= X264_CPU_MMXEXT;
Laurent Aimar's avatar
Laurent Aimar committed
104 105
    }

106 107 108 109 110 111
    if( !strcmp((char*)vendor, "GenuineIntel") || !strcmp((char*)vendor, "CyrixInstead") )
        cpu |= X264_CPU_CACHELINE_SPLIT;
    /* cacheline size is specified in 3 places, any of which may be missing */
    x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
    cache = (ebx&0xff00)>>5; // cflush size
    if( !cache && max_extended_cap >= 0x80000006 )
Laurent Aimar's avatar
Laurent Aimar committed
112
    {
113 114
        x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
        cache = ecx&0xff; // cacheline size
Laurent Aimar's avatar
Laurent Aimar committed
115
    }
116
    if( !cache )
Laurent Aimar's avatar
Laurent Aimar committed
117
    {
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
        // Cache and TLB Information
        static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };
        static const char cache64_ids[] = { 0x22, 0x23, 0x25, 0x29, 0x2c, 0x46, 0x47, 0x49, 0x60, 0x66, 0x67, 0x68, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7c, 0x7f, 0x86, 0x87, 0 };
        uint32_t buf[4];
        int max, i=0, j;
        do {
            x264_cpu_cpuid( 2, buf+0, buf+1, buf+2, buf+3 );
            max = buf[0]&0xff;
            buf[0] &= ~0xff;
            for(j=0; j<4; j++)
                if( !(buf[j]>>31) )
                    while( buf[j] )
                    {
                        if( strchr( cache32_ids, buf[j]&0xff ) )
                            cache = 32;
                        if( strchr( cache64_ids, buf[j]&0xff ) )
                            cache = 64;
                        buf[j] >>= 8;
                    }
        } while( ++i < max );
Laurent Aimar's avatar
Laurent Aimar committed
138 139
    }

140 141 142 143 144
    if( cache == 32 )
        cpu |= X264_CPU_CACHELINE_32;
    if( cache == 64 )
        cpu |= X264_CPU_CACHELINE_64;

Laurent Aimar's avatar
Laurent Aimar committed
145 146 147
    return cpu;
}

148
#elif defined( ARCH_PPC )
Laurent Aimar's avatar
Laurent Aimar committed
149

150 151
#ifdef SYS_MACOSX
#include <sys/sysctl.h>
Laurent Aimar's avatar
Laurent Aimar committed
152 153
/*
 * Mac OS X / PowerPC: query the hw.vectorunit sysctl to learn whether the
 * CPU has a vector unit.  (Approach borrowed from VLC.)
 *
 * Returns X264_CPU_ALTIVEC when the query succeeds and reports a non-zero
 * value, 0 otherwise.
 */
uint32_t x264_cpu_detect( void )
{
    int    mib[2] = { CTL_HW, HW_VECTORUNIT };
    int    vector_unit = 0;
    size_t len = sizeof( vector_unit );

    /* a failed query is treated the same as "no vector unit" */
    if( sysctl( mib, 2, &vector_unit, &len, NULL, 0 ) != 0 )
        return 0;

    return vector_unit ? X264_CPU_ALTIVEC : 0;
}

169
#elif defined( SYS_LINUX )
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
#include <signal.h>
#include <setjmp.h>
/* Jump target armed by x264_cpu_detect() before it executes the probe
 * instructions. */
static sigjmp_buf jmpbuf;
/* Non-zero only while the probe is running, i.e. while a signal is expected
 * and jmpbuf is the intended landing point. */
static volatile sig_atomic_t canjump = 0;

/*
 * Signal handler installed around the AltiVec probe.
 *
 * While the probe is armed (canjump != 0) it disarms it and unwinds to the
 * sigsetjmp() point with value 1.  A signal that arrives while the probe is
 * not armed is not ours: the default disposition is restored and the signal
 * is raised again, and the handler returns instead of jumping through a
 * buffer that nobody is waiting on.
 */
static void sigill_handler( int sig )
{
    if( !canjump )
    {
        signal( sig, SIG_DFL );
        raise( sig );
        /* do not fall through to siglongjmp(): the probe is not armed */
        return;
    }

    canjump = 0;
    siglongjmp( jmpbuf, 1 );
}

187 188
uint32_t x264_cpu_detect( void )
{
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
    static void (* oldsig)( int );

    oldsig = signal( SIGILL, sigill_handler );
    if( sigsetjmp( jmpbuf, 1 ) )
    {
        signal( SIGILL, oldsig );
        return 0;
    }

    canjump = 1;
    asm volatile( "mtspr 256, %0\n\t"
                  "vand 0, 0, 0\n\t"
                  :
                  : "r"(-1) );
    canjump = 0;

    signal( SIGILL, oldsig );

207 208 209 210
    return X264_CPU_ALTIVEC;
}
#endif

Laurent Aimar's avatar
Laurent Aimar committed
211 212 213 214 215 216 217
#else

/* Fallback for builds with no CPU-specific detection: report an empty
 * capability mask. */
uint32_t x264_cpu_detect( void )
{
    const uint32_t no_capabilities = 0;

    return no_capabilities;
}

Loren Merritt's avatar
Loren Merritt committed
218 219 220 221
#endif

#ifndef HAVE_MMX
/* No-op stand-in compiled when HAVE_MMX is not defined. */
void x264_emms( void )
{
    /* intentionally empty */
    return;
}
#endif
225 226 227 228


/*
 * Return the number of processors available for threading.
 *
 * - without HAVE_PTHREAD: always 1
 * - _WIN32:               pthread_num_processors_np()
 * - SYS_LINUX:            number of CPUs in the calling process's affinity
 *                         mask, or 1 if the mask cannot be read
 * - SYS_BEOS:             cpu_count from get_system_info()
 * - SYS_MACOSX/FREEBSD:   the "hw.ncpu" sysctl, or 1 if the query fails
 * - anything else:        1
 */
int x264_cpu_num_processors( void )
{
#if !defined(HAVE_PTHREAD)
    return 1;

#elif defined(_WIN32)
    return pthread_num_processors_np();

#elif defined(SYS_LINUX)
    cpu_set_t p_aff;
    memset( &p_aff, 0, sizeof(p_aff) );
    /* on failure the mask stays empty; report one processor rather than zero */
    if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) )
        return 1;
#if defined(CPU_COUNT)
    return CPU_COUNT( &p_aff );
#else
    {
        unsigned int bit;
        int np = 0;
        /* sizeof() is in bytes; the mask holds 8 CPUs per byte */
        for( bit = 0; bit < 8 * sizeof(p_aff); bit++ )
            np += (((uint8_t *)&p_aff)[bit / 8] >> (bit % 8)) & 1;
        return np;
    }
#endif

#elif defined(SYS_BEOS)
    system_info info;
    get_system_info( &info );
    return info.cpu_count;

#elif defined(SYS_MACOSX) || defined(SYS_FREEBSD)
    int numberOfCPUs;
    size_t length = sizeof( numberOfCPUs );
    if( sysctlbyname("hw.ncpu", &numberOfCPUs, &length, NULL, 0) )
    {
        numberOfCPUs = 1;
    }
    return numberOfCPUs;

#else
    return 1;
#endif
}