cpu.c 7.26 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/*****************************************************************************
 * cpu.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: cpu.c,v 1.1 2004/06/03 19:27:06 fenrir Exp $
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/

24 25 26 27
#if defined(HAVE_PTHREAD) && defined(SYS_LINUX)
#define _GNU_SOURCE
#include <sched.h>
#endif
28 29 30
#ifdef SYS_BEOS
#include <kernel/OS.h>
#endif
31
#if defined(SYS_MACOSX) || defined(SYS_FREEBSD)
32 33 34
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
35

Loren Merritt's avatar
Loren Merritt committed
36
#include "common.h"
Laurent Aimar's avatar
Laurent Aimar committed
37

38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
/* Table mapping user-visible CPU capability names to X264_CPU_* flag masks.
 * Several names are aliases of each other (MMX2/MMXEXT, SSE/SSE1), and each
 * SSE-family entry carries the flags of the entries it builds on.
 * The table ends with a sentinel entry whose name is empty and flags are 0. */
const struct {
    const char name[8];
    int flags;
} x264_cpu_names[] = {
    { .name = "MMX",     .flags = X264_CPU_MMX },
    { .name = "MMX2",    .flags = X264_CPU_MMX|X264_CPU_MMXEXT },
    { .name = "MMXEXT",  .flags = X264_CPU_MMX|X264_CPU_MMXEXT },
    { .name = "SSE",     .flags = X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE },
    { .name = "SSE1",    .flags = X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE },
    { .name = "SSE2",    .flags = X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE
                                 |X264_CPU_SSE2 },
    { .name = "SSE3",    .flags = X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE
                                 |X264_CPU_SSE2|X264_CPU_SSE3 },
    { .name = "SSSE3",   .flags = X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE
                                 |X264_CPU_SSE2|X264_CPU_SSE3|X264_CPU_SSSE3 },
    { .name = "3DNow",   .flags = X264_CPU_3DNOW },
    { .name = "Altivec", .flags = X264_CPU_ALTIVEC },
    { .name = "Cache32", .flags = X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_32 },
    { .name = "Cache64", .flags = X264_CPU_CACHELINE_SPLIT|X264_CPU_CACHELINE_64 },
    /* sentinel: end of table */
    { .name = "",        .flags = 0 },
};

Loren Merritt's avatar
Loren Merritt committed
57
#ifdef HAVE_MMX
Laurent Aimar's avatar
Laurent Aimar committed
58 59 60 61 62 63 64 65
/* Helpers defined outside this file.
 * NOTE(review): presumably implemented in the x86 assembly sources - confirm. */

/* A zero result makes x264_cpu_detect() give up and report no capabilities.
 * Only consulted when ARCH_X86_64 is not defined. Presumably checks whether
 * the CPUID instruction is usable - confirm against the implementation. */
extern int  x264_cpu_cpuid_test( void );
/* Queries CPUID function `op`; the four result registers are stored through
 * eax/ebx/ecx/edx. The return value is not used anywhere in this file. */
extern uint32_t  x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
/* Called by x264_cpu_restore() when any MMX/3DNow-family flag is set.
 * Presumably executes EMMS to release the MMX state - confirm. */
extern void x264_emms( void );

uint32_t x264_cpu_detect( void )
{
    uint32_t cpu = 0;
    uint32_t eax, ebx, ecx, edx;
66 67 68
    uint32_t vendor[4] = {0};
    int max_extended_cap;
    int cache;
Laurent Aimar's avatar
Laurent Aimar committed
69

70
#ifndef ARCH_X86_64
Laurent Aimar's avatar
Laurent Aimar committed
71 72
    if( !x264_cpu_cpuid_test() )
        return 0;
73
#endif
Laurent Aimar's avatar
Laurent Aimar committed
74

75
    x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
Laurent Aimar's avatar
Laurent Aimar committed
76 77 78 79
    if( eax == 0 )
        return 0;

    x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
80 81 82
    if( edx&0x00800000 )
        cpu |= X264_CPU_MMX;
    else
Laurent Aimar's avatar
Laurent Aimar committed
83
        return 0;
84
    if( edx&0x02000000 )
Laurent Aimar's avatar
Laurent Aimar committed
85
        cpu |= X264_CPU_MMXEXT|X264_CPU_SSE;
86
    if( edx&0x04000000 )
Laurent Aimar's avatar
Laurent Aimar committed
87
        cpu |= X264_CPU_SSE2;
88
#ifdef HAVE_SSE3
89
    if( ecx&0x00000001 )
90
        cpu |= X264_CPU_SSE3;
91
    if( ecx&0x00000200 )
92 93
        cpu |= X264_CPU_SSSE3;
#endif
Laurent Aimar's avatar
Laurent Aimar committed
94 95

    x264_cpu_cpuid( 0x80000000, &eax, &ebx, &ecx, &edx );
96 97 98
    max_extended_cap = eax;

    if( !strcmp((char*)vendor, "AuthenticAMD") && max_extended_cap >= 0x80000001 )
Laurent Aimar's avatar
Laurent Aimar committed
99
    {
100 101 102 103 104
        x264_cpu_cpuid( 0x80000001, &eax, &ebx, &ecx, &edx );
        if( edx&0x80000000 )
            cpu |= X264_CPU_3DNOW;
        if( edx&0x00400000 )
            cpu |= X264_CPU_MMXEXT;
Laurent Aimar's avatar
Laurent Aimar committed
105 106
    }

107 108 109 110 111 112
    if( !strcmp((char*)vendor, "GenuineIntel") || !strcmp((char*)vendor, "CyrixInstead") )
        cpu |= X264_CPU_CACHELINE_SPLIT;
    /* cacheline size is specified in 3 places, any of which may be missing */
    x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
    cache = (ebx&0xff00)>>5; // cflush size
    if( !cache && max_extended_cap >= 0x80000006 )
Laurent Aimar's avatar
Laurent Aimar committed
113
    {
114 115
        x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
        cache = ecx&0xff; // cacheline size
Laurent Aimar's avatar
Laurent Aimar committed
116
    }
117
    if( !cache )
Laurent Aimar's avatar
Laurent Aimar committed
118
    {
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
        // Cache and TLB Information
        static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };
        static const char cache64_ids[] = { 0x22, 0x23, 0x25, 0x29, 0x2c, 0x46, 0x47, 0x49, 0x60, 0x66, 0x67, 0x68, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7c, 0x7f, 0x86, 0x87, 0 };
        uint32_t buf[4];
        int max, i=0, j;
        do {
            x264_cpu_cpuid( 2, buf+0, buf+1, buf+2, buf+3 );
            max = buf[0]&0xff;
            buf[0] &= ~0xff;
            for(j=0; j<4; j++)
                if( !(buf[j]>>31) )
                    while( buf[j] )
                    {
                        if( strchr( cache32_ids, buf[j]&0xff ) )
                            cache = 32;
                        if( strchr( cache64_ids, buf[j]&0xff ) )
                            cache = 64;
                        buf[j] >>= 8;
                    }
        } while( ++i < max );
Laurent Aimar's avatar
Laurent Aimar committed
139 140
    }

141 142 143 144 145
    if( cache == 32 )
        cpu |= X264_CPU_CACHELINE_32;
    if( cache == 64 )
        cpu |= X264_CPU_CACHELINE_64;

Laurent Aimar's avatar
Laurent Aimar committed
146 147 148 149 150 151 152 153 154 155 156
    return cpu;
}

/* Calls x264_emms() when `cpu` contains any MMX/3DNow-family capability flag;
 * does nothing otherwise. */
void     x264_cpu_restore( uint32_t cpu )
{
    const uint32_t mmx_family = X264_CPU_MMX | X264_CPU_MMXEXT
                              | X264_CPU_3DNOW | X264_CPU_3DNOWEXT;

    if( !(cpu & mmx_family) )
        return;

    x264_emms();
}

157
#elif defined( ARCH_PPC )
Laurent Aimar's avatar
Laurent Aimar committed
158

159 160
#ifdef SYS_MACOSX
#include <sys/sysctl.h>
Laurent Aimar's avatar
Laurent Aimar committed
161 162
/* PowerPC / Mac OS X capability detection (approach credited to VLC).
 * Queries the { CTL_HW, HW_VECTORUNIT } sysctl and returns X264_CPU_ALTIVEC
 * when the query succeeds with a nonzero value, 0 otherwise. */
uint32_t x264_cpu_detect( void )
{
    int    mib[2] = { CTL_HW, HW_VECTORUNIT };
    int    vector_unit = 0;
    size_t vector_unit_size = sizeof( vector_unit );

    if( sysctl( mib, 2, &vector_unit, &vector_unit_size, NULL, 0 ) != 0 )
        return 0;

    return vector_unit ? X264_CPU_ALTIVEC : 0;
}

178
#elif defined( SYS_LINUX )
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
#include <signal.h>
#include <setjmp.h>
static sigjmp_buf jmpbuf;
static volatile sig_atomic_t canjump = 0;

/* SIGILL handler installed while x264_cpu_detect() probes for AltiVec.
 *
 * While the probe is armed (canjump != 0) it disarms it and jumps back to the
 * sigsetjmp() point in x264_cpu_detect() with value 1.
 * For a SIGILL received at any other time it restores the default disposition
 * and re-raises the signal. */
static void sigill_handler( int sig )
{
    if( !canjump )
    {
        signal( sig, SIG_DFL );
        raise( sig );
        /* jmpbuf is not armed here (it may never have been set, or its
         * sigsetjmp() caller may already have returned), so jumping through
         * it would be undefined. Return and let the re-raised signal take
         * its default action. */
        return;
    }

    canjump = 0;
    siglongjmp( jmpbuf, 1 );
}

196 197
uint32_t x264_cpu_detect( void )
{
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
    static void (* oldsig)( int );

    oldsig = signal( SIGILL, sigill_handler );
    if( sigsetjmp( jmpbuf, 1 ) )
    {
        signal( SIGILL, oldsig );
        return 0;
    }

    canjump = 1;
    asm volatile( "mtspr 256, %0\n\t"
                  "vand 0, 0, 0\n\t"
                  :
                  : "r"(-1) );
    canjump = 0;

    signal( SIGILL, oldsig );

216 217 218 219
    return X264_CPU_ALTIVEC;
}
#endif

Laurent Aimar's avatar
Laurent Aimar committed
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
/* PowerPC builds: intentionally a no-op; `cpu` is ignored. */
void     x264_cpu_restore( uint32_t cpu )
{
    (void)cpu;
}

#else

/* Generic fallback used when neither the MMX nor the PowerPC branch is
 * compiled in: always reports an empty capability mask. */
uint32_t x264_cpu_detect( void )
{
    const uint32_t no_capabilities = 0;
    return no_capabilities;
}

/* Generic fallback: intentionally a no-op; `cpu` is ignored. */
void     x264_cpu_restore( uint32_t cpu )
{
    (void)cpu;
}

#endif
236 237 238 239


/* Returns the number of processors available to this process.
 *
 * Returns 1 when built without HAVE_PTHREAD, on platforms with no known
 * query method, and when the platform query fails. */
int x264_cpu_num_processors( void )
{
#if !defined(HAVE_PTHREAD)
    return 1;

#elif defined(_WIN32)
    return pthread_num_processors_np();

#elif defined(SYS_LINUX)
    unsigned int bit;
    int np = 0;
    cpu_set_t p_aff;
    memset( &p_aff, 0, sizeof(p_aff) );
    /* On failure the mask is meaningless; fall back to a single processor
     * instead of reporting zero. */
    if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) )
        return 1;
    /* Count set bits over the whole mask: sizeof() is in bytes, so the
     * number of bits is 8 * sizeof(p_aff). */
    for( bit = 0; bit < 8 * sizeof(p_aff); bit++ )
        np += (((uint8_t *)&p_aff)[bit / 8] >> (bit % 8)) & 1;
    return np;

#elif defined(SYS_BEOS)
    system_info info;
    get_system_info( &info );
    return info.cpu_count;

#elif defined(SYS_MACOSX) || defined(SYS_FREEBSD)
    int numberOfCPUs;
    size_t length = sizeof( numberOfCPUs );
    /* sysctlbyname() returns nonzero on failure */
    if( sysctlbyname("hw.ncpu", &numberOfCPUs, &length, NULL, 0) )
    {
        numberOfCPUs = 1;
    }
    return numberOfCPUs;

#else
    return 1;
#endif
}