Commit ce7ee9d2 authored by Fiona Glaser

Check for OS AVX support in addition to CPUID

Even when ymm registers are not used, AVX instructions raise SIGILL on operating systems that do not support AVX.
On Windows, AVX is available only on Windows 7 SP1 or later.
parent e6025413
......@@ -94,7 +94,8 @@ static void sigill_handler( int sig )
#if HAVE_MMX
int x264_cpu_cpuid_test( void );
uint32_t x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
void x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
void x264_cpu_xgetbv( uint32_t op, int *eax, int *edx );
uint32_t x264_cpu_detect( void )
{
......@@ -130,8 +131,14 @@ uint32_t x264_cpu_detect( void )
cpu |= X264_CPU_SSE4;
if( ecx&0x00100000 )
cpu |= X264_CPU_SSE42;
if( ecx&0x10000000 )
cpu |= X264_CPU_AVX;
/* Check OXSAVE and AVX bits */
if( (ecx&0x18000000) == 0x18000000 )
{
/* Check for OS support */
x264_cpu_xgetbv( 0, &eax, &edx );
if( (eax&0x6) == 0x6 )
cpu |= X264_CPU_AVX;
}
if( cpu & X264_CPU_SSSE3 )
cpu |= X264_CPU_SSE2_IS_FAST;
......
......@@ -29,27 +29,43 @@
SECTION .text
%ifdef ARCH_X86_64
;-----------------------------------------------------------------------------
; NOTE(review): this span looks like a diff rendered without +/- markers, so
; two versions of the signature comment and of the body are interleaved. It is
; not meant to be assembled exactly as shown -- confirm against the repository.
;
; int cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
; void cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
;
; Both variants execute CPUID with EAX = op and store the resulting EAX, EBX,
; ECX and EDX through the four pointer arguments.
;-----------------------------------------------------------------------------
cglobal cpu_cpuid, 5,7
; Variant A (presumably the removed lines): copy the output pointers into
; r8-r11 before CPUID overwrites eax/ebx/ecx/edx.
push rbx
mov r11, r1
mov r10, r2
movifnidn r9, r3
movifnidn r8, r4
mov eax, r0d
; Variant B (presumably the added lines): save rbx, then park the output
; pointers on the stack in reverse order so they pop back as r1, r2, r3, r4.
push rbx
push r4
push r3
push r2
push r1
mov eax, r0d
cpuid
; Variant A stores: write the results through the pointers held in r8-r11.
mov [r11], eax
mov [r10], ebx
mov [r9], ecx
mov [r8], edx
pop rbx
; Variant B stores: pop each pointer into rsi and write the matching result.
pop rsi
mov [rsi], eax
pop rsi
mov [rsi], ebx
pop rsi
mov [rsi], ecx
pop rsi
mov [rsi], edx
; Restore the rbx value saved at the top.
pop rbx
RET
;-----------------------------------------------------------------------------
; void cpu_xgetbv( int op, int *eax, int *edx )
;
; Executes XGETBV with ECX = op and stores the resulting EAX through the
; `eax` pointer and the resulting EDX through the `edx` pointer.
; NOTE(review): assumes cglobal maps the three arguments to r0, r1, r2 in
; declaration order -- the macro is defined outside this view.
;-----------------------------------------------------------------------------
cglobal cpu_xgetbv, 3,7
; Park both output pointers on the stack; presumably their argument registers
; may alias ecx/eax/edx, which the instruction reads or overwrites.
push r2
push r1
mov ecx, r0d ; ecx selects which extended control register is read
xgetbv ; result comes back in edx:eax
; Pops return in reverse push order: r1 (eax pointer) first, then r2.
pop rsi
mov [rsi], eax
pop rsi
mov [rsi], edx
RET
%else
%ifndef ARCH_X86_64
;-----------------------------------------------------------------------------
; int cpu_cpuid_test( void )
......@@ -77,22 +93,6 @@ cglobal cpu_cpuid_test
popfd
ret
;-----------------------------------------------------------------------------
; int cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
;
; Executes CPUID for leaf `op`, sub-leaf 0, and stores the resulting EAX, EBX,
; ECX and EDX through the four pointer arguments.
; Arguments are read from their stack slots (r0m..r4m) rather than preloaded
; into registers, since CPUID overwrites eax/ebx/ecx/edx.
;-----------------------------------------------------------------------------
cglobal cpu_cpuid, 0,6
mov eax, r0m
; Select sub-leaf 0 explicitly: some CPUID leaves are indexed by ECX, and
; without this the result would depend on whatever ECX happened to hold.
; CPUID overwrites ECX anyway, so no extra register is clobbered.
xor ecx, ecx
cpuid
; Fetch each output pointer from the stack in turn and store its result.
mov esi, r1m
mov [esi], eax
mov esi, r2m
mov [esi], ebx
mov esi, r3m
mov [esi], ecx
mov esi, r4m
mov [esi], edx
RET
;-----------------------------------------------------------------------------
; void stack_align( void (*func)(void*), void *arg );
;-----------------------------------------------------------------------------
......
......@@ -122,9 +122,8 @@ typedef struct
#define X264_CPU_FAST_NEON_MRC 0x080000 /* Transfer from NEON to ARM register is fast (Cortex-A9) */
#define X264_CPU_SLOW_CTZ 0x100000 /* BSR/BSF x86 instructions are really slow on some CPUs */
#define X264_CPU_SLOW_ATOM 0x200000 /* The Atom just sucks */
#define X264_CPU_AVX 0x400000 /* AVX support -- we don't currently use YMM registers, just
* the 3-operand capability, so we don't require OS support
* for AVX. */
#define X264_CPU_AVX 0x400000 /* AVX support: requires OS support even if YMM registers
* aren't used. */
/* Analyse flags
*/
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment