Commit 8aef0e94 authored by Loren Merritt's avatar Loren Merritt

ssse3 detection. x86_64 ssse3 satd and quant.

requires yasm >= 0.6.0


git-svn-id: svn://svn.videolan.org/x264/trunk@631 df754926-b1dd-0310-bc7b-ec298dee348c
parent 1980de9b
......@@ -50,8 +50,15 @@ cglobal x264_pixel_satd_8x8_sse2
cglobal x264_pixel_satd_16x8_sse2
cglobal x264_pixel_satd_8x16_sse2
cglobal x264_pixel_satd_16x16_sse2
cglobal x264_pixel_satd_8x4_ssse3
cglobal x264_pixel_satd_8x8_ssse3
cglobal x264_pixel_satd_16x8_ssse3
cglobal x264_pixel_satd_8x16_ssse3
cglobal x264_pixel_satd_16x16_ssse3
cglobal x264_pixel_sa8d_8x8_sse2
cglobal x264_pixel_sa8d_16x16_sse2
cglobal x264_pixel_sa8d_8x8_ssse3
cglobal x264_pixel_sa8d_16x16_ssse3
cglobal x264_intra_sa8d_x3_8x8_core_sse2
cglobal x264_pixel_ssim_4x4x2_core_sse2
cglobal x264_pixel_ssim_end4_sse2
......@@ -267,6 +274,20 @@ x264_pixel_ssd_16x8_sse2:
SUMSUB_BADC %5, %6, %7, %8
%endmacro
;;; row transform not used, because phaddw is much slower than paddw on a Conroe
;%macro PHSUMSUB 3
; movdqa %3, %1
; phaddw %1, %2
; phsubw %3, %2
;%endmacro
;%macro HADAMARD4x1_SSSE3 5 ; ABCD-T -> ADTC
; PHSUMSUB %1, %2, %5
; PHSUMSUB %3, %4, %2
; PHSUMSUB %1, %3, %4
; PHSUMSUB %5, %2, %3
;%endmacro
%macro SBUTTERFLY 5
mov%1 %5, %3
punpckl%2 %3, %4
......@@ -318,6 +339,13 @@ x264_pixel_ssd_16x8_sse2:
psubw %1, %2
%endmacro
;-----------------------------------------------------------------------------
; LOAD_DIFF_4x8P: load 4 rows of 8 pixel differences into %1-%4 (as words).
; Reads pix1 at parm1q (stride parm2q) and pix2 at parm3q (stride parm4q)
; through LOAD_DIFF_8P; %5/%6 are scratch regs for the subtrahend rows.
; NOTE(review): assumes r10 = 3*parm2q and r11 = 3*parm4q were set up by the
; caller (SATD_START / sa8d prologue) — confirm at each use site.
;-----------------------------------------------------------------------------
%macro LOAD_DIFF_4x8P 6 ; 4x dest, 2x temp
LOAD_DIFF_8P %1, %5, [parm1q], [parm3q] ; row 0
LOAD_DIFF_8P %2, %6, [parm1q+parm2q], [parm3q+parm4q] ; row 1
LOAD_DIFF_8P %3, %5, [parm1q+2*parm2q], [parm3q+2*parm4q] ; row 2
LOAD_DIFF_8P %4, %6, [parm1q+r10], [parm3q+r11] ; row 3 (3*stride)
%endmacro
%macro SUM1x8_SSE2 3 ; 01 junk sum
pxor %2, %2
psubw %2, %1
......@@ -338,8 +366,7 @@ x264_pixel_ssd_16x8_sse2:
paddusw %4, %2
%endmacro
;;; two SUM4x4_SSE2 running side-by-side
%macro SUM4x4_TWO_SSE2 7 ; a02 a13 junk1 b02 b13 junk2 (1=4 2=5 3=6) sum
%macro SUM8x4_SSE2 7 ; a02 a13 junk1 b02 b13 junk2 (1=4 2=5 3=6) sum
pxor %3, %3
pxor %6, %6
psubw %3, %1
......@@ -358,18 +385,25 @@ x264_pixel_ssd_16x8_sse2:
paddusw %7, %4
%endmacro
%macro SATD_TWO_SSE2 0
LOAD_DIFF_8P xmm0, xmm4, [parm1q], [parm3q]
LOAD_DIFF_8P xmm1, xmm5, [parm1q+parm2q], [parm3q+parm4q]
LOAD_DIFF_8P xmm2, xmm4, [parm1q+2*parm2q], [parm3q+2*parm4q]
LOAD_DIFF_8P xmm3, xmm5, [parm1q+r10], [parm3q+r11]
lea parm1q, [parm1q+4*parm2q]
lea parm3q, [parm3q+4*parm4q]
;-----------------------------------------------------------------------------
; SUM8x4_SSSE3: accumulate sum of |coeffs| from %1,%2,%4,%5 into %7.
; SSSE3 replacement for SUM8x4_SSE2: pabsw computes the absolute value in
; one op, so the pxor/psubw/pmaxsw sequence and its temp regs are not
; needed. Args %3 and %6 are unused here; they exist only so the signature
; matches SUM8x4_SSE2 and the two can be swapped via "%define SUM8x4 ...".
; paddusw saturates, protecting the running total in %7 from overflow.
;-----------------------------------------------------------------------------
%macro SUM8x4_SSSE3 7 ; a02 a13 . b02 b13 . sum
pabsw %1, %1
pabsw %2, %2
pabsw %4, %4
pabsw %5, %5
paddusw %1, %2
paddusw %4, %5
paddusw %7, %1
paddusw %7, %4
%endmacro
;-----------------------------------------------------------------------------
; SATD_TWO_SSE2: one SATD pass over a 4-row x 8-col strip, i.e. two 4x4
; blocks processed side by side in the 8-word xmm lanes.
; Loads the pixel differences, advances both source pointers by 4 rows,
; then applies the 4-point Hadamard transform vertically, transposes the
; two 4x4 sub-blocks, and applies it horizontally.
; SUM8x4 must be %defined beforehand to SUM8x4_SSE2 or SUM8x4_SSSE3; it
; accumulates the absolute transformed coefficients into xmm6.
; NOTE(review): assumes r10/r11 hold 3*stride for LOAD_DIFF_4x8P — confirm
; the caller ran SATD_START (or equivalent) first.
;-----------------------------------------------------------------------------
%macro SATD_TWO_SSE2 0
LOAD_DIFF_4x8P xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
lea parm1q, [parm1q+4*parm2q] ; pix1 += 4*stride1
lea parm3q, [parm3q+4*parm4q] ; pix2 += 4*stride2
HADAMARD1x4 xmm0, xmm1, xmm2, xmm3 ; vertical transform
TRANSPOSE2x4x4W xmm0, xmm1, xmm2, xmm3, xmm4
HADAMARD1x4 xmm0, xmm1, xmm2, xmm3 ; horizontal transform
SUM8x4 xmm0, xmm1, xmm4, xmm2, xmm3, xmm5, xmm6
%endmacro
%macro SATD_START 0
......@@ -385,85 +419,72 @@ x264_pixel_ssd_16x8_sse2:
ret
%endmacro
%macro SATDS 1
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_16x16_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_16x16_sse2:
x264_pixel_satd_16x16_%1:
SATD_START
mov r8, rdi
mov r9, rdx
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
lea rdi, [r8+8]
lea rdx, [r9+8]
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_8x16_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_8x16_sse2:
x264_pixel_satd_8x16_%1:
SATD_START
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_16x8_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_16x8_sse2:
x264_pixel_satd_16x8_%1:
SATD_START
mov r8, rdi
mov r9, rdx
SATD_TWO_SSE2
SATD_TWO_SSE2
lea rdi, [r8+8]
lea rdx, [r9+8]
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_8x8_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_8x8_sse2:
x264_pixel_satd_8x8_%1:
SATD_START
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_8x4_sse2 (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_8x4_sse2:
x264_pixel_satd_8x4_%1:
SATD_START
SATD_TWO_SSE2
SATD_END
......@@ -471,27 +492,21 @@ ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_sa8d_8x8_sse2( uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_sa8d_8x8_sse2:
x264_pixel_sa8d_8x8_%1:
lea r10, [3*parm2q]
lea r11, [3*parm4q]
LOAD_DIFF_8P xmm0, xmm8, [parm1q], [parm3q]
LOAD_DIFF_8P xmm1, xmm9, [parm1q+parm2q], [parm3q+parm4q]
LOAD_DIFF_8P xmm2, xmm8, [parm1q+2*parm2q], [parm3q+2*parm4q]
LOAD_DIFF_8P xmm3, xmm9, [parm1q+r10], [parm3q+r11]
LOAD_DIFF_4x8P xmm0, xmm1, xmm2, xmm3, xmm8, xmm8
lea parm1q, [parm1q+4*parm2q]
lea parm3q, [parm3q+4*parm4q]
LOAD_DIFF_8P xmm4, xmm8, [parm1q], [parm3q]
LOAD_DIFF_8P xmm5, xmm9, [parm1q+parm2q], [parm3q+parm4q]
LOAD_DIFF_8P xmm6, xmm8, [parm1q+2*parm2q], [parm3q+2*parm4q]
LOAD_DIFF_8P xmm7, xmm9, [parm1q+r10], [parm3q+r11]
LOAD_DIFF_4x8P xmm4, xmm5, xmm6, xmm7, xmm8, xmm8
HADAMARD1x8 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
TRANSPOSE8x8 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8
HADAMARD1x8 xmm0, xmm5, xmm7, xmm3, xmm8, xmm4, xmm2, xmm1
pxor xmm10, xmm10
SUM4x4_TWO_SSE2 xmm0, xmm1, xmm6, xmm2, xmm3, xmm9, xmm10
SUM4x4_TWO_SSE2 xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10
SUM8x4 xmm0, xmm1, xmm6, xmm2, xmm3, xmm9, xmm10
SUM8x4 xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10
psrlw xmm10, 1
HADDW xmm10, xmm0
movd eax, xmm10
......@@ -505,26 +520,34 @@ ALIGN 16
; int x264_pixel_sa8d_16x16_sse2( uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
;; violates calling convention
x264_pixel_sa8d_16x16_sse2:
x264_pixel_sa8d_16x16_%1:
xor r8d, r8d
call x264_pixel_sa8d_8x8_sse2 ; pix[0]
call x264_pixel_sa8d_8x8_%1 ; pix[0]
lea parm1q, [parm1q+4*parm2q]
lea parm3q, [parm3q+4*parm4q]
call x264_pixel_sa8d_8x8_sse2 ; pix[8*stride]
call x264_pixel_sa8d_8x8_%1 ; pix[8*stride]
lea r10, [3*parm2q-2]
lea r11, [3*parm4q-2]
shl r10, 2
shl r11, 2
sub parm1q, r10
sub parm3q, r11
call x264_pixel_sa8d_8x8_sse2 ; pix[8]
call x264_pixel_sa8d_8x8_%1 ; pix[8]
lea parm1q, [parm1q+4*parm2q]
lea parm3q, [parm3q+4*parm4q]
call x264_pixel_sa8d_8x8_sse2 ; pix[8*stride+8]
call x264_pixel_sa8d_8x8_%1 ; pix[8*stride+8]
mov eax, r8d
add eax, 1
shr eax, 1
ret
%endmacro ; SATDS
%define SUM8x4 SUM8x4_SSE2
SATDS sse2
%ifdef HAVE_SSE3
%define SUM8x4 SUM8x4_SSSE3
SATDS ssse3
%endif
......@@ -567,7 +590,7 @@ x264_intra_sa8d_x3_8x8_core_sse2:
movdqa xmm9, xmm3
movdqa xmm10, xmm4
movdqa xmm11, xmm5
SUM4x4_TWO_SSE2 xmm8, xmm9, xmm12, xmm10, xmm11, xmm13, xmm15
SUM8x4_SSE2 xmm8, xmm9, xmm12, xmm10, xmm11, xmm13, xmm15
movdqa xmm8, xmm6
movdqa xmm9, xmm7
SUM4x4_SSE2 xmm8, xmm9, xmm10, xmm15
......
......@@ -45,6 +45,10 @@ cglobal x264_quant_4x4_dc_core15_mmx
cglobal x264_quant_4x4_core15_mmx
cglobal x264_quant_8x8_core15_mmx
cglobal x264_quant_4x4_dc_core15_ssse3
cglobal x264_quant_4x4_core15_ssse3
cglobal x264_quant_8x8_core15_ssse3
cglobal x264_quant_2x2_dc_core16_mmxext
cglobal x264_quant_4x4_dc_core16_mmxext
cglobal x264_quant_4x4_core16_mmxext
......@@ -76,6 +80,21 @@ cglobal x264_dequant_8x8_mmx
punpckldq mm7, mm7 ; f in each dword
%endmacro
;-----------------------------------------------------------------------------
; SSE2_QUANT_AC_START: common prologue for the AC quant routines.
; Moves the shift amount i_qbits into xmm6 (low dword, as psrad needs it)
; and broadcasts the rounding constant f into every dword of xmm7.
; The per-coefficient multipliers are loaded later from quant_mf.
;-----------------------------------------------------------------------------
%macro SSE2_QUANT_AC_START 0
movd xmm6, parm3d ; i_qbits
movd xmm7, parm4d ; f
pshufd xmm7, xmm7, 0 ; f in each dword
%endmacro
;-----------------------------------------------------------------------------
; SSE2_QUANT15_DC_START: prologue for the DC quant routine, where every
; coefficient shares the single multiplier i_qmf.
; Broadcasts i_qmf into all 8 words of xmm5 (pshuflw fills the low qword,
; punpcklqdq duplicates it into the high qword), puts i_qbits in xmm6 and
; the rounding constant f in every dword of xmm7.
;-----------------------------------------------------------------------------
%macro SSE2_QUANT15_DC_START 0
movd xmm5, parm2d ; i_qmf
movd xmm6, parm3d ; i_qbits
movd xmm7, parm4d ; f
pshuflw xmm5, xmm5, 0
punpcklqdq xmm5, xmm5 ; i_qmf in each word
pshufd xmm7, xmm7, 0 ; f in each dword
%endmacro
%macro MMX_QUANT15_1x4 4
;;; %1 (m64) dct[y][x]
;;; %2 (m64/mmx) quant_mf[y][x] or quant_mf[0][0] (as int16_t)
......@@ -104,7 +123,30 @@ cglobal x264_dequant_8x8_mmx
packssdw mm0, mm1 ; pack
pxor mm0, mm4 ; restore sign
psubw mm0, mm4
movq %1, mm0 ; store
movq %1, mm0 ; store
%endmacro
;-----------------------------------------------------------------------------
; SSSE3_QUANT15_1x8: quantize 8 int16 dct coefficients in place.
;   %1 (m128)     dct coeffs, read and written back
;   %2 (xmm/m128) 8 multipliers as int16 (the "15" contract: < 2^15, so
;                 the signed pmulhw gives the correct high half of |c|*mf)
;   %3 (xmm)      i_qbits (shift count in low dword)
;   %4 (xmm)      rounding constant f, broadcast in each dword
; Computes sign(c) * ((|c| * mf + f) >> i_qbits):
; pmullw/pmulhw give the low/high 16 bits of the product; the punpck pair
; interleaves them into full 32-bit products for the rounded shift, and
; packssdw re-packs to words. psignw re-applies the original sign from
; xmm4 (a zero coeff yields a zero product, so psignw zeroing on zero
; input is harmless). Clobbers xmm0-xmm2, xmm4.
;-----------------------------------------------------------------------------
%macro SSSE3_QUANT15_1x8 4
movdqa xmm0, %1 ; load dct coeffs
movdqa xmm4, xmm0 ; save sign
pabsw xmm0, xmm0
movdqa xmm2, xmm0
pmullw xmm0, %2 ; low 16 bits of |c|*mf
pmulhw xmm2, %2 ; high 16 bits of |c|*mf
movdqa xmm1, xmm0
punpcklwd xmm0, xmm2 ; full 32-bit products, coeffs 0-3
punpckhwd xmm1, xmm2 ; full 32-bit products, coeffs 4-7
paddd xmm0, %4 ; round with f
paddd xmm1, %4
psrad xmm0, %3
psrad xmm1, %3
packssdw xmm0, xmm1 ; pack
psignw xmm0, xmm4 ; restore sign
movdqa %1, xmm0 ; store
%endmacro
ALIGN 16
......@@ -168,6 +210,52 @@ x264_quant_8x8_core15_mmx:
ret
%ifdef HAVE_SSE3
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_quant_4x4_dc_core15_ssse3( int16_t dct[4][4],
; int const i_qmf, int const i_qbits, int const f );
;-----------------------------------------------------------------------------
; void x264_quant_4x4_dc_core15_ssse3( int16_t dct[4][4],
;                 int const i_qmf, int const i_qbits, int const f );
; All 16 DC coefficients share one multiplier, so the setup broadcasts
; i_qmf into xmm5 once and the 4x4 block is quantized as two runs of 8.
x264_quant_4x4_dc_core15_ssse3:
SSE2_QUANT15_DC_START
SSSE3_QUANT15_1x8 [parm1q], xmm5, xmm6, xmm7 ; coeffs 0-7
SSSE3_QUANT15_1x8 [parm1q+16], xmm5, xmm6, xmm7 ; coeffs 8-15
ret
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_quant_4x4_core15_ssse3( int16_t dct[4][4],
; int const quant_mf[4][4], int const i_qbits, int const f );
;-----------------------------------------------------------------------------
; void x264_quant_4x4_core15_ssse3( int16_t dct[4][4],
;                 int const quant_mf[4][4], int const i_qbits, int const f );
; Per-coefficient multipliers: quant_mf is int32[4][4], so each iteration
; packs two rows (8 dwords) down to 8 words before quantizing 8 coeffs.
; The "core15" contract (quant_mf values < 2^15) guarantees packssdw does
; not saturate and pmulhw's signedness is harmless.
x264_quant_4x4_core15_ssse3:
SSE2_QUANT_AC_START
%assign x 0
%rep 2
movdqa xmm5, [parm2q+32*x] ; quant_mf rows 2x, 2x+1 ...
packssdw xmm5, [parm2q+32*x+16] ; ... packed to 8 words
SSSE3_QUANT15_1x8 [parm1q+16*x], xmm5, xmm6, xmm7
%assign x x+1
%endrep
ret
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_quant_8x8_core15_ssse3( int16_t dct[8][8],
; int const quant_mf[8][8], int const i_qbits, int const f );
;-----------------------------------------------------------------------------
; void x264_quant_8x8_core15_ssse3( int16_t dct[8][8],
;                 int const quant_mf[8][8], int const i_qbits, int const f );
; Same scheme as the 4x4 version, unrolled over the 64 coefficients:
; each of the 8 iterations packs 8 int32 quant_mf entries to words and
; quantizes one row of 8 coefficients in place.
x264_quant_8x8_core15_ssse3:
SSE2_QUANT_AC_START
%assign x 0
%rep 8
movdqa xmm5, [parm2q+32*x] ; 4 dwords of quant_mf ...
packssdw xmm5, [parm2q+32*x+16] ; ... + 4 more, packed to 8 words
SSSE3_QUANT15_1x8 [parm1q+16*x], xmm5, xmm6, xmm7
%assign x x+1
%endrep
ret
%endif ; HAVE_SSE3
; ============================================================================
%macro MMXEXT_QUANT16_DC_START 0
......
......@@ -80,6 +80,16 @@ uint32_t x264_cpu_detect( void )
/* Is it OK ? */
cpu |= X264_CPU_SSE2;
}
#ifdef HAVE_SSE3
if( (ecx&0x00000001) )
{
cpu |= X264_CPU_SSE3;
}
if( (ecx&0x00000200) )
{
cpu |= X264_CPU_SSSE3;
}
#endif
x264_cpu_cpuid( 0x80000000, &eax, &ebx, &ecx, &edx );
if( eax < 0x80000001 )
......
......@@ -87,9 +87,18 @@ int x264_pixel_satd_8x16_sse2( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_8x8_sse2( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_8x4_sse2( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_16x16_ssse3( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_16x8_ssse3( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_8x16_ssse3( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_8x8_ssse3( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_8x4_ssse3( uint8_t *, int, uint8_t *, int );
int x264_pixel_sa8d_16x16_sse2( uint8_t *, int, uint8_t *, int );
int x264_pixel_sa8d_8x8_sse2( uint8_t *, int, uint8_t *, int );
int x264_pixel_sa8d_16x16_ssse3( uint8_t *, int, uint8_t *, int );
int x264_pixel_sa8d_8x8_ssse3( uint8_t *, int, uint8_t *, int );
void x264_intra_satd_x3_4x4_mmxext( uint8_t *, uint8_t *, int * );
void x264_intra_satd_x3_8x8c_mmxext( uint8_t *, uint8_t *, int * );
void x264_intra_satd_x3_16x16_mmxext( uint8_t *, uint8_t *, int * );
......
......@@ -32,6 +32,13 @@ void x264_quant_4x4_dc_core15_mmx( int16_t dct[4][4],
void x264_quant_2x2_dc_core15_mmx( int16_t dct[2][2],
int const i_qmf, int const i_qbits, int const f );
void x264_quant_8x8_core15_ssse3( int16_t dct[8][8],
int quant_mf[8][8], int const i_qbits, int const f );
void x264_quant_4x4_core15_ssse3( int16_t dct[4][4],
int quant_mf[4][4], int const i_qbits, int const f );
void x264_quant_4x4_dc_core15_ssse3( int16_t dct[4][4],
int const i_qmf, int const i_qbits, int const f );
void x264_quant_8x8_core16_mmxext( int16_t dct[8][8],
int quant_mf[8][8], int const i_qbits, int const f );
void x264_quant_4x4_core16_mmxext( int16_t dct[4][4],
......
......@@ -536,6 +536,19 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse2;
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
}
if( cpu&X264_CPU_SSSE3 )
{
#if defined(ARCH_X86_64) && defined(HAVE_SSE3)
pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_ssse3;
pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_ssse3;
pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_ssse3;
pixf->satd[PIXEL_8x8] = x264_pixel_satd_8x8_ssse3;
pixf->satd[PIXEL_8x4] = x264_pixel_satd_8x4_ssse3;
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_ssse3;
#endif
}
#endif
......
......@@ -229,6 +229,11 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
#ifdef HAVE_MMXEXT
/* select quant_8x8 based on CPU and maxQ8 */
#if defined(ARCH_X86_64) && defined(HAVE_SSE3)
if( maxQ8 < (1<<15) && cpu&X264_CPU_SSSE3 )
pf->quant_8x8_core = x264_quant_8x8_core15_ssse3;
else
#endif
if( maxQ8 < (1<<15) && cpu&X264_CPU_MMX )
pf->quant_8x8_core = x264_quant_8x8_core15_mmx;
else
......@@ -239,6 +244,11 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->quant_8x8_core = x264_quant_8x8_core32_mmxext;
/* select quant_4x4 based on CPU and maxQ4 */
#if defined(ARCH_X86_64) && defined(HAVE_SSE3)
if( maxQ4 < (1<<15) && cpu&X264_CPU_SSSE3 )
pf->quant_4x4_core = x264_quant_4x4_core15_ssse3;
else
#endif
if( maxQ4 < (1<<15) && cpu&X264_CPU_MMX )
pf->quant_4x4_core = x264_quant_4x4_core15_mmx;
else
......@@ -267,6 +277,11 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
pf->quant_2x2_dc_core = x264_quant_2x2_dc_core32_mmxext;
}
#if defined(ARCH_X86_64) && defined(HAVE_SSE3)
if( maxQdc < (1<<15) && cpu&X264_CPU_SSSE3 )
pf->quant_4x4_dc_core = x264_quant_4x4_dc_core15_ssse3;
#endif
if( cpu&X264_CPU_MMX )
{
/* dequant is not subject to the above CQM-dependent overflow issues,
......
......@@ -35,6 +35,15 @@ EOF
return $TMP
}
# as_check: succeed iff the assembler $AS can assemble the given snippet.
#   $1 - assembly source text to write into the test file
#   $2 - extra flags appended to the assembler invocation
# Scratch files conftest.* are created in the current directory and
# removed again; assembler diagnostics go to $DEVNULL.
as_check() {
    rm -f conftest*
    echo "$1" > conftest.asm
    $AS conftest.asm $ASFLAGS $2 -o conftest.o 2>$DEVNULL
    as_status="$?"
    rm -f conftest*
    return $as_status
}
rm -f config.h config.mak x264.pc
prefix='/usr/local'
......@@ -210,6 +219,17 @@ then
fi
fi
if [ $ARCH = X86_64 ] ; then
if ! as_check ; then
echo "No assembler. Please install yasm."
exit 1
fi
if as_check "pabsw xmm0, xmm0" ; then
ASFLAGS="$ASFLAGS -DHAVE_SSE3"
CFLAGS="$CFLAGS -DHAVE_SSE3"
fi
fi
CFLAGS="$CFLAGS -DARCH_$ARCH -DSYS_$SYS"
# parse options
......
......@@ -671,6 +671,7 @@ x264_t *x264_encoder_open ( x264_param_t *param )
param->cpu&X264_CPU_MMXEXT ? "MMXEXT " : "",
param->cpu&X264_CPU_SSE ? "SSE " : "",
param->cpu&X264_CPU_SSE2 ? "SSE2 " : "",
param->cpu&X264_CPU_SSSE3 ? "SSSE3 " : "",
param->cpu&X264_CPU_3DNOW ? "3DNow! " : "",
param->cpu&X264_CPU_ALTIVEC ? "Altivec " : "" );
......
......@@ -738,6 +738,7 @@ int check_all( int cpu_ref, int cpu_new )
int main(int argc, char *argv[])
{
int ret = 0;
int cpu0 = 0, cpu1 = 0;
int i;
buf1 = x264_malloc( 1024 ); /* 32 x 32 */
......@@ -759,13 +760,23 @@ int main(int argc, char *argv[])
#ifdef HAVE_MMXEXT
fprintf( stderr, "x264: MMXEXT against C\n" );
ret = check_all( 0, X264_CPU_MMX | X264_CPU_MMXEXT );
cpu1 = X264_CPU_MMX | X264_CPU_MMXEXT;
ret = check_all( 0, cpu1 );
#ifdef HAVE_SSE2
if( x264_cpu_detect() & X264_CPU_SSE2 )
{
fprintf( stderr, "\nx264: SSE2 against C\n" );
ret |= check_all( X264_CPU_MMX | X264_CPU_MMXEXT,
X264_CPU_MMX | X264_CPU_MMXEXT | X264_CPU_SSE | X264_CPU_SSE2 );
cpu0 = cpu1;
cpu1 |= X264_CPU_SSE | X264_CPU_SSE2;
ret |= check_all( cpu0, cpu1 );
if( x264_cpu_detect() & X264_CPU_SSSE3 )
{
fprintf( stderr, "\nx264: SSSE3 against C\n" );
cpu0 = cpu1;
cpu1 |= X264_CPU_SSE3 | X264_CPU_SSSE3;
ret |= check_all( cpu0, cpu1 );
}
}
#endif
#elif ARCH_PPC
......
......@@ -53,6 +53,8 @@ typedef struct x264_t x264_t;
#define X264_CPU_3DNOW 0x000010 /* 3dnow! */
#define X264_CPU_3DNOWEXT 0x000020 /* 3dnow! ext */
#define X264_CPU_ALTIVEC 0x000040 /* altivec */
#define X264_CPU_SSE3 0x000080 /* sse 3 */
#define X264_CPU_SSSE3 0x000100 /* ssse 3 */
/* Analyse flags
*/
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.