Commit 84676d2e authored by Loren Merritt

32-bit version of SSSE3 SATD.

Switch the default assembler to yasm. It will still fall back to nasm if you don't have yasm.


git-svn-id: svn://svn.videolan.org/x264/trunk@645 df754926-b1dd-0310-bc7b-ec298dee348c
parent 71c097b2
......@@ -31,7 +31,6 @@ BITS 64
SECTION .rodata align=16
pb_1: times 16 db 1
pw_1: times 8 dw 1
ssim_c1: times 4 dd 416 ; .01*.01*255*255*64
ssim_c2: times 4 dd 235963 ; .03*.03*255*255*64*63
......
......@@ -46,7 +46,7 @@ BITS 32
SECTION .text align=16
fakegot:
%else
SECTION .rodata data align=16
SECTION .rodata align=16
%endif
%endmacro
......@@ -140,6 +140,6 @@ BITS 32
; This is needed for ELF, otherwise the GNU linker assumes the stack is
; executable by default.
%ifidn __OUTPUT_FORMAT__,elf
SECTION .note.GNU-stack noalloc noexec nowrite progbits
SECTION ".note.GNU-stack" noalloc noexec nowrite progbits
%endif
......@@ -463,8 +463,7 @@ cglobal x264_pixel_ssd_16x8_sse2
paddusw %4, %2
%endmacro
;;; two SUM4x4_SSE2 running side-by-side
%macro SUM4x4_TWO_SSE2 7 ; a02 a13 junk1 b02 b13 junk2 (1=4 2=5 3=6) sum
%macro SUM8x4_SSE2 7 ; a02 a13 junk1 b02 b13 junk2 (1=4 2=5 3=6) sum
pxor %3, %3
pxor %6, %6
psubw %3, %1
......@@ -483,6 +482,17 @@ cglobal x264_pixel_ssd_16x8_sse2
paddusw %7, %4
%endmacro
; SUM8x4_SSSE3: fold the per-word absolute values of four vectors into a
; running sum, using SSSE3 pabsw instead of a negate-and-max sequence.
;   %1, %2 : first pair of input vectors  (overwritten)
;   %4, %5 : second pair of input vectors (overwritten)
;   %3, %6 : never referenced in this macro; the arity stays at 7 so the
;            macro can be selected through the SUM8x4 define with the same
;            operand list as SUM8x4_SSE2, which does write %3 and %6.
;   %7     : accumulator, updated in place
; Every addition is paddusw, i.e. unsigned saturating on 16-bit lanes.
%macro SUM8x4_SSSE3 7 ; a02 a13 . b02 b13 . sum
pabsw %1, %1 ; %1 = |%1| per word
pabsw %2, %2 ; %2 = |%2| per word
pabsw %4, %4 ; %4 = |%4| per word
pabsw %5, %5 ; %5 = |%5| per word
paddusw %1, %2 ; %1 = |%1| + |%2| (saturating)
paddusw %4, %5 ; %4 = |%4| + |%5| (saturating)
paddusw %7, %1 ; accumulate first pair into the sum
paddusw %7, %4 ; accumulate second pair into the sum
%endmacro
%macro SATD_TWO_SSE2 0
LOAD_DIFF_8P xmm0, xmm4, [eax], [ecx]
LOAD_DIFF_8P xmm1, xmm5, [eax+ebx], [ecx+edx]
......@@ -496,7 +506,7 @@ cglobal x264_pixel_ssd_16x8_sse2
HADAMARD1x4 xmm0, xmm1, xmm2, xmm3
TRANSPOSE2x4x4W xmm0, xmm1, xmm2, xmm3, xmm4
HADAMARD1x4 xmm0, xmm1, xmm2, xmm3
SUM4x4_TWO_SSE2 xmm0, xmm1, xmm4, xmm2, xmm3, xmm5, xmm6
SUM8x4 xmm0, xmm1, xmm4, xmm2, xmm3, xmm5, xmm6
%endmacro
%macro SATD_START 0
......@@ -519,81 +529,76 @@ cglobal x264_pixel_ssd_16x8_sse2
ret
%endmacro
%macro SATDS 1
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_16x16_{sse2,ssse3} (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
cglobal x264_pixel_satd_16x16_sse2
cglobal x264_pixel_satd_16x16_%1
SATD_START
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
mov eax, [esp+ 8]
mov ecx, [esp+16]
add eax, 8
add ecx, 8
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_8x16_{sse2,ssse3} (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
cglobal x264_pixel_satd_8x16_sse2
cglobal x264_pixel_satd_8x16_%1
SATD_START
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_16x8_{sse2,ssse3} (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
cglobal x264_pixel_satd_16x8_sse2
cglobal x264_pixel_satd_16x8_%1
SATD_START
SATD_TWO_SSE2
SATD_TWO_SSE2
mov eax, [esp+ 8]
mov ecx, [esp+16]
add eax, 8
add ecx, 8
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_8x8_{sse2,ssse3} (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
cglobal x264_pixel_satd_8x8_sse2
cglobal x264_pixel_satd_8x8_%1
SATD_START
SATD_TWO_SSE2
SATD_TWO_SSE2
SATD_END
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_8x4_{sse2,ssse3} (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
cglobal x264_pixel_satd_8x4_sse2
cglobal x264_pixel_satd_8x4_%1
SATD_START
SATD_TWO_SSE2
SATD_END
%endmacro ; SATDS
%define SUM8x4 SUM8x4_SSE2
SATDS sse2
%ifdef HAVE_SSE3
%define SUM8x4 SUM8x4_SSSE3
SATDS ssse3
%endif
......
......@@ -539,14 +539,16 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
if( cpu&X264_CPU_SSSE3 )
{
#if defined(ARCH_X86_64) && defined(HAVE_SSE3)
#ifdef HAVE_SSE3
pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_ssse3;
pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_ssse3;
pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_ssse3;
pixf->satd[PIXEL_8x8] = x264_pixel_satd_8x8_ssse3;
pixf->satd[PIXEL_8x4] = x264_pixel_satd_8x4_ssse3;
#ifdef ARCH_X86_64
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_ssse3;
#endif
#endif
}
#endif //HAVE_MMX
......
......@@ -68,7 +68,7 @@ CFLAGS="$CFLAGS -Wall -I."
LDFLAGS="$LDFLAGS"
HAVE_GETOPT_LONG=1
AS="nasm"
AS="yasm"
ASFLAGS=""
EXE=""
......@@ -148,7 +148,7 @@ case "${MACHINE%%-*}" in
i*86)
ARCH="X86"
CFLAGS="$CFLAGS -DHAVE_MMX"
AS="nasm"
AS="yasm"
ASFLAGS="-O2"
if [ "$SYS" = MACOSX ]; then
ASFLAGS="$ASFLAGS -f macho -DPREFIX"
......@@ -159,6 +159,7 @@ case "${MACHINE%%-*}" in
else
ASFLAGS="$ASFLAGS -f elf"
fi
as_check || AS="nasm"
;;
x86_64)
ARCH="X86_64"
......@@ -218,7 +219,7 @@ then
fi
fi
if [ $ARCH = X86_64 ] ; then
if [ $ARCH = X86 -o $ARCH = X86_64 ] ; then
if ! as_check ; then
echo "No assembler. Please install yasm."
exit 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment