Commit e3a07e09 authored by Loren Merritt

cosmetics in asm macros


git-svn-id: svn://svn.videolan.org/x264/trunk@640 df754926-b1dd-0310-bc7b-ec298dee348c
parent 71943e8a
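The change is mechanical: previously, every asm function needed three pieces of boilerplate (a standalone cglobal declaration near the top of the file, an ALIGN 16 directive, and the name: label at the definition site). This commit folds the alignment and the label into the cglobal macro itself, so one cglobal line at the definition site replaces all three. A minimal sketch of the resulting macro, assuming the usual PREFIX symbol-prefix branch from amd64inc.asm (only the WIN64, align, and label lines are visible in the first hunk below; the rest is an assumption):

%macro cglobal 1
    %ifdef PREFIX
        global _%1          ; assumption: prefixed symbol name on platforms that mangle C symbols
        %define %1 _%1
    %else
        global %1
    %endif
    %ifdef WIN64
        %define %1 pad %1   ; route through the WIN64 "pad" macro (as shown in the hunk below)
    %endif
    align 16                ; keep function entry points 16-byte aligned
    %1:                     ; emit the label here, so callers drop the ALIGN 16 + name: pair
%endmacro

With this in place, the per-function pattern throughout the files below collapses from

cglobal x264_emms       ; declaration at top of file
...
ALIGN 16
x264_emms:              ; definition

to just

cglobal x264_emms       ; declares, aligns, and defines in one line
    emms
    ret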
@@ -37,6 +37,8 @@ BITS 64
%ifdef WIN64
%define %1 pad %1
%endif
align 16
%1:
%endmacro
%macro pad 1
......
@@ -35,15 +35,10 @@ BITS 64
SECTION .text
cglobal x264_cpu_cpuid_test
cglobal x264_cpu_cpuid
cglobal x264_emms
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_cpu_cpuid_test( void ) return 0 if unsupported
;-----------------------------------------------------------------------------
x264_cpu_cpuid_test:
cglobal x264_cpu_cpuid_test
firstpush rbx
pushreg rbx
push rbp
@@ -69,11 +64,10 @@ x264_cpu_cpuid_test:
ret
endfunc
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
;-----------------------------------------------------------------------------
x264_cpu_cpuid:
cglobal x264_cpu_cpuid
firstpush rbx
pushreg rbx
endprolog
@@ -97,11 +91,10 @@ x264_cpu_cpuid:
ret
endfunc
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_emms( void )
;-----------------------------------------------------------------------------
x264_emms:
cglobal x264_emms
emms
ret
@@ -158,13 +158,10 @@ pw_32: times 8 dw 32
SECTION .text
cglobal x264_dct4x4dc_mmx
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_dct4x4dc_mmx( int16_t d[4][4] )
;-----------------------------------------------------------------------------
x264_dct4x4dc_mmx:
cglobal x264_dct4x4dc_mmx
movq mm0, [parm1q+ 0]
movq mm1, [parm1q+ 8]
movq mm2, [parm1q+16]
@@ -193,13 +190,10 @@ x264_dct4x4dc_mmx:
movq [parm1q+24],mm4
ret
cglobal x264_idct4x4dc_mmx
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_idct4x4dc_mmx( int16_t d[4][4] )
;-----------------------------------------------------------------------------
x264_idct4x4dc_mmx:
cglobal x264_idct4x4dc_mmx
movq mm0, [parm1q+ 0]
movq mm1, [parm1q+ 8]
movq mm2, [parm1q+16]
@@ -219,13 +213,10 @@ x264_idct4x4dc_mmx:
movq [parm1q+24], mm4
ret
cglobal x264_sub4x4_dct_mmx
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_sub4x4_dct_mmx( int16_t dct[4][4], uint8_t *pix1, uint8_t *pix2 )
;-----------------------------------------------------------------------------
x264_sub4x4_dct_mmx:
cglobal x264_sub4x4_dct_mmx
MMX_ZERO mm7
; Load 4 lines
@@ -253,13 +244,10 @@ x264_sub4x4_dct_mmx:
movq [parm1q+24], mm0
ret
cglobal x264_add4x4_idct_mmx
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_add4x4_idct_mmx( uint8_t *p_dst, int16_t dct[4][4] )
;-----------------------------------------------------------------------------
x264_add4x4_idct_mmx:
cglobal x264_add4x4_idct_mmx
; Load dct coeffs
movq mm0, [parm2q+ 0] ; dct
movq mm1, [parm2q+ 8]
@@ -347,13 +335,10 @@ x264_add4x4_idct_mmx:
psubw %4, %1 ; %4=b5
%endmacro
cglobal x264_sub8x8_dct8_sse2
ALIGN 16
;-----------------------------------------------------------------------------
; void __cdecl x264_sub8x8_dct8_sse2( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 )
;-----------------------------------------------------------------------------
x264_sub8x8_dct8_sse2:
cglobal x264_sub8x8_dct8_sse2
MMX_ZERO xmm9
MMX_LOAD_DIFF_8P xmm0, xmm8, xmm9, [parm2q+0*FENC_STRIDE], [parm3q+0*FDEC_STRIDE]
@@ -433,13 +418,10 @@ x264_sub8x8_dct8_sse2:
MMX_SUMSUB_BA %4, %5 ; %4=c3, %5=c4
%endmacro
cglobal x264_add8x8_idct8_sse2
ALIGN 16
;-----------------------------------------------------------------------------
; void __cdecl x264_add8x8_idct8_sse2( uint8_t *p_dst, int16_t dct[8][8] )
;-----------------------------------------------------------------------------
x264_add8x8_idct8_sse2:
cglobal x264_add8x8_idct8_sse2
movdqa xmm0, [parm2q+0x00]
movdqa xmm1, [parm2q+0x10]
movdqa xmm2, [parm2q+0x20]
@@ -472,9 +454,7 @@ x264_add8x8_idct8_sse2:
; uint8_t *pix1, uint8_t *pix2 )
;-----------------------------------------------------------------------------
%macro SUB_NxN_DCT 6
ALIGN 16
cglobal %1
%1:
call %2
add parm1q, %3
add parm2q, %4-%5*FENC_STRIDE
@@ -494,9 +474,7 @@ cglobal %1
; void __cdecl x264_add8x8_idct_mmx( uint8_t *pix, int16_t dct[4][4][4] )
;-----------------------------------------------------------------------------
%macro ADD_NxN_IDCT 6
ALIGN 16
cglobal %1
%1:
call %2
add parm1q, %4-%5*FDEC_STRIDE
add parm2q, %3
@@ -522,9 +500,7 @@ ADD_NxN_IDCT x264_add16x16_idct8_sse2, x264_add8x8_idct8_sse2, 128, 8, 0, 8
;-----------------------------------------------------------------------------
; void __cdecl x264_zigzag_scan_4x4_field_sse2( int level[16], int16_t dct[4][4] )
;-----------------------------------------------------------------------------
ALIGN 16
cglobal x264_zigzag_scan_4x4_field_sse2
x264_zigzag_scan_4x4_field_sse2:
punpcklwd xmm0, [parm2q]
punpckhwd xmm1, [parm2q]
punpcklwd xmm2, [parm2q+16]
......
@@ -30,12 +30,6 @@ pb_03: times 16 db 0x03
pb_a1: times 16 db 0xa1
SECTION .text
cglobal x264_deblock_v_luma_sse2
cglobal x264_deblock_h_luma_sse2
cglobal x264_deblock_v_chroma_mmxext
cglobal x264_deblock_h_chroma_mmxext
cglobal x264_deblock_v_chroma_intra_mmxext
cglobal x264_deblock_h_chroma_intra_mmxext
; expands to [base],...,[base+7*stride]
%define PASS8ROWS(base, base3, stride, stride3) \
@@ -267,11 +261,10 @@ cglobal x264_deblock_h_chroma_intra_mmxext
SECTION .text
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
x264_deblock_v_luma_sse2:
cglobal x264_deblock_v_luma_sse2
; rdi = pix
movsxd rsi, esi ; stride
dec edx ; alpha-1
@@ -317,11 +310,10 @@ x264_deblock_v_luma_sse2:
ret
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
x264_deblock_h_luma_sse2:
cglobal x264_deblock_h_luma_sse2
movsxd r10, esi
lea r11, [r10+r10*2]
lea rax, [rdi-4]
@@ -383,11 +375,10 @@ x264_deblock_h_luma_sse2:
add rdi, r9
%endmacro
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_deblock_v_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
x264_deblock_v_chroma_mmxext:
cglobal x264_deblock_v_chroma_mmxext
CHROMA_V_START
movq mm0, [rax]
@@ -406,11 +397,10 @@ x264_deblock_v_chroma_mmxext:
ret
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_deblock_h_chroma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
x264_deblock_h_chroma_mmxext:
cglobal x264_deblock_h_chroma_mmxext
CHROMA_H_START
TRANSPOSE4x8_LOAD PASS8ROWS(rax, rdi, rsi, r9)
@@ -454,11 +444,10 @@ x264_deblock_h_chroma_mmxext:
paddb mm2, mm6
%endmacro
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_deblock_v_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
x264_deblock_v_chroma_intra_mmxext:
cglobal x264_deblock_v_chroma_intra_mmxext
CHROMA_V_START
movq mm0, [rax]
@@ -472,11 +461,10 @@ x264_deblock_v_chroma_intra_mmxext:
movq [rdi], mm2
ret
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_deblock_h_chroma_intra_mmxext( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
x264_deblock_h_chroma_intra_mmxext:
cglobal x264_deblock_h_chroma_intra_mmxext
CHROMA_H_START
TRANSPOSE4x8_LOAD PASS8ROWS(rax, rdi, rsi, r9)
CHROMA_INTRA_BODY
......
@@ -56,38 +56,17 @@ pw_64: times 4 dw 64
SECTION .text
cglobal x264_pixel_avg_w4_mmxext
cglobal x264_pixel_avg_w8_mmxext
cglobal x264_pixel_avg_w16_mmxext
cglobal x264_pixel_avg_w20_mmxext
cglobal x264_pixel_avg_w16_sse2
cglobal x264_pixel_avg_weight_4x4_mmxext
cglobal x264_pixel_avg_weight_w8_mmxext
cglobal x264_pixel_avg_weight_w16_mmxext
cglobal x264_mc_copy_w4_mmx
cglobal x264_mc_copy_w8_mmx
cglobal x264_mc_copy_w16_mmx
cglobal x264_mc_copy_w16_sse2
cglobal x264_mc_chroma_mmxext
cglobal x264_prefetch_fenc_mmxext
cglobal x264_prefetch_ref_mmxext
;=============================================================================
; pixel avg
;=============================================================================
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w4_mmxext( uint8_t *dst, int i_dst_stride,
; uint8_t *src1, int i_src1_stride,
; uint8_t *src2, int i_src2_stride,
; int i_height );
;-----------------------------------------------------------------------------
x264_pixel_avg_w4_mmxext:
cglobal x264_pixel_avg_w4_mmxext
mov r10, parm5q ; src2
movsxd r11, parm6d ; i_src2_stride
mov eax, parm7d ; i_height
@@ -109,14 +88,13 @@ ALIGN 4
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w8_mmxext( uint8_t *dst, int i_dst_stride,
; uint8_t *src1, int i_src1_stride,
; uint8_t *src2, int i_src2_stride,
; int i_height );
;-----------------------------------------------------------------------------
x264_pixel_avg_w8_mmxext:
cglobal x264_pixel_avg_w8_mmxext
mov r10, parm5q ; src2
movsxd r11, parm6d ; i_src2_stride
mov eax, parm7d ; i_height
@@ -136,14 +114,13 @@ ALIGN 4
jg .height_loop
rep ret
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w16_mmxext( uint8_t *dst, int i_dst_stride,
; uint8_t *src1, int i_src1_stride,
; uint8_t *src2, int i_src2_stride,
; int i_height );
;-----------------------------------------------------------------------------
x264_pixel_avg_w16_mmxext:
cglobal x264_pixel_avg_w16_mmxext
mov r10, parm5q ; src2
movsxd r11, parm6d ; i_src2_stride
mov eax, parm7d ; i_height
@@ -163,14 +140,13 @@ ALIGN 4
jg .height_loop
rep ret
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w20_mmxext( uint8_t *dst, int i_dst_stride,
; uint8_t *src1, int i_src1_stride,
; uint8_t *src2, int i_src2_stride,
; int i_height );
;-----------------------------------------------------------------------------
x264_pixel_avg_w20_mmxext:
cglobal x264_pixel_avg_w20_mmxext
mov r10, parm5q ; src2
movsxd r11, parm6d ; i_src2_stride
mov eax, parm7d ; i_height
@@ -193,14 +169,13 @@ ALIGN 4
jg .height_loop
rep ret
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w16_sse2( uint8_t *dst, int i_dst_stride,
; uint8_t *src1, int i_src1_stride,
; uint8_t *src2, int i_src2_stride,
; int i_height );
;-----------------------------------------------------------------------------
x264_pixel_avg_w16_sse2:
cglobal x264_pixel_avg_w16_sse2
mov r10, parm5q ; src2
movsxd r11, parm6d ; i_src2_stride
mov eax, parm7d ; i_height
@@ -260,11 +235,10 @@ ALIGN 4
.height_loop
%endmacro
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_avg_weight_w16_mmxext( uint8_t *dst, int, uint8_t *src, int, int i_weight, int )
;-----------------------------------------------------------------------------
x264_pixel_avg_weight_w16_mmxext:
cglobal x264_pixel_avg_weight_w16_mmxext
BIWEIGHT_START_MMX
BIWEIGHT_4P_MMX [parm1q ], [parm3q ]
@@ -278,11 +252,10 @@ x264_pixel_avg_weight_w16_mmxext:
jg .height_loop
rep ret
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_avg_weight_w8_mmxext( uint8_t *, int, uint8_t *, int, int, int )
;-----------------------------------------------------------------------------
x264_pixel_avg_weight_w8_mmxext:
cglobal x264_pixel_avg_weight_w8_mmxext
BIWEIGHT_START_MMX
BIWEIGHT_4P_MMX [parm1q ], [parm3q ]
@@ -294,11 +267,10 @@ x264_pixel_avg_weight_w8_mmxext:
jg .height_loop
rep ret
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_avg_weight_4x4_mmxext( uint8_t *, int, uint8_t *, int, int )
;-----------------------------------------------------------------------------
x264_pixel_avg_weight_4x4_mmxext:
cglobal x264_pixel_avg_weight_4x4_mmxext
BIWEIGHT_START_MMX
BIWEIGHT_4P_MMX [parm1q ], [parm3q ]
BIWEIGHT_4P_MMX [parm1q+parm2q ], [parm3q+parm4q ]
@@ -314,12 +286,11 @@ x264_pixel_avg_weight_4x4_mmxext:
; pixel copy
;=============================================================================
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_mc_copy_w4_mmx( uint8_t *dst, int i_dst_stride,
; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
x264_mc_copy_w4_mmx:
cglobal x264_mc_copy_w4_mmx
mov eax, parm5d ; i_height
ALIGN 4
@@ -335,12 +306,11 @@ ALIGN 4
jg .height_loop
rep ret
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_mc_copy_w8_mmx( uint8_t *dst, int i_dst_stride,
; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
x264_mc_copy_w8_mmx:
cglobal x264_mc_copy_w8_mmx
mov eax, parm5d ; i_height
lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
@@ -363,12 +333,11 @@ ALIGN 4
jg .height_loop
rep ret
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_mc_copy_w16_mmx( uint8_t *dst, int i_dst_stride,
; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
x264_mc_copy_w16_mmx:
cglobal x264_mc_copy_w16_mmx
mov eax, parm5d ; i_height
lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
@@ -399,11 +368,10 @@ ALIGN 4
rep ret
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_mc_copy_w16_sse2( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
x264_mc_copy_w16_sse2:
cglobal x264_mc_copy_w16_sse2
mov eax, parm5d ; i_height
ALIGN 4
@@ -424,15 +392,13 @@ ALIGN 4
; chroma MC
;=============================================================================
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_mc_chroma_mmxext( uint8_t *src, int i_src_stride,
; uint8_t *dst, int i_dst_stride,
; int dx, int dy,
; int i_width, int i_height )
;-----------------------------------------------------------------------------
x264_mc_chroma_mmxext:
cglobal x264_mc_chroma_mmxext
mov r10d, parm6d
mov r11d, parm5d
sar r10d, 3
@@ -590,8 +556,7 @@ ALIGN 4
; void x264_prefetch_fenc_mmxext( uint8_t *pix_y, int stride_y,
; uint8_t *pix_uv, int stride_uv, int mb_x )
;-----------------------------------------------------------------------------
ALIGN 16
x264_prefetch_fenc_mmxext:
cglobal x264_prefetch_fenc_mmxext
mov eax, parm5d
and eax, 3
imul eax, parm2d
@@ -613,8 +578,7 @@ x264_prefetch_fenc_mmxext:
;-----------------------------------------------------------------------------
; void x264_prefetch_ref_mmxext( uint8_t *pix, int stride, int parity )
;-----------------------------------------------------------------------------
ALIGN 16
x264_prefetch_ref_mmxext:
cglobal x264_prefetch_ref_mmxext
dec parm3d
and parm3d, parm2d
lea parm1q, [parm1q+parm3q*8+64]
......
@@ -94,16 +94,11 @@ pw_32: times 4 dw 32
SECTION .text
cglobal x264_hpel_filter_mmxext
cglobal x264_plane_copy_mmxext
;-----------------------------------------------------------------------------
; void x264_hpel_filter_mmxext( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
; int i_stride, int i_width, int i_height );
;-----------------------------------------------------------------------------
ALIGN 16
x264_hpel_filter_mmxext :
cglobal x264_hpel_filter_mmxext
%ifdef WIN64
push rdi
@@ -276,8 +271,7 @@ ALIGN 16
; void x264_plane_copy_mmxext( uint8_t *dst, int i_dst,
; uint8_t *src, int i_src, int w, int h)
;-----------------------------------------------------------------------------
ALIGN 16
x264_plane_copy_mmxext:
cglobal x264_plane_copy_mmxext
movsxd parm2q, parm2d
movsxd parm4q, parm4d
add parm5d, 3
......
@@ -408,59 +408,6 @@ BITS 64
SECTION .text
cglobal x264_pixel_sad_16x16_mmxext
cglobal x264_pixel_sad_16x8_mmxext
cglobal x264_pixel_sad_8x16_mmxext
cglobal x264_pixel_sad_8x8_mmxext
cglobal x264_pixel_sad_8x4_mmxext
cglobal x264_pixel_sad_4x8_mmxext
cglobal x264_pixel_sad_4x4_mmxext
cglobal x264_pixel_sad_x3_16x16_mmxext
cglobal x264_pixel_sad_x3_16x8_mmxext
cglobal x264_pixel_sad_x3_8x16_mmxext
cglobal x264_pixel_sad_x3_8x8_mmxext
cglobal x264_pixel_sad_x3_8x4_mmxext
cglobal x264_pixel_sad_x3_4x8_mmxext
cglobal x264_pixel_sad_x3_4x4_mmxext
cglobal x264_pixel_sad_x4_16x16_mmxext
cglobal x264_pixel_sad_x4_16x8_mmxext
cglobal x264_pixel_sad_x4_8x16_mmxext
cglobal x264_pixel_sad_x4_8x8_mmxext
cglobal x264_pixel_sad_x4_8x4_mmxext
cglobal x264_pixel_sad_x4_4x8_mmxext
cglobal x264_pixel_sad_x4_4x4_mmxext
cglobal x264_pixel_sad_pde_16x16_mmxext
cglobal x264_pixel_sad_pde_16x8_mmxext
cglobal x264_pixel_sad_pde_8x16_mmxext
cglobal x264_pixel_ssd_16x16_mmx
cglobal x264_pixel_ssd_16x8_mmx
cglobal x264_pixel_ssd_8x16_mmx
cglobal x264_pixel_ssd_8x8_mmx
cglobal x264_pixel_ssd_8x4_mmx
cglobal x264_pixel_ssd_4x8_mmx
cglobal x264_pixel_ssd_4x4_mmx
cglobal x264_pixel_satd_4x4_mmxext
cglobal x264_pixel_satd_4x8_mmxext
cglobal x264_pixel_satd_8x4_mmxext
cglobal x264_pixel_satd_8x8_mmxext
cglobal x264_pixel_satd_16x8_mmxext
cglobal x264_pixel_satd_8x16_mmxext
cglobal x264_pixel_satd_16x16_mmxext
cglobal x264_intra_satd_x3_4x4_mmxext
cglobal x264_intra_satd_x3_8x8c_mmxext
cglobal x264_intra_satd_x3_16x16_mmxext
cglobal x264_pixel_ads4_mmxext
cglobal x264_pixel_ads2_mmxext
cglobal x264_pixel_ads1_mmxext
%macro SAD_START 0
pxor mm0, mm0
%endmacro
@@ -474,8 +421,7 @@ cglobal x264_pixel_ads1_mmxext
; int x264_pixel_sad_16x16_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
%macro SAD 2
ALIGN 16
x264_pixel_sad_%1x%2_mmxext:
cglobal x264_pixel_sad_%1x%2_mmxext
SAD_START
%rep %2/2
SAD_INC_2x%1P
@@ -496,8 +442,7 @@ SAD 4, 4
; uint8_t *pix2, int i_stride, int scores[3] )
;-----------------------------------------------------------------------------
%macro SAD_X 3
ALIGN 16
x264_pixel_sad_x%1_%2x%3_mmxext:
cglobal x264_pixel_sad_x%1_%2x%3_mmxext
SAD_X%1_2x%2P 1
%rep %3/2-1
SAD_X%1_2x%2P 0
@@ -534,8 +479,7 @@ ALIGN 4
; int x264_pixel_sad_pde_16x16_mmxext (uint8_t *, int, uint8_t *, int, int )
;-----------------------------------------------------------------------------
%macro SAD_PDE 2
ALIGN 16
x264_pixel_sad_pde_%1x%2_mmxext:
cglobal x264_pixel_sad_pde_%1x%2_mmxext
SAD_START
%rep %2/4
SAD_INC_2x%1P
@@ -577,8 +521,7 @@ SAD_PDE 8, 16
; int x264_pixel_ssd_16x16_mmx (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
%macro SSD 2
ALIGN 16
x264_pixel_ssd_%1x%2_mmx:
cglobal x264_pixel_ssd_%1x%2_mmx
SSD_START
%rep %2
SSD_INC_1x%1P
@@ -611,42 +554,38 @@ SSD 4, 4
ret
%endmacro
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_4x4_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_4x4_mmxext:
cglobal x264_pixel_satd_4x4_mmxext
SATD_START
LOAD_DIFF_HADAMARD_SUM mm0, 0, 0
SATD_END
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_4x8_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_4x8_mmxext:
cglobal x264_pixel_satd_4x8_mmxext
SATD_START
LOAD_DIFF_HADAMARD_SUM mm0, 0, 1
LOAD_DIFF_HADAMARD_SUM mm1, 0, 0
paddw mm0, mm1
SATD_END
ALIGN 16
;-----------------------------------------------------------------------------
; int x264_pixel_satd_8x4_mmxext (uint8_t *, int, uint8_t *, int )
;-----------------------------------------------------------------------------
x264_pixel_satd_8x4_mmxext:
cglobal x264_pixel_satd_8x4_mmxext