Commit 7e91b59f authored by Henrik Gramner, committed by Henrik Gramner

x86: Update x86inc.asm

parent d821d880
Pipeline #193129 passed with stages in 18 minutes
 ;*****************************************************************************
 ;* x86inc.asm: x86 abstraction layer
 ;*****************************************************************************
-;* Copyright (C) 2005-2021 x264 project
+;* Copyright (C) 2005-2022 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Henrik Gramner <henrik@gramner.com>
@@ -238,6 +238,16 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
     %endif
 %endmacro
 
+; Repeats an instruction/operation for multiple arguments.
+; Example usage: "REPX {psrlw x, 8}, m0, m1, m2, m3"
+%macro REPX 2-* ; operation, args
+    %xdefine %%f(x) %1
+    %rep %0 - 1
+        %rotate 1
+        %%f(%1)
+    %endrep
+%endmacro
+
 %macro PUSH 1
     push %1
     %ifidn rstk, rsp
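Illustration (not part of the diff): REPX substitutes each remaining argument for x in the braced operation, so the usage example from the comment above,

    REPX {psrlw x, 8}, m0, m1, m2, m3

expands to

    psrlw m0, 8
    psrlw m1, 8
    psrlw m2, 8
    psrlw m3, 8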
@@ -1342,7 +1352,20 @@ INIT_XMM
             %1 %6, __src2
         %endif
     %elif %0 >= 9
-        __instr %6, %7, %8, %9
+        %if avx_enabled && __sizeofreg >= 16 && %4 == 1
+            %ifnnum regnumof%7
+                %if %3
+                    vmovaps %6, %7
+                %else
+                    vmovdqa %6, %7
+                %endif
+                __instr %6, %6, %8, %9
+            %else
+                __instr %6, %7, %8, %9
+            %endif
+        %else
+            __instr %6, %7, %8, %9
+        %endif
     %elif %0 == 8
         %if avx_enabled && __sizeofreg >= 16 && %4 == 0
             %xdefine __src1 %7
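A hedged reading of the new branch (illustration, not part of the diff): for 4-operand instructions flagged with %4 == 1, the first source may now be a memory operand. The VEX encoding requires the first source to be a register, so when %7 is not a register the macro loads it into the destination first, using vmovaps for float instructions (%3 == 1) and vmovdqa for integer ones, and then issues the instruction with the destination doubling as the first source. A possible expansion, with the registers and address chosen purely for illustration:

    ; hypothetical: blendvpd m0, [r2], m1, m2 with avx_enabled could emit
    vmovaps   m0, [r2]          ; float instruction, so vmovaps is used
    vblendvpd m0, m0, m1, m2    ; VEX form, dst doubles as first source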
@@ -1379,7 +1402,7 @@ INIT_XMM
             %else
                 vmovdqa %6, %7
             %endif
-            __instr %6, %8
+            __instr %6, %6, %8
         %else
             __instr %6, __src1, __src2
         %endif
@@ -1448,8 +1471,8 @@ AVX_INSTR andpd, sse2, 1, 0, 1
 AVX_INSTR andps, sse, 1, 0, 1
 AVX_INSTR blendpd, sse4, 1, 1, 0
 AVX_INSTR blendps, sse4, 1, 1, 0
-AVX_INSTR blendvpd, sse4 ; can't be emulated
-AVX_INSTR blendvps, sse4 ; can't be emulated
+AVX_INSTR blendvpd, sse4, 1, 1, 0 ; last operand must be xmm0 with legacy encoding
+AVX_INSTR blendvps, sse4, 1, 1, 0 ; last operand must be xmm0 with legacy encoding
 AVX_INSTR cmpeqpd, sse2, 1, 0, 1
 AVX_INSTR cmpeqps, sse, 1, 0, 1
 AVX_INSTR cmpeqsd, sse2, 1, 0, 0
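ISA background for the changed comment (not part of the diff): the SSE4.1 encodings of blendvpd, blendvps and pblendvb read the blend mask implicitly from xmm0, while the VEX encodings take the mask as an explicit fourth operand. The 4-operand form can therefore now be emulated on pre-AVX hardware whenever the mask operand is xmm0, instead of being rejected outright:

    blendvps  xmm1, xmm2              ; SSE4.1: mask implicitly in xmm0
    vblendvps xmm1, xmm1, xmm2, xmm3  ; AVX: mask as explicit 4th operand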
@@ -1582,7 +1605,7 @@ AVX_INSTR pand, mmx, 0, 0, 1
 AVX_INSTR pandn, mmx, 0, 0, 0
 AVX_INSTR pavgb, mmx2, 0, 0, 1
 AVX_INSTR pavgw, mmx2, 0, 0, 1
-AVX_INSTR pblendvb, sse4 ; can't be emulated
+AVX_INSTR pblendvb, sse4, 0, 1, 0 ; last operand must be xmm0 with legacy encoding
 AVX_INSTR pblendw, sse4, 0, 1, 0
 AVX_INSTR pclmulhqhqdq, fnord, 0, 0, 0
 AVX_INSTR pclmulhqlqdq, fnord, 0, 0, 0
@@ -59,14 +59,6 @@ cextern cdef_dir_8bpc_avx2.main
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %macro CDEF_FILTER 2 ; w, h
     DEFINE_ARGS dst, stride, _, dir, pridmp, pri, sec, tmp
     movifnidn prid, r5m
@@ -64,14 +64,6 @@ cextern shufw_6543210x
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %if ARCH_X86_32
 DECLARE_REG_TMP 5, 3
 %elif WIN64
@@ -105,14 +105,6 @@ cextern gaussian_sequence
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)
 
 INIT_YMM avx2
@@ -90,14 +90,6 @@ cextern gaussian_sequence
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %if ARCH_X86_32
 %undef base
 %define PIC_ptr(a) base+a
@@ -98,14 +98,6 @@ cextern gaussian_sequence
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 INIT_YMM avx2
 cglobal generate_grain_y_8bpc, 2, 9, 8, buf, fg_data
 %define base r4-generate_grain_y_8bpc_avx2_table
@@ -90,14 +90,6 @@ cextern gaussian_sequence
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %if ARCH_X86_32
 %define PIC_ptr(a) base+a
 %else
@@ -134,14 +134,6 @@ cextern filter_intra_taps
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 INIT_YMM avx2
 cglobal ipred_dc_top_16bpc, 3, 7, 6, dst, stride, tl, w, h
     movifnidn hd, hm
@@ -80,14 +80,6 @@ cextern filter_intra_taps
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %macro PAETH 3 ; top, signed_ldiff, ldiff
     paddw m0, m%2, m2
     psubw m1, m0, m3 ; tldiff
@@ -70,14 +70,6 @@ cextern filter_intra_taps
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 INIT_XMM ssse3
 cglobal ipred_dc_top_16bpc, 3, 7, 6, dst, stride, tl, w, h
     LEA r5, ipred_dc_left_16bpc_ssse3_table
@@ -145,14 +145,6 @@ cextern iadst_16x16_internal_8bpc_avx2.main_pass2_end
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)
 
 %macro WRAP_XMM 1+
@@ -174,14 +174,6 @@ tbl_Nx64_offset: db 2* 0, 2*32, 2*16, 2*46
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %define m_suffix(x, sfx) mangle(private_prefix %+ _ %+ x %+ sfx)
 %define m(x) m_suffix(x, SUFFIX)
@@ -132,15 +132,6 @@ SECTION .text
 ; 1-byte offsets as long as data is within +-128 bytes of the base pointer.
 %define o_base deint_shuf + 128
 %define o(x) (r6 - (o_base) + (x))
-
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)
 
 ; flags: 1 = swap, 2 = interleave, 4: coef_regs
@@ -242,15 +242,6 @@ SECTION .text
 %define o_base int8_permA+64*18
 %define o(x) (r5 - (o_base) + (x))
-
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)
 
 ; flags: 1 = swap, 2 = interleave (l), 4 = interleave (t), 8 = no_pack,
@@ -142,14 +142,6 @@ pw_m301x8: times 8 dw -301*8
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX)
 
 %if ARCH_X86_64
@@ -49,14 +49,6 @@ pb_mask: dd 1, 1, 2, 2, 4, 4, 8, 8
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 ; in:                out:
 ; mm%1   a b c d     a e i m
 ; mm%2   e f g h     b f j n
@@ -106,14 +106,6 @@ ASSERT ARCH_X86_32
     %endif
 %endmacro
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %macro SPLATD 2
     movd %1, %2
     pshufd %1, %1, q0000
@@ -66,14 +66,6 @@ cextern sgr_x_by_x_avx2
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 DECLARE_REG_TMP 8, 7, 9, 11, 12, 13, 14 ; wiener ring buffer pointers
 
 INIT_YMM avx2
@@ -202,14 +202,6 @@ cextern resize_filter
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f({%1})
-    %endrep
-%endmacro
-
 INIT_XMM avx2
 cglobal put_bilin_16bpc, 4, 8, 0, dst, ds, src, ss, w, h, mxy
     mov mxyd, r6m ; mx
@@ -254,14 +254,6 @@ cextern resize_filter
 SECTION .text
 
-%macro REPX 2-*
-    %xdefine %%f(x) %1
-    %rep %0 - 1
-        %rotate 1
-        %%f(%1)
-    %endrep
-%endmacro
-
 %if WIN64
 DECLARE_REG_TMP 4
 %else