Commit f33ba9e2, authored by Loren Merritt, committed by Fiona Glaser
Browse files

Cosmetics: cleaner syntax for defining temporary registers in asm

Globally define t#[qdwb], so that only t# needs to be locally defined when reorganizing registers
parent 406a40dc
......@@ -31,21 +31,12 @@ cextern x264_cabac_range_lps
cextern x264_cabac_transition
cextern x264_cabac_renorm_shift
; DEF_TMP: alias eight temporary names onto numbered registers.
; Takes exactly 16 arguments: %1..%8 are the temp indices and %9..%16 are
; the register numbers they map to. Argument k is paired with argument k+8.
%macro DEF_TMP 16
; one pass per (temp index, register number) pair
%rep 8
; dword, byte and unsuffixed names for the current pair (%1, %9)
%define t%1d r%9d
%define t%1b r%9b
%define t%1 r%9
; shift the argument list left so %1/%9 name the next pair
%rotate 1
%endrep
%endmacro
; t3 must be ecx, since it's used for shift.
%ifdef ARCH_X86_64
DEF_TMP 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,10
DECLARE_REG_TMP 0,1,2,3,4,5,6,10
%define pointer resq
%else
DEF_TMP 0,1,2,3,4,5,6,7, 0,3,2,1,4,5,6,3
DECLARE_REG_TMP 0,3,2,1,4,5,6,3
%define pointer resd
%endif
......
......@@ -41,27 +41,13 @@ SECTION .text
; implicit bipred only:
; assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64
%ifdef ARCH_X86_64
%define t0 r0
%define t1 r1
%define t2 r2
%define t3 r3
%define t4 r4
%define t5 r5
%define t6d r10d
%define t7d r11d
DECLARE_REG_TMP 0,1,2,3,4,5,10,11
; AVG_START (variant selected when ARCH_X86_64 is defined): emits the
; function prologue and then opens the .height_loop label. Takes no arguments.
%macro AVG_START 0
; PROLOGUE is defined outside this view.
; NOTE(review): 6,7 are presumably the argument count and register count -- confirm
PROLOGUE 6,7
; local label; whatever follows the macro invocation sits under it
.height_loop:
%endmacro
%else
%define t0 r1
%define t1 r2
%define t2 r3
%define t3 r4
%define t4 r5
%define t5 r6
%define t6d r1d
%define t7d r2d
DECLARE_REG_TMP 1,2,3,4,5,6,1,2
%macro AVG_START 0
PROLOGUE 0,7
mov t0, r0m
......@@ -690,12 +676,11 @@ cglobal x264_prefetch_ref_mmxext, 3,3
; chroma MC
;=============================================================================
%define t0d eax
%define t0 rax
%define t0 rax
%ifdef ARCH_X86_64
%define t1d r10d
%define t1 r10
%else
%define t1d r1d
%define t1 r1
%endif
%macro MC_CHROMA_START 0
......
......@@ -230,9 +230,9 @@ cglobal x264_pixel_ssd_4x4_sse4, 4,4
pxor m6, m6 ; sum squared
pxor m7, m7 ; zero
%ifdef ARCH_X86_64
%define t3d r3d
%define t3 r3
%else
%define t3d r2d
%define t3 r2
%endif
%endmacro
......@@ -1028,15 +1028,13 @@ cglobal x264_intra_satd_x3_4x4_%1, 2,6
; stack is 16 byte aligned because abi says so
%define top_1d rsp-8 ; size 8
%define left_1d rsp-16 ; size 8
%define t0 r10
%define t0d r10d
%define t0 r10
%else
; stack is 16 byte aligned at least in gcc, and we've pushed 3 regs + return address, so it's still aligned
SUB esp, 16
%define top_1d esp+8
%define left_1d esp
%define t0 r2
%define t0d r2d
%define t0 r2
%endif
call load_hadamard
......@@ -1068,17 +1066,11 @@ cglobal x264_intra_satd_x3_4x4_%1, 2,6
RET
%ifdef ARCH_X86_64
%define t0 r10
%define t0d r10d
%define t2 r11
%define t2w r11w
%define t2d r11d
%define t0 r10
%define t2 r11
%else
%define t0 r0
%define t0d r0d
%define t2 r2
%define t2w r2w
%define t2d r2d
%define t0 r0
%define t2 r2
%endif
;-----------------------------------------------------------------------------
......@@ -1731,10 +1723,10 @@ cglobal x264_pixel_ssim_end4_sse2, 3,3
%macro ADS_START 1 ; unroll_size
%ifdef ARCH_X86_64
%define t0 r6
%define t0 r6
mov r10, rsp
%else
%define t0 r4
%define t0 r4
mov rbp, rsp
%endif
mov r0d, r5m
......
......@@ -241,19 +241,9 @@ QUANT_DC x264_quant_2x2_dc_ssse3, 1
%endmacro
%ifdef ARCH_X86_64
%define t0 r4
%define t0d r4d
%define t1 r3
%define t1d r3d
%define t2 r2
%define t2d r2d
DECLARE_REG_TMP 4,3,2
%else
%define t0 r2
%define t0d r2d
%define t1 r0
%define t1d r0d
%define t2 r1
%define t2d r1d
DECLARE_REG_TMP 2,0,1
%endif
%macro DEQUANT_START 2
......
......@@ -116,6 +116,29 @@ DECLARE_REG_SIZE si, sil
DECLARE_REG_SIZE di, dil
DECLARE_REG_SIZE bp, bpl
; t# defines for when per-arch register allocation is more complex than just function arguments
; DECLARE_REG_TMP: bind the temporaries t0, t1, ... to numbered registers.
; Accepts one or more register numbers. The argument at position i (counting
; from 0) supplies the register for t<i>, e.g. "DECLARE_REG_TMP 4,3,2"
; sets up t0, t1, t2 from r4, r3, r2 respectively.
%macro DECLARE_REG_TMP 1-*
; %%i is a macro-local counter: the index of the temp currently being defined
%assign %%i 0
; %0 is the number of arguments passed, so every argument is visited once
%rep %0
; CAT_XDEFINE is defined outside this view; presumably it pastes "t" and the
; value of %%i into one name and defines it as r%1 -- confirm at its definition
CAT_XDEFINE t, %%i, r%1
%assign %%i %%i+1
; bring the next register number into %1
%rotate 1
%endrep
%endmacro
; DECLARE_REG_TMP_SIZE: for every temp index passed, create the sized names
; t<N>q, t<N>d, t<N>w and t<N>b, each defined as "t<N>" pasted (%+) with the
; corresponding size suffix.
; These are plain %define-s, so t<N> is expanded when a sized name is used,
; not when this macro runs. Redefining t<N> later is therefore enough to
; retarget all four sized names; callers only need to (re)declare t<N>.
%macro DECLARE_REG_TMP_SIZE 0-*
; %0 is the number of indices passed; zero arguments makes this a no-op
%rep %0
%define t%1q t%1 %+ q
%define t%1d t%1 %+ d
%define t%1w t%1 %+ w
%define t%1b t%1 %+ b
; advance to the next temp index
%rotate 1
%endrep
%endmacro
; define the sized names for t0..t7 once, globally
DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7
%ifdef ARCH_X86_64
%define gprsize 8
%else
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment