
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (3)
  • x86inc: Fix warnings with old nasm versions · 3d8aff7e
    Henrik Gramner authored and committed
  • x86inc: Restore the stack state between stack allocations · 4df71a75
    Henrik Gramner authored and committed

    Allows the use of multiple independent stack allocations within
    a function without having to manually fiddle with stack offsets.
    (See the first sketch after this list.)
  • x86inc: Improve XMM-spilling functionality on 64-bit Windows · 585e0199
    Henrik Gramner authored and committed

    Prior to this change, dealing with the scenario where the number of
    XMM registers spilled depends on whether or not a branch is taken
    was complicated to handle well. There were essentially three options:

    1) Always spill the largest number of XMM registers. Results in
       unnecessary spills.

    2) Do the spilling after the branch. Results in code duplication
       for the shared subset of spills.

    3) Do the spilling manually. Optimal, but overly complex and vexing.

    This adds an additional optional argument to the WIN64_SPILL_XMM
    and WIN64_PUSH_XMM macros to make it possible to allocate space
    for a certain number of registers but initially only push a subset
    of those, with the option of pushing additional registers later.
    (See the second sketch after this list.)
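
For illustration of 4df71a75, a minimal usage sketch (the function name and
register counts are hypothetical, not taken from the commit). Because
WIN64_RESTORE_XMM and ALLOC_STACK now reset the assembler-side bookkeeping
via RESET_STACK_STATE, a second spill region can follow a first one without
manual %assign fixups of stack_offset:

    INIT_XMM sse2
    cglobal example, 3, 5, 0    ; hypothetical function, 0 XMM regs declared
        WIN64_SPILL_XMM 8       ; first region: xmm6-xmm7 saved in shadow space
        ; ... code clobbering xmm6-xmm7 ...
        WIN64_RESTORE_XMM       ; restores the registers and resets the state
        WIN64_SPILL_XMM 10      ; second, independent region: also allocates
                                ; stack space for xmm8-xmm9, no fixups needed
        ; ... code clobbering xmm6-xmm9 ...
        RET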
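
For illustration of 585e0199, a minimal usage sketch of the new optional
argument (again with a hypothetical function, register counts, and label).
Stack space for 12 XMM registers is reserved up front, but only xmm6-xmm7
are pushed on the common path; the branch that actually clobbers xmm8-xmm11
pushes them on demand:

    INIT_XMM sse2
    cglobal example, 4, 6, 0    ; hypothetical function
        WIN64_SPILL_XMM 8, 12   ; reserve space for 12 registers, push 8 now
        test r3d, r3d
        jnz .wide
        ; ... code using only xmm6-xmm7 ...
        RET                     ; restores the 8 registers in use on this path
    .wide:
        WIN64_PUSH_XMM 12, 8    ; 12 registers now in use, 8 already pushed,
                                ; so only xmm8-xmm11 get stored
        ; ... code using xmm6-xmm11 ...
        RET                     ; restores all 12 registers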
@@ -111,7 +111,7 @@
 %endif
 %define HAVE_PRIVATE_EXTERN 1
-%ifdef __NASM_VER__
+%ifdef __NASM_VERSION_ID__
     %use smartalign
     %if __NASM_VERSION_ID__ < 0x020e0000 ; 2.14
         %define HAVE_PRIVATE_EXTERN 0
@@ -393,7 +393,24 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
     %endif
 %endmacro

-%macro ALLOC_STACK 0-2 0, 0 ; stack_size, n_xmm_regs (for win64 only)
+%macro RESET_STACK_STATE 0
+    %ifidn rstk, rsp
+        %assign stack_offset stack_offset - stack_size_padded
+    %else
+        %xdefine rstk rsp
+    %endif
+    %assign stack_size 0
+    %assign stack_size_padded 0
+    %assign xmm_regs_used 0
+%endmacro
+
+%macro ALLOC_STACK 0-2 0, 0 ; stack_size, n_xmm_regs
+    RESET_STACK_STATE
+    %ifnum %2
+        %if mmsize != 8
+            %assign xmm_regs_used %2
+        %endif
+    %endif
     %ifnum %1
         %if %1 != 0
             %assign %%pad 0
@@ -403,11 +420,8 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
             %endif
             %if WIN64
                 %assign %%pad %%pad + 32 ; shadow space
-                %if mmsize != 8
-                    %assign xmm_regs_used %2
-                    %if xmm_regs_used > 8
-                        %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
-                    %endif
+                %if xmm_regs_used > 8
+                    %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
                 %endif
             %endif
             %if required_stack_alignment <= STACK_ALIGNMENT
@@ -503,35 +517,62 @@ DECLARE_REG 14, R13, 120
     %endif
 %endmacro

-%macro WIN64_PUSH_XMM 0
-    ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
-    %if xmm_regs_used > 6 + high_mm_regs
-        movaps [rstk + stack_offset + 8], xmm6
-    %endif
-    %if xmm_regs_used > 7 + high_mm_regs
-        movaps [rstk + stack_offset + 24], xmm7
-    %endif
-    %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
-    %if %%xmm_regs_on_stack > 0
-        %assign %%i 8
-        %rep %%xmm_regs_on_stack
-            movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
-            %assign %%i %%i+1
-        %endrep
+; Push XMM registers to the stack. If no argument is specified all used registers
+; will be pushed, otherwise only push previously unpushed registers.
+%macro WIN64_PUSH_XMM 0-2 ; new_xmm_regs_used, xmm_regs_pushed
+    %if mmsize != 8
+        %if %0 == 2
+            %assign %%pushed %2
+            %assign xmm_regs_used %1
+        %elif %0 == 1
+            %assign %%pushed xmm_regs_used
+            %assign xmm_regs_used %1
+        %else
+            %assign %%pushed 0
+        %endif
+        ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
+        %if %%pushed <= 6 + high_mm_regs && xmm_regs_used > 6 + high_mm_regs
+            movaps [rstk + stack_offset + 8], xmm6
+        %endif
+        %if %%pushed <= 7 + high_mm_regs && xmm_regs_used > 7 + high_mm_regs
+            movaps [rstk + stack_offset + 24], xmm7
+        %endif
+        %assign %%pushed %%pushed - high_mm_regs - 8
+        %if %%pushed < 0
+            %assign %%pushed 0
+        %endif
+        %assign %%regs_to_push xmm_regs_used - %%pushed - high_mm_regs - 8
+        %if %%regs_to_push > 0
+            ASSERT (%%regs_to_push + %%pushed) * 16 <= stack_size_padded - stack_size - 32
+            %assign %%i %%pushed + 8
+            %rep %%regs_to_push
+                movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
+                %assign %%i %%i+1
+            %endrep
+        %endif
     %endif
 %endmacro

-%macro WIN64_SPILL_XMM 1
-    %assign xmm_regs_used %1
-    ASSERT xmm_regs_used <= 16 + high_mm_regs
-    %assign %%xmm_regs_on_stack xmm_regs_used - high_mm_regs - 8
-    %if %%xmm_regs_on_stack > 0
-        ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
-        %assign %%pad %%xmm_regs_on_stack*16 + 32
-        %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
-        SUB rsp, stack_size_padded
+; Allocate stack space for XMM registers and push all, or a subset, of them
+%macro WIN64_SPILL_XMM 1-2 ; xmm_regs_used, xmm_regs_reserved
+    RESET_STACK_STATE
+    %if mmsize != 8
+        %assign xmm_regs_used %1
+        ASSERT xmm_regs_used <= 16 + high_mm_regs
+        %if %0 == 2
+            ASSERT %2 >= %1
+            %assign %%xmm_regs_on_stack %2 - high_mm_regs - 8
+        %else
+            %assign %%xmm_regs_on_stack %1 - high_mm_regs - 8
+        %endif
+        %if %%xmm_regs_on_stack > 0
+            ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
+            %assign %%pad %%xmm_regs_on_stack*16 + 32
+            %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
+            SUB rsp, stack_size_padded
+        %endif
+        WIN64_PUSH_XMM
     %endif
-    WIN64_PUSH_XMM
 %endmacro

 %macro WIN64_RESTORE_XMM_INTERNAL 0
@@ -562,9 +603,7 @@ DECLARE_REG 14, R13, 120
 %macro WIN64_RESTORE_XMM 0
     WIN64_RESTORE_XMM_INTERNAL
-    %assign stack_offset (stack_offset-stack_size_padded)
-    %assign stack_size_padded 0
-    %assign xmm_regs_used 0
+    RESET_STACK_STATE
 %endmacro

 %define has_epilogue regs_used > 7 || stack_size > 0 || vzeroupper_required || xmm_regs_used > 6+high_mm_regs
@@ -599,12 +638,11 @@ DECLARE_REG 14, R13, 72
 %macro PROLOGUE 2-5+ 0, 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
     %assign num_args %1
     %assign regs_used %2
-    %assign xmm_regs_used %3
     ASSERT regs_used >= num_args
     SETUP_STACK_POINTER %4
     ASSERT regs_used <= 15
     PUSH_IF_USED 9, 10, 11, 12, 13, 14
-    ALLOC_STACK %4
+    ALLOC_STACK %4, %3
     LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
     %if %0 > 4
         %ifnum %4
@@ -668,7 +706,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
     SETUP_STACK_POINTER %4
     ASSERT regs_used <= 7
     PUSH_IF_USED 3, 4, 5, 6
-    ALLOC_STACK %4
+    ALLOC_STACK %4, %3
     LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
     %if %0 > 4
         %ifnum %4
@@ -701,13 +739,21 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %endif ;======================================================================

 %if WIN64 == 0
-    %macro WIN64_SPILL_XMM 1
+    %macro WIN64_SPILL_XMM 1-2
+        RESET_STACK_STATE
+        %if mmsize != 8
             %assign xmm_regs_used %1
+        %endif
     %endmacro
     %macro WIN64_RESTORE_XMM_INTERNAL 0
     %endmacro
     %macro WIN64_RESTORE_XMM 0
+        RESET_STACK_STATE
     %endmacro
-    %macro WIN64_PUSH_XMM 0
+    %macro WIN64_PUSH_XMM 0-2
+        %if mmsize != 8 && %0 >= 1
+            %assign xmm_regs_used %1
+        %endif
     %endmacro
 %endif
@@ -856,8 +902,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
 ; The GNU linker assumes the stack is executable by default.
 [SECTION .note.GNU-stack noalloc noexec nowrite progbits]

-%ifdef __NASM_VER__
-    %if __NASM_VER__ >= 0x020e0300 ; 2.14.03
+%ifdef __NASM_VERSION_ID__
+    %if __NASM_VERSION_ID__ >= 0x020e0300 ; 2.14.03
         %if ARCH_X86_64
             ; Control-flow Enforcement Technology (CET) properties.
             [SECTION .note.gnu.property alloc noexec nowrite note align=gprsize]
@@ -964,13 +1010,13 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %endif
     %if ARCH_X86_64 || cpuflag(sse2)
-        %ifdef __NASM_VER__
+        %ifdef __NASM_VERSION_ID__
             ALIGNMODE p6
         %else
             CPU amdnop
         %endif
     %else
-        %ifdef __NASM_VER__
+        %ifdef __NASM_VERSION_ID__
             ALIGNMODE nop
         %else
             CPU basicnop