Commit 35d32d09, authored by Martin Storsjö, committed by Henrik Gramner

checkasm: arm: Check register clobbering

Cast the function pointer to a different type signature, to
be able to use uint64_t as return type (instead of intptr_t) for
those calls that require it.

Use two separate functions, depending on whether neon is available.
parent 9cbdb635
......@@ -122,6 +122,7 @@ ASMSRC += common/arm/cpu-a.S common/arm/pixel-a.S common/arm/mc-a.S \
common/arm/predict-a.S common/arm/bitstream-a.S
SRCS += common/arm/mc-c.c common/arm/predict-c.c
OBJASM = $(ASMSRC:%.S=%.o)
OBJCHK += tools/checkasm-arm.o
/*
* checkasm-arm.S: assembly check tool
* Copyright (C) 2015 x264 project
* Authors: Martin Storsjo <>
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
* This program is also available under a commercial proprietary license.
* For more information, contact us at
*/
#include "../common/arm/asm.S"
.section .rodata
.align 4
@ Eight 64-bit patterns (64 bytes). They are loaded into the callee-saved
@ registers before the checked call and compared against them afterwards.
register_init:
.quad 0x21f86d66c8ca00ce
.quad 0x75b6ba21077c48ad
.quad 0xed56bb2dcb3c7736
.quad 0x8bda43d3fd1a7e06
.quad 0xb64a9c9e5d318408
.quad 0xdf9a54b303f1d3a3
.quad 0x4a75479abd64e097
.quad 0x249214109d5d1c88

@ Passed to puts when a callee-saved register was not preserved.
error_message:
.asciz "failed to preserve register"

@ Leave .rodata again so that the functions emitted below land in the code
@ section (harmless if the function macro selects the section itself).
.text
@ max number of args used by any x264 asm function.
#define MAX_ARGS 15
@ Size in bytes of the stack area that receives the copied arguments:
@ one 4-byte word for each of the (max args - 2) slots that get copied.
#define ARG_STACK 4*(MAX_ARGS - 2)
@ clobbercheck variant
@
@ Emits x264_checkasm_call_<variant>( func, ok, ... ): a trampoline that calls
@ func with the variadic arguments and verifies afterwards that the
@ callee-saved registers still hold the patterns from register_init.
@
@   In:   r0     = func, the function to call
@         r1     = ok, pointer to an int that is cleared to 0 on a mismatch
@         r2, r3 = first two arguments for func
@         stack  = remaining arguments for func
@   Out:  r0/r1  = whatever func returned in r0/r1
@
@ variant is "neon" (checks r4-r11 and q4-q7) or "noneon" (checks r4-r11
@ only and uses no NEON instructions).
@
@ On a mismatch, 0 is stored through ok and error_message is printed with
@ puts.
.macro clobbercheck variant
@ Bytes pushed below the incoming stack arguments at the time they are
@ copied: r4-r11 and lr (9 words) plus the saved ok pointer (1 word).
.equ pushed, 4*10
function x264_checkasm_call_\variant
    push        {r4-r11, lr}
.ifc \variant, neon
    vpush       {q4-q7}
.equ pushed, pushed + 16*4
.endif
@ Size of the outgoing argument area, rounded up so that sp is 8-byte
@ aligned at the call as the procedure call standard requires.
.equ args_size, (((ARG_STACK) + pushed + 7) & ~7) - pushed

    @ Fill the callee-saved registers with the known patterns.
    movrel      r12, register_init
.ifc \variant, neon
    vldm        r12, {q4-q7}
.endif
    ldm         r12, {r4-r11}

    @ Keep the ok pointer on the stack across the call.
    push        {r1}

    @ Copy the stack arguments of func, which start two words into the
    @ incoming stack area, to the bottom of a fresh area.
    sub         sp,  sp,  #args_size
.equ pos, 0
.rept MAX_ARGS-2
    ldr         r12, [sp, #args_size + pushed + 8 + pos]
    str         r12, [sp, #pos]
.equ pos, pos + 4
.endr

    @ Shift the register arguments down by two and call func.
    mov         r12, r0
    mov         r0,  r2
    mov         r1,  r3
    ldrd        r2,  r3,  [sp, #args_size + pushed]
    blx         r12
    add         sp,  sp,  #args_size
    pop         {r2}

    @ Save the return value, then fold every difference between the
    @ registers and their expected patterns into r3.
    push        {r0, r1}
    movrel      r12, register_init
.ifc \variant, neon
    vldm        r12, {q0-q3}
    veor        q0,  q0,  q4
    veor        q1,  q1,  q5
    veor        q2,  q2,  q6
    veor        q3,  q3,  q7
    vorr        q0,  q0,  q1
    vorr        q0,  q0,  q2
    vorr        q0,  q0,  q3
    vorr        d0,  d0,  d1
    vrev64.32   d1,  d0
    vorr        d0,  d0,  d1
    vmov.32     r3,  d0[0]
.else
    mov         r3,  #0
.endif

.macro check_reg reg1, reg2
    ldrd        r0,  r1,  [r12], #8
    eor         r0,  r0, \reg1
    eor         r1,  r1, \reg2
    orr         r3,  r3, r0
    orr         r3,  r3, r1
.endm
    check_reg   r4,  r5
    check_reg   r6,  r7
    check_reg   r8,  r9
    check_reg   r10, r11
.purgem check_reg

    cmp         r3,  #0
    beq         0f

    @ Mismatch: clear the flag behind ok and report the failure.
    mov         r12, #0
    str         r12, [r2]
    movrel      r0, error_message
    @ 9 saved core registers plus the 2-word return value leave sp only
    @ 4-byte aligned here, so pad by one word around the call.
    sub         sp,  sp,  #4
    bl          puts
    add         sp,  sp,  #4
0:
    pop         {r0, r1}
.ifc \variant, neon
    vpop        {q4-q7}
.endif
    pop         {r4-r11, pc}
@ NOTE(review): endfunc is assumed to be provided by the function macro in
@ asm.S - confirm against that file.
endfunc
.endm
@ Emit both trampolines: x264_checkasm_call_neon and
@ x264_checkasm_call_noneon.
clobbercheck neon
clobbercheck noneon
......@@ -231,6 +231,12 @@ intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... );
#if ARCH_ARM
/* Register-clobber checking call wrappers; there is one variant that also
 * checks the NEON registers and one that uses no NEON instructions. */
intptr_t x264_checkasm_call_neon( intptr_t (*func)(), int *ok, ... );
intptr_t x264_checkasm_call_noneon( intptr_t (*func)(), int *ok, ... );
/* Wrapper actually used for checked calls. It starts out as the non-NEON
 * variant and is switched to the NEON one once X264_CPU_NEON is detected.
 * Guarded by ARCH_ARM because other targets declare a function with this
 * name, which would conflict with this variable. */
intptr_t (*x264_checkasm_call)( intptr_t (*func)(), int *ok, ... ) = x264_checkasm_call_noneon;
#endif
/* Plain direct call of func with the given arguments, without any wrapper. */
#define call_c1(func,...) func(__VA_ARGS__)
#if ARCH_X86_64
......@@ -248,12 +254,18 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \
x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \
x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); })
#elif ARCH_X86 || (ARCH_AARCH64 && !defined(__APPLE__))
#elif ARCH_X86 || (ARCH_AARCH64 && !defined(__APPLE__)) || ARCH_ARM
#define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ )
#define call_a1 call_c1
#if ARCH_ARM
/* Like call_a1, but for functions returning a 64-bit value: the wrapper
 * pointer is cast to a signature returning uint64_t instead of intptr_t so
 * that the whole result is kept. */
#define call_a1_64(func,...) ((uint64_t (*)(intptr_t(*)(), int*, ...))x264_checkasm_call)( (intptr_t(*)())func, &ok, __VA_ARGS__ )
#else
/* Elsewhere the regular checked call is used unchanged. */
#define call_a1_64 call_a1
#endif
#define call_bench(func,cpu,...)\
if( do_bench && !strncmp(func_name, bench_pattern, bench_pattern_len) )\
......@@ -286,6 +298,7 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... );
/* Each of call_c and call_a64 first runs the benchmark step and then performs
 * one real call; being GNU statement expressions, they evaluate to the result
 * of that final call. */
#define call_c(func,...) ({ call_c2(func,__VA_ARGS__); call_c1(func,__VA_ARGS__); })
/* Benchmark step for the implementation under test (cpu flags: cpu_new). */
#define call_a2(func,...) ({ call_bench(func,cpu_new,__VA_ARGS__); })
/* Benchmark step with cpu flags 0. */
#define call_c2(func,...) ({ call_bench(func,0,__VA_ARGS__); })
/* Variant for functions with a 64-bit return value: uses call_a1_64 for the
 * real call. */
#define call_a64(func,...) ({ call_a2(func,__VA_ARGS__); call_a1_64(func,__VA_ARGS__); })
static int check_pixel( int cpu_ref, int cpu_new )
......@@ -372,7 +385,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
uint32_t cost8_c = pixel_c.sa8d[PIXEL_16x16]( pbuf1, 16, pbuf2, 64 );
uint32_t cost4_c = pixel_c.satd[PIXEL_16x16]( pbuf1, 16, pbuf2, 64 );
uint64_t res_a = call_a( pixel_asm.sa8d_satd[PIXEL_16x16], pbuf1, (intptr_t)16, pbuf2, (intptr_t)64 );
uint64_t res_a = call_a64( pixel_asm.sa8d_satd[PIXEL_16x16], pbuf1, (intptr_t)16, pbuf2, (intptr_t)64 );
uint32_t cost8_a = res_a;
uint32_t cost4_a = res_a >> 32;
if( cost8_a != cost8_c || cost4_a != cost4_c )
......@@ -2786,6 +2799,8 @@ static int check_all_flags( void )
ret = check_all_funcs( 0, X264_CPU_ALTIVEC );
#elif ARCH_ARM
if( cpu_detect & X264_CPU_NEON )
x264_checkasm_call = x264_checkasm_call_neon;
if( cpu_detect & X264_CPU_ARMV6 )
ret |= add_flags( &cpu0, &cpu1, X264_CPU_ARMV6, "ARMv6" );
if( cpu_detect & X264_CPU_NEON )
