Commit 498cca0b authored by Vittorio Giovara, committed by Anton Mitrofanov
Browse files

arm: Set the function symbol prefix in a single location

parent 8f2437d3
......@@ -34,9 +34,9 @@
#endif
#ifdef PREFIX
# define EXTERN_ASM _
# define EXTERN_ASM _x264_
#else
# define EXTERN_ASM
# define EXTERN_ASM x264_
#endif
#ifdef __ELF__
......@@ -75,7 +75,11 @@ ELF .eabi_attribute 25, \val
.macro function name, export=1
.macro endfunc
.if \export
ELF .size EXTERN_ASM\name, . - EXTERN_ASM\name
.else
ELF .size \name, . - \name
.endif
FUNC .endfunc
.purgem endfunc
.endm
......
......@@ -25,7 +25,7 @@
#include "asm.S"
function x264_nal_escape_neon
function nal_escape_neon
push {r4-r5,lr}
vmov.u8 q0, #0xff
vmov.u8 q8, #4
......
......@@ -29,7 +29,7 @@
// done in gas because .fpu neon overrides the refusal to assemble
// instructions the selected -march/-mcpu doesn't support
function x264_cpu_neon_test
function cpu_neon_test
vadd.i16 q0, q0, q0
bx lr
endfunc
......@@ -37,7 +37,7 @@ endfunc
// return: 0 on success
// 1 if counters were already enabled
// 9 if lo-res counters were already enabled
function x264_cpu_enable_armv7_counter, export=0
function cpu_enable_armv7_counter, export=0
mrc p15, 0, r2, c9, c12, 0 // read PMNC
ands r0, r2, #1
andne r0, r2, #9
......@@ -50,7 +50,7 @@ function x264_cpu_enable_armv7_counter, export=0
bx lr
endfunc
function x264_cpu_disable_armv7_counter, export=0
function cpu_disable_armv7_counter, export=0
mrc p15, 0, r0, c9, c12, 0 // read PMNC
bic r0, r0, #1 // disable counters
mcr p15, 0, r0, c9, c12, 0 // write PMNC
......@@ -64,14 +64,14 @@ endfunc
// return: 0 if neon -> arm transfers take more than 10 cycles
// nonzero otherwise
function x264_cpu_fast_neon_mrc_test
function cpu_fast_neon_mrc_test
// check for user access to performance counters
mrc p15, 0, r0, c9, c14, 0
cmp r0, #0
bxeq lr
push {r4-r6,lr}
bl x264_cpu_enable_armv7_counter
bl cpu_enable_armv7_counter
ands r1, r0, #8
mov r3, #0
mov ip, #4
......@@ -99,7 +99,7 @@ average_loop:
// disable counters if we enabled them
ands r0, r0, #1
bleq x264_cpu_disable_armv7_counter
bleq cpu_disable_armv7_counter
lsr r0, r3, #5
cmp r0, #10
......
......@@ -62,7 +62,7 @@ endconst
.endm
function x264_dct4x4dc_neon
function dct4x4dc_neon
vld1.64 {d0-d3}, [r0,:128]
SUMSUB_ABCD d4, d5, d6, d7, d0, d1, d2, d3
SUMSUB_ABCD d0, d2, d3, d1, d4, d6, d5, d7
......@@ -81,7 +81,7 @@ function x264_dct4x4dc_neon
bx lr
endfunc
function x264_idct4x4dc_neon
function idct4x4dc_neon
vld1.64 {d0-d3}, [r0,:128]
SUMSUB_ABCD d4, d5, d6, d7, d0, d1, d2, d3
SUMSUB_ABCD d0, d2, d3, d1, d4, d6, d5, d7
......@@ -105,7 +105,7 @@ endfunc
vsub.s16 \d3, \d7, \d5
.endm
function x264_sub4x4_dct_neon
function sub4x4_dct_neon
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
vld1.32 {d0[]}, [r1,:32], r3
......@@ -128,7 +128,7 @@ function x264_sub4x4_dct_neon
bx lr
endfunc
function x264_sub8x4_dct_neon, export=0
function sub8x4_dct_neon, export=0
vld1.64 {d0}, [r1,:64], r3
vld1.64 {d1}, [r2,:64], ip
vsubl.u8 q8, d0, d1
......@@ -164,34 +164,34 @@ function x264_sub8x4_dct_neon, export=0
bx lr
endfunc
function x264_sub8x8_dct_neon
function sub8x8_dct_neon
push {lr}
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
bl x264_sub8x4_dct_neon
bl sub8x4_dct_neon
pop {lr}
b x264_sub8x4_dct_neon
b sub8x4_dct_neon
endfunc
function x264_sub16x16_dct_neon
function sub16x16_dct_neon
push {lr}
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
bl x264_sub8x4_dct_neon
bl x264_sub8x4_dct_neon
bl sub8x4_dct_neon
bl sub8x4_dct_neon
sub r1, r1, #8*FENC_STRIDE-8
sub r2, r2, #8*FDEC_STRIDE-8
bl x264_sub8x4_dct_neon
bl x264_sub8x4_dct_neon
bl sub8x4_dct_neon
bl sub8x4_dct_neon
sub r1, r1, #8
sub r2, r2, #8
bl x264_sub8x4_dct_neon
bl x264_sub8x4_dct_neon
bl sub8x4_dct_neon
bl sub8x4_dct_neon
sub r1, r1, #8*FENC_STRIDE-8
sub r2, r2, #8*FDEC_STRIDE-8
bl x264_sub8x4_dct_neon
bl sub8x4_dct_neon
pop {lr}
b x264_sub8x4_dct_neon
b sub8x4_dct_neon
endfunc
......@@ -226,7 +226,7 @@ endfunc
SUMSUB_SHR2 2, q11, q13, q3, q13, q0, q1
.endm
function x264_sub8x8_dct8_neon
function sub8x8_dct8_neon
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d16}, [r1,:64], r3
......@@ -278,19 +278,19 @@ function x264_sub8x8_dct8_neon
bx lr
endfunc
function x264_sub16x16_dct8_neon
function sub16x16_dct8_neon
push {lr}
bl X(x264_sub8x8_dct8_neon)
bl X(sub8x8_dct8_neon)
sub r1, r1, #FENC_STRIDE*8 - 8
sub r2, r2, #FDEC_STRIDE*8 - 8
bl X(x264_sub8x8_dct8_neon)
bl X(sub8x8_dct8_neon)
sub r1, r1, #8
sub r2, r2, #8
bl X(x264_sub8x8_dct8_neon)
bl X(sub8x8_dct8_neon)
pop {lr}
sub r1, r1, #FENC_STRIDE*8 - 8
sub r2, r2, #FDEC_STRIDE*8 - 8
b X(x264_sub8x8_dct8_neon)
b X(sub8x8_dct8_neon)
endfunc
......@@ -303,7 +303,7 @@ endfunc
vadd.s16 \d6, \d6, \d1
.endm
function x264_add4x4_idct_neon
function add4x4_idct_neon
mov r2, #FDEC_STRIDE
vld1.64 {d0-d3}, [r1,:128]
......@@ -335,7 +335,7 @@ function x264_add4x4_idct_neon
bx lr
endfunc
function x264_add8x4_idct_neon, export=0
function add8x4_idct_neon, export=0
vld1.64 {d0-d3}, [r1,:128]!
IDCT_1D d16, d18, d20, d22, d0, d1, d2, d3
vld1.64 {d4-d7}, [r1,:128]!
......@@ -375,29 +375,29 @@ function x264_add8x4_idct_neon, export=0
bx lr
endfunc
function x264_add8x8_idct_neon
function add8x8_idct_neon
mov r2, #FDEC_STRIDE
mov ip, lr
bl x264_add8x4_idct_neon
bl add8x4_idct_neon
mov lr, ip
b x264_add8x4_idct_neon
b add8x4_idct_neon
endfunc
function x264_add16x16_idct_neon
function add16x16_idct_neon
mov r2, #FDEC_STRIDE
mov ip, lr
bl x264_add8x4_idct_neon
bl x264_add8x4_idct_neon
bl add8x4_idct_neon
bl add8x4_idct_neon
sub r0, r0, #8*FDEC_STRIDE-8
bl x264_add8x4_idct_neon
bl x264_add8x4_idct_neon
bl add8x4_idct_neon
bl add8x4_idct_neon
sub r0, r0, #8
bl x264_add8x4_idct_neon
bl x264_add8x4_idct_neon
bl add8x4_idct_neon
bl add8x4_idct_neon
sub r0, r0, #8*FDEC_STRIDE-8
bl x264_add8x4_idct_neon
bl add8x4_idct_neon
mov lr, ip
b x264_add8x4_idct_neon
b add8x4_idct_neon
endfunc
......@@ -435,7 +435,7 @@ endfunc
SUMSUB_AB q11, q12, q2, q12
.endm
function x264_add8x8_idct8_neon
function add8x8_idct8_neon
mov r2, #FDEC_STRIDE
vld1.64 {d16-d19}, [r1,:128]!
vld1.64 {d20-d23}, [r1,:128]!
......@@ -497,20 +497,20 @@ function x264_add8x8_idct8_neon
bx lr
endfunc
function x264_add16x16_idct8_neon
function add16x16_idct8_neon
mov ip, lr
bl X(x264_add8x8_idct8_neon)
bl X(add8x8_idct8_neon)
sub r0, r0, #8*FDEC_STRIDE-8
bl X(x264_add8x8_idct8_neon)
bl X(add8x8_idct8_neon)
sub r0, r0, #8
bl X(x264_add8x8_idct8_neon)
bl X(add8x8_idct8_neon)
sub r0, r0, #8*FDEC_STRIDE-8
mov lr, ip
b X(x264_add8x8_idct8_neon)
b X(add8x8_idct8_neon)
endfunc
function x264_add8x8_idct_dc_neon
function add8x8_idct_dc_neon
mov r2, #FDEC_STRIDE
vld1.64 {d16}, [r1,:64]
vrshr.s16 d16, d16, #6
......@@ -593,7 +593,7 @@ endfunc
vst1.64 {d22-d23}, [r2,:128], r3
.endm
function x264_add16x16_idct_dc_neon
function add16x16_idct_dc_neon
mov r2, r0
mov r3, #FDEC_STRIDE
vmov.i16 q15, #0
......@@ -609,7 +609,7 @@ function x264_add16x16_idct_dc_neon
bx lr
endfunc
function x264_sub8x8_dct_dc_neon
function sub8x8_dct_dc_neon
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d16}, [r1,:64], r3
......@@ -657,7 +657,7 @@ function x264_sub8x8_dct_dc_neon
bx lr
endfunc
function x264_sub8x16_dct_dc_neon
function sub8x16_dct_dc_neon
mov r3, #FENC_STRIDE
mov ip, #FDEC_STRIDE
vld1.64 {d16}, [r1,:64], r3
......@@ -751,7 +751,7 @@ function x264_sub8x16_dct_dc_neon
endfunc
function x264_zigzag_scan_4x4_frame_neon
function zigzag_scan_4x4_frame_neon
movrel r2, scan4x4_frame
vld1.64 {d0-d3}, [r1,:128]
vld1.64 {d16-d19}, [r2,:128]
......
......@@ -117,7 +117,7 @@
vqmovun.s16 d1, q12
.endm
function x264_deblock_v_luma_neon
function deblock_v_luma_neon
h264_loop_filter_start
vld1.64 {d0, d1}, [r0,:128], r1
......@@ -143,7 +143,7 @@ function x264_deblock_v_luma_neon
bx lr
endfunc
function x264_deblock_h_luma_neon
function deblock_h_luma_neon
h264_loop_filter_start
sub r0, r0, #4
......@@ -324,7 +324,7 @@ endfunc
.endm
function x264_deblock_v_luma_intra_neon
function deblock_v_luma_intra_neon
push {lr}
vld1.64 {d0, d1}, [r0,:128], r1
vld1.64 {d2, d3}, [r0,:128], r1
......@@ -352,7 +352,7 @@ function x264_deblock_v_luma_intra_neon
pop {pc}
endfunc
function x264_deblock_h_luma_intra_neon
function deblock_h_luma_intra_neon
push {lr}
sub r0, r0, #4
vld1.64 {d22}, [r0], r1
......@@ -447,7 +447,7 @@ endfunc
vqmovun.s16 d1, q12
.endm
function x264_deblock_v_chroma_neon
function deblock_v_chroma_neon
h264_loop_filter_start
sub r0, r0, r1, lsl #1
......@@ -465,7 +465,7 @@ function x264_deblock_v_chroma_neon
bx lr
endfunc
function x264_deblock_h_chroma_neon
function deblock_h_chroma_neon
h264_loop_filter_start
sub r0, r0, #4
......@@ -499,7 +499,7 @@ deblock_h_chroma:
bx lr
endfunc
function x264_deblock_h_chroma_422_neon
function deblock_h_chroma_422_neon
h264_loop_filter_start
push {lr}
sub r0, r0, #4
......@@ -547,7 +547,7 @@ endfunc
vqmovun.s16 d0, q11
.endm
function x264_deblock_h_chroma_mbaff_neon
function deblock_h_chroma_mbaff_neon
h264_loop_filter_start
sub r0, r0, #4
......@@ -610,7 +610,7 @@ endfunc
vbit q0, q2, q13
.endm
function x264_deblock_v_chroma_intra_neon
function deblock_v_chroma_intra_neon
sub r0, r0, r1, lsl #1
vld2.8 {d18,d19}, [r0,:128], r1
vld2.8 {d16,d17}, [r0,:128], r1
......@@ -626,7 +626,7 @@ function x264_deblock_v_chroma_intra_neon
bx lr
endfunc
function x264_deblock_h_chroma_intra_neon
function deblock_h_chroma_intra_neon
sub r0, r0, #4
vld1.8 {d18}, [r0], r1
vld1.8 {d16}, [r0], r1
......@@ -657,15 +657,15 @@ function x264_deblock_h_chroma_intra_neon
bx lr
endfunc
function x264_deblock_h_chroma_422_intra_neon
function deblock_h_chroma_422_intra_neon
push {lr}
bl X(x264_deblock_h_chroma_intra_neon)
bl X(deblock_h_chroma_intra_neon)
add r0, r0, #2
pop {lr}
b X(x264_deblock_h_chroma_intra_neon)
b X(deblock_h_chroma_intra_neon)
endfunc
function x264_deblock_h_chroma_intra_mbaff_neon
function deblock_h_chroma_intra_mbaff_neon
sub r0, r0, #4
vld1.8 {d18}, [r0], r1
vld1.8 {d16}, [r0], r1
......@@ -688,7 +688,7 @@ function x264_deblock_h_chroma_intra_mbaff_neon
bx lr
endfunc
function x264_deblock_strength_neon
function deblock_strength_neon
ldr ip, [sp]
vmov.i8 q8, #0
lsl ip, ip, #8
......
......@@ -38,7 +38,7 @@ endconst
// They also use nothing above armv5te, but we don't care about pre-armv6
// void prefetch_ref( uint8_t *pix, intptr_t stride, int parity )
function x264_prefetch_ref_arm
function prefetch_ref_arm
sub r2, r2, #1
add r0, r0, #64
and r2, r2, r1
......@@ -58,7 +58,7 @@ endfunc
// void prefetch_fenc( uint8_t *pix_y, intptr_t stride_y,
// uint8_t *pix_uv, intptr_t stride_uv, int mb_x )
function x264_prefetch_fenc_arm
function prefetch_fenc_arm
ldr ip, [sp]
push {lr}
and lr, ip, #3
......@@ -83,8 +83,8 @@ function x264_prefetch_fenc_arm
endfunc
// void *x264_memcpy_aligned( void *dst, const void *src, size_t n )
function x264_memcpy_aligned_neon
// void *memcpy_aligned( void *dst, const void *src, size_t n )
function memcpy_aligned_neon
orr r3, r0, r1, lsr #1
movrel ip, memcpy_table
and r3, r3, #0xc
......@@ -150,8 +150,8 @@ endconst
.ltorg
// void x264_memzero_aligned( void *dst, size_t n )
function x264_memzero_aligned_neon
// void memzero_aligned( void *dst, size_t n )
function memzero_aligned_neon
vmov.i8 q0, #0
vmov.i8 q1, #0
memzero_loop:
......@@ -168,18 +168,18 @@ endfunc
// uint8_t *src1, intptr_t src1_stride,
// uint8_t *src2, intptr_t src2_stride, int weight );
.macro AVGH w h
function x264_pixel_avg_\w\()x\h\()_neon
function pixel_avg_\w\()x\h\()_neon
ldr ip, [sp, #8]
push {r4-r6,lr}
cmp ip, #32
ldrd r4, r5, [sp, #16]
mov lr, #\h
beq x264_pixel_avg_w\w\()_neon
beq pixel_avg_w\w\()_neon
rsbs r6, ip, #64
blt x264_pixel_avg_weight_w\w\()_add_sub_neon // weight > 64
blt pixel_avg_weight_w\w\()_add_sub_neon // weight > 64
cmp ip, #0
bge x264_pixel_avg_weight_w\w\()_add_add_neon
b x264_pixel_avg_weight_w\w\()_sub_add_neon // weight < 0
bge pixel_avg_weight_w\w\()_add_add_neon
b pixel_avg_weight_w\w\()_sub_add_neon // weight < 0
endfunc
.endm
......@@ -244,7 +244,7 @@ AVGH 16, 16
.endm
.macro AVG_WEIGHT ext
function x264_pixel_avg_weight_w4_\ext\()_neon, export=0
function pixel_avg_weight_w4_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #2
......@@ -260,7 +260,7 @@ function x264_pixel_avg_weight_w4_\ext\()_neon, export=0
pop {r4-r6,pc}
endfunc
function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
function pixel_avg_weight_w8_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #4
......@@ -284,7 +284,7 @@ function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
pop {r4-r6,pc}
endfunc
function x264_pixel_avg_weight_w16_\ext\()_neon, export=0
function pixel_avg_weight_w16_\ext\()_neon, export=0
load_weights_\ext
1: // height loop
subs lr, lr, #2
......@@ -309,7 +309,7 @@ AVG_WEIGHT add_add
AVG_WEIGHT add_sub
AVG_WEIGHT sub_add
function x264_pixel_avg_w4_neon, export=0
function pixel_avg_w4_neon, export=0
subs lr, lr, #2
vld1.32 {d0[]}, [r2], r3
vld1.32 {d2[]}, [r4], r5
......@@ -319,11 +319,11 @@ function x264_pixel_avg_w4_neon, export=0
vrhadd.u8 d1, d1, d3
vst1.32 {d0[0]}, [r0,:32], r1
vst1.32 {d1[0]}, [r0,:32], r1
bgt x264_pixel_avg_w4_neon
bgt pixel_avg_w4_neon
pop {r4-r6,pc}
endfunc
function x264_pixel_avg_w8_neon, export=0
function pixel_avg_w8_neon, export=0
subs lr, lr, #4
vld1.64 {d0}, [r2], r3
vld1.64 {d2}, [r4], r5
......@@ -341,11 +341,11 @@ function x264_pixel_avg_w8_neon, export=0
vrhadd.u8 d3, d3, d5
vst1.64 {d2}, [r0,:64], r1
vst1.64 {d3}, [r0,:64], r1
bgt x264_pixel_avg_w8_neon
bgt pixel_avg_w8_neon
pop {r4-r6,pc}
endfunc
function x264_pixel_avg_w16_neon, export=0
function pixel_avg_w16_neon, export=0
subs lr, lr, #4
vld1.64 {d0-d1}, [r2], r3
vld1.64 {d2-d3}, [r4], r5
......@@ -363,12 +363,12 @@ function x264_pixel_avg_w16_neon, export=0
vrhadd.u8 q3, q3, q0
vst1.64 {d4-d5}, [r0,:128], r1
vst1.64 {d6-d7}, [r0,:128], r1
bgt x264_pixel_avg_w16_neon
bgt pixel_avg_w16_neon
pop {r4-r6,pc}
endfunc
function x264_pixel_avg2_w4_neon
function pixel_avg2_w4_neon
ldr ip, [sp, #4]
push {lr}
ldr lr, [sp, #4]
......@@ -386,7 +386,7 @@ avg2_w4_loop:
pop {pc}
endfunc
function x264_pixel_avg2_w8_neon
function pixel_avg2_w8_neon
ldr ip, [sp, #4]
push {lr}
ldr lr, [sp, #4]
......@@ -404,7 +404,7 @@ avg2_w8_loop:
pop {pc}
endfunc
function x264_pixel_avg2_w16_neon
function pixel_avg2_w16_neon
ldr ip, [sp, #4]
push {lr}
ldr lr, [sp, #4]
......@@ -422,7 +422,7 @@ avg2_w16_loop:
pop {pc}
endfunc
function x264_pixel_avg2_w20_neon
function pixel_avg2_w20_neon
ldr ip, [sp, #4]
push {lr}
sub r1, r1, #16
......@@ -464,7 +464,7 @@ endfunc
// void mc_weight( uint8_t *src, intptr_t src_stride, uint8_t *dst, intptr_t dst_stride,
// const x264_weight_t *weight, int height )
function x264_mc_weight_w20_neon
function mc_weight_w20_neon
weight_prologue full
sub r1, #16
weight20_loop:
......@@ -500,7 +500,7 @@ weight20_loop:
pop {r4-r5,pc}
endfunc
function x264_mc_weight_w16_neon
function mc_weight_w16_neon
weight_prologue full
weight16_loop:
subs ip, #2
......@@ -528,7 +528,7 @@ weight16_loop:
pop {r4-r5,pc}
endfunc
function x264_mc_weight_w8_neon
function mc_weight_w8_neon
weight_prologue full
weight8_loop:
subs ip, #2
......@@ -548,7 +548,7 @@ weight8_loop:
pop {r4-r5,pc}
endfunc
function x264_mc_weight_w4_neon
function mc_weight_w4_neon
weight_prologue full