Commit 7839a9e1 authored by Vittorio Giovara, committed by Anton Mitrofanov

aarch64: Set the function symbol prefix in a single location

parent 498cca0b
......@@ -28,9 +28,9 @@
#include "config.h"
#ifdef PREFIX
# define EXTERN_ASM _
# define EXTERN_ASM _x264_
#else
# define EXTERN_ASM
# define EXTERN_ASM x264_
#endif
#ifdef __ELF__
......@@ -53,7 +53,11 @@
.macro function name, export=0, align=2
.macro endfunc
.if \export
ELF .size EXTERN_ASM\name, . - EXTERN_ASM\name
.else
ELF .size \name, . - \name
.endif
FUNC .endfunc
.purgem endfunc
.endm
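With EXTERN_ASM now carrying the x264_ (or _x264_, when PREFIX is defined) prefix, the individual .S files can declare and branch to functions by their bare names, and only exported symbols pick up the prefix. Below is a minimal, hypothetical sketch of what the entry side of such a `function` macro could look like; the real macro body in asm.S is not part of this hunk, so the .global/.type/label emission shown here is an assumption for illustration, while EXTERN_ASM, ELF, function/endfunc and nal_escape_neon are taken from the hunks above.

// Illustrative sketch only, not the actual asm.S macro body:
.macro  function name, export=0, align=2
    .align  \align
    .if \export
        .global EXTERN_ASM\name     // public symbol: x264_\name (or _x264_\name with PREFIX)
ELF     .type   EXTERN_ASM\name, %function
EXTERN_ASM\name:
    .endif
\name:                              // bare local label, so `bl \name` / `b \name` need no prefix
.endm

// After this commit a source file only writes:
//     function nal_escape_neon, export=1
//     ...
//     endfunc
// and the linker-visible name is still x264_nal_escape_neon.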
......
......@@ -25,7 +25,7 @@
#include "asm.S"
function x264_nal_escape_neon, export=1
function nal_escape_neon, export=1
movi v0.16b, #0xff
movi v4.16b, #4
mov w3, #3
......
......@@ -29,9 +29,9 @@
// w11 holds x264_cabac_t.i_low
// w12 holds x264_cabac_t.i_range
function x264_cabac_encode_decision_asm, export=1
movrel x8, X(x264_cabac_range_lps)
movrel x9, X(x264_cabac_transition)
function cabac_encode_decision_asm, export=1
movrel x8, X(cabac_range_lps)
movrel x9, X(cabac_transition)
add w10, w1, #CABAC_STATE
ldrb w3, [x0, x10] // i_state
ldr w12, [x0, #CABAC_I_RANGE]
......@@ -101,7 +101,7 @@ cabac_putbyte:
ret
endfunc
function x264_cabac_encode_bypass_asm, export=1
function cabac_encode_bypass_asm, export=1
ldr w12, [x0, #CABAC_I_RANGE]
ldr w11, [x0, #CABAC_I_LOW]
ldr w2, [x0, #CABAC_I_QUEUE]
......@@ -114,7 +114,7 @@ function x264_cabac_encode_bypass_asm, export=1
ret
endfunc
function x264_cabac_encode_terminal_asm, export=1
function cabac_encode_terminal_asm, export=1
ldr w12, [x0, #CABAC_I_RANGE]
ldr w11, [x0, #CABAC_I_LOW]
sub w12, w12, #2
......
......@@ -79,7 +79,7 @@ endconst
.endm
function x264_dct4x4dc_neon, export=1
function dct4x4dc_neon, export=1
ld1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x0]
movi v31.4h, #1
SUMSUB_AB v4.4h, v5.4h, v0.4h, v1.4h
......@@ -102,7 +102,7 @@ function x264_dct4x4dc_neon, export=1
ret
endfunc
function x264_idct4x4dc_neon, export=1
function idct4x4dc_neon, export=1
ld1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x0]
SUMSUB_AB v4.4h, v5.4h, v0.4h, v1.4h
SUMSUB_AB v6.4h, v7.4h, v2.4h, v3.4h
......@@ -131,7 +131,7 @@ endfunc
sub \v3, \v7, \v5
.endm
function x264_sub4x4_dct_neon, export=1
function sub4x4_dct_neon, export=1
mov x3, #FENC_STRIDE
mov x4, #FDEC_STRIDE
ld1 {v0.s}[0], [x1], x3
......@@ -154,7 +154,7 @@ function x264_sub4x4_dct_neon, export=1
ret
endfunc
function x264_sub8x4_dct_neon
function sub8x4_dct_neon
ld1 {v0.8b}, [x1], x3
ld1 {v1.8b}, [x2], x4
usubl v16.8h, v0.8b, v1.8b
......@@ -193,34 +193,34 @@ function x264_sub8x4_dct_neon
ret
endfunc
function x264_sub8x8_dct_neon, export=1
function sub8x8_dct_neon, export=1
mov x5, x30
mov x3, #FENC_STRIDE
mov x4, #FDEC_STRIDE
bl x264_sub8x4_dct_neon
bl sub8x4_dct_neon
mov x30, x5
b x264_sub8x4_dct_neon
b sub8x4_dct_neon
endfunc
function x264_sub16x16_dct_neon, export=1
function sub16x16_dct_neon, export=1
mov x5, x30
mov x3, #FENC_STRIDE
mov x4, #FDEC_STRIDE
bl x264_sub8x4_dct_neon
bl x264_sub8x4_dct_neon
bl sub8x4_dct_neon
bl sub8x4_dct_neon
sub x1, x1, #8*FENC_STRIDE-8
sub x2, x2, #8*FDEC_STRIDE-8
bl x264_sub8x4_dct_neon
bl x264_sub8x4_dct_neon
bl sub8x4_dct_neon
bl sub8x4_dct_neon
sub x1, x1, #8
sub x2, x2, #8
bl x264_sub8x4_dct_neon
bl x264_sub8x4_dct_neon
bl sub8x4_dct_neon
bl sub8x4_dct_neon
sub x1, x1, #8*FENC_STRIDE-8
sub x2, x2, #8*FDEC_STRIDE-8
bl x264_sub8x4_dct_neon
bl sub8x4_dct_neon
mov x30, x5
b x264_sub8x4_dct_neon
b sub8x4_dct_neon
endfunc
......@@ -255,7 +255,7 @@ endfunc
SUMSUB_SHR2 2, v3.8h, v5.8h, v30.8h, v29.8h, v20.8h, v21.8h
.endm
function x264_sub8x8_dct8_neon, export=1
function sub8x8_dct8_neon, export=1
mov x3, #FENC_STRIDE
mov x4, #FDEC_STRIDE
ld1 {v16.8b}, [x1], x3
......@@ -292,19 +292,19 @@ function x264_sub8x8_dct8_neon, export=1
ret
endfunc
function x264_sub16x16_dct8_neon, export=1
function sub16x16_dct8_neon, export=1
mov x7, x30
bl X(x264_sub8x8_dct8_neon)
bl X(sub8x8_dct8_neon)
sub x1, x1, #FENC_STRIDE*8 - 8
sub x2, x2, #FDEC_STRIDE*8 - 8
bl X(x264_sub8x8_dct8_neon)
bl X(sub8x8_dct8_neon)
sub x1, x1, #8
sub x2, x2, #8
bl X(x264_sub8x8_dct8_neon)
bl X(sub8x8_dct8_neon)
mov x30, x7
sub x1, x1, #FENC_STRIDE*8 - 8
sub x2, x2, #FDEC_STRIDE*8 - 8
b X(x264_sub8x8_dct8_neon)
b X(sub8x8_dct8_neon)
endfunc
......@@ -317,7 +317,7 @@ endfunc
add \d6, \d6, \d1
.endm
function x264_add4x4_idct_neon, export=1
function add4x4_idct_neon, export=1
mov x2, #FDEC_STRIDE
ld1 {v0.4h,v1.4h,v2.4h,v3.4h}, [x1]
......@@ -357,7 +357,7 @@ function x264_add4x4_idct_neon, export=1
ret
endfunc
function x264_add8x4_idct_neon, export=1
function add8x4_idct_neon, export=1
ld1 {v0.8h,v1.8h}, [x1], #32
ld1 {v2.8h,v3.8h}, [x1], #32
transpose v20.2d, v21.2d, v0.2d, v2.2d
......@@ -398,29 +398,29 @@ function x264_add8x4_idct_neon, export=1
ret
endfunc
function x264_add8x8_idct_neon, export=1
function add8x8_idct_neon, export=1
mov x2, #FDEC_STRIDE
mov x5, x30
bl X(x264_add8x4_idct_neon)
bl X(add8x4_idct_neon)
mov x30, x5
b X(x264_add8x4_idct_neon)
b X(add8x4_idct_neon)
endfunc
function x264_add16x16_idct_neon, export=1
function add16x16_idct_neon, export=1
mov x2, #FDEC_STRIDE
mov x5, x30
bl X(x264_add8x4_idct_neon)
bl X(x264_add8x4_idct_neon)
bl X(add8x4_idct_neon)
bl X(add8x4_idct_neon)
sub x0, x0, #8*FDEC_STRIDE-8
bl X(x264_add8x4_idct_neon)
bl X(x264_add8x4_idct_neon)
bl X(add8x4_idct_neon)
bl X(add8x4_idct_neon)
sub x0, x0, #8
bl X(x264_add8x4_idct_neon)
bl X(x264_add8x4_idct_neon)
bl X(add8x4_idct_neon)
bl X(add8x4_idct_neon)
sub x0, x0, #8*FDEC_STRIDE-8
bl X(x264_add8x4_idct_neon)
bl X(add8x4_idct_neon)
mov x30, x5
b X(x264_add8x4_idct_neon)
b X(add8x4_idct_neon)
endfunc
.macro IDCT8_1D type
......@@ -446,7 +446,7 @@ endfunc
SUMSUB_AB v19.8h, v20.8h, v2.8h, v20.8h
.endm
function x264_add8x8_idct8_neon, export=1
function add8x8_idct8_neon, export=1
mov x2, #FDEC_STRIDE
ld1 {v16.8h,v17.8h}, [x1], #32
ld1 {v18.8h,v19.8h}, [x1], #32
......@@ -503,19 +503,19 @@ function x264_add8x8_idct8_neon, export=1
ret
endfunc
function x264_add16x16_idct8_neon, export=1
function add16x16_idct8_neon, export=1
mov x7, x30
bl X(x264_add8x8_idct8_neon)
bl X(add8x8_idct8_neon)
sub x0, x0, #8*FDEC_STRIDE-8
bl X(x264_add8x8_idct8_neon)
bl X(add8x8_idct8_neon)
sub x0, x0, #8
bl X(x264_add8x8_idct8_neon)
bl X(add8x8_idct8_neon)
sub x0, x0, #8*FDEC_STRIDE-8
mov x30, x7
b X(x264_add8x8_idct8_neon)
b X(add8x8_idct8_neon)
endfunc
function x264_add8x8_idct_dc_neon, export=1
function add8x8_idct_dc_neon, export=1
mov x2, #FDEC_STRIDE
ld1 {v16.4h}, [x1]
ld1 {v0.8b}, [x0], x2
......@@ -605,7 +605,7 @@ endfunc
st1 {v7.16b}, [x2], x3
.endm
function x264_add16x16_idct_dc_neon, export=1
function add16x16_idct_dc_neon, export=1
mov x2, x0
mov x3, #FDEC_STRIDE
......@@ -640,7 +640,7 @@ endfunc
add \dst\().8h, \dst\().8h, \t3\().8h
.endm
function x264_sub8x8_dct_dc_neon, export=1
function sub8x8_dct_dc_neon, export=1
mov x3, #FENC_STRIDE
mov x4, #FDEC_STRIDE
......@@ -660,7 +660,7 @@ function x264_sub8x8_dct_dc_neon, export=1
ret
endfunc
function x264_sub8x16_dct_dc_neon, export=1
function sub8x16_dct_dc_neon, export=1
mov x3, #FENC_STRIDE
mov x4, #FDEC_STRIDE
sub4x4x2_dct_dc v0, v16, v17, v18, v19, v20, v21, v22, v23
......@@ -689,7 +689,7 @@ function x264_sub8x16_dct_dc_neon, export=1
ret
endfunc
function x264_zigzag_interleave_8x8_cavlc_neon, export=1
function zigzag_interleave_8x8_cavlc_neon, export=1
mov x3, #7
movi v31.4s, #1
ld4 {v0.8h,v1.8h,v2.8h,v3.8h}, [x1], #64
......@@ -718,7 +718,7 @@ function x264_zigzag_interleave_8x8_cavlc_neon, export=1
ret
endfunc
function x264_zigzag_scan_4x4_frame_neon, export=1
function zigzag_scan_4x4_frame_neon, export=1
movrel x2, scan4x4_frame
ld1 {v0.16b,v1.16b}, [x1]
ld1 {v16.16b,v17.16b}, [x2]
......@@ -729,7 +729,7 @@ function x264_zigzag_scan_4x4_frame_neon, export=1
endfunc
.macro zigzag_sub_4x4 f ac
function x264_zigzag_sub_4x4\ac\()_\f\()_neon, export=1
function zigzag_sub_4x4\ac\()_\f\()_neon, export=1
mov x9, #FENC_STRIDE
mov x4, #FDEC_STRIDE
movrel x5, sub4x4_\f
......@@ -772,7 +772,7 @@ zigzag_sub_4x4 field, ac
zigzag_sub_4x4 frame
zigzag_sub_4x4 frame, ac
function x264_zigzag_scan_4x4_field_neon, export=1
function zigzag_scan_4x4_field_neon, export=1
movrel x2, scan4x4_field
ld1 {v0.8h,v1.8h}, [x1]
ld1 {v16.16b}, [x2]
......@@ -781,7 +781,7 @@ function x264_zigzag_scan_4x4_field_neon, export=1
ret
endfunc
function x264_zigzag_scan_8x8_frame_neon, export=1
function zigzag_scan_8x8_frame_neon, export=1
movrel x2, scan8x8_frame
ld1 {v0.8h,v1.8h}, [x1], #32
ld1 {v2.8h,v3.8h}, [x1], #32
......@@ -841,7 +841,7 @@ const scan8x8_frame, align=5
.byte T(7,5), T(7,6), T(6,7), T(7,7)
endconst
function x264_zigzag_scan_8x8_field_neon, export=1
function zigzag_scan_8x8_field_neon, export=1
movrel x2, scan8x8_field
ld1 {v0.8h,v1.8h}, [x1], #32
ld1 {v2.8h,v3.8h}, [x1], #32
......@@ -868,7 +868,7 @@ function x264_zigzag_scan_8x8_field_neon, export=1
endfunc
.macro zigzag_sub8x8 f
function x264_zigzag_sub_8x8_\f\()_neon, export=1
function zigzag_sub_8x8_\f\()_neon, export=1
movrel x4, sub8x8_\f
mov x5, #FENC_STRIDE
mov x6, #FDEC_STRIDE
......
......@@ -108,7 +108,7 @@
sqxtun2 v0.16b, v24.8h
.endm
function x264_deblock_v_luma_neon, export=1
function deblock_v_luma_neon, export=1
h264_loop_filter_start
ld1 {v0.16b}, [x0], x1
......@@ -131,7 +131,7 @@ function x264_deblock_v_luma_neon, export=1
ret
endfunc
function x264_deblock_h_luma_neon, export=1
function deblock_h_luma_neon, export=1
h264_loop_filter_start
sub x0, x0, #4
......@@ -302,7 +302,7 @@ endfunc
bit v2.16b, v26.16b, v18.16b // q2'_2
.endm
function x264_deblock_v_luma_intra_neon, export=1
function deblock_v_luma_intra_neon, export=1
h264_loop_filter_start_intra
ld1 {v0.16b}, [x0], x1 // q0
......@@ -328,7 +328,7 @@ function x264_deblock_v_luma_intra_neon, export=1
ret
endfunc
function x264_deblock_h_luma_intra_neon, export=1
function deblock_h_luma_intra_neon, export=1
h264_loop_filter_start_intra
sub x0, x0, #4
......@@ -421,7 +421,7 @@ endfunc
sqxtun2 v0.16b, v23.8h
.endm
function x264_deblock_v_chroma_neon, export=1
function deblock_v_chroma_neon, export=1
h264_loop_filter_start
sub x0, x0, x1, lsl #1
......@@ -439,7 +439,7 @@ function x264_deblock_v_chroma_neon, export=1
ret
endfunc
function x264_deblock_h_chroma_neon, export=1
function deblock_h_chroma_neon, export=1
h264_loop_filter_start
sub x0, x0, #4
......@@ -472,7 +472,7 @@ deblock_h_chroma:
ret
endfunc
function x264_deblock_h_chroma_422_neon, export=1
function deblock_h_chroma_422_neon, export=1
add x5, x0, x1
sub x0, x0, #4
add x1, x1, x1
......@@ -516,7 +516,7 @@ endfunc
sqxtun v17.8b, v22.8h
.endm
function x264_deblock_h_chroma_mbaff_neon, export=1
function deblock_h_chroma_mbaff_neon, export=1
h264_loop_filter_start
sub x4, x0, #4
......@@ -575,7 +575,7 @@ endfunc
bit v17.16b, v25.16b, v26.16b
.endm
function x264_deblock_v_chroma_intra_neon, export=1
function deblock_v_chroma_intra_neon, export=1
h264_loop_filter_start_intra
sub x0, x0, x1, lsl #1
......@@ -593,7 +593,7 @@ function x264_deblock_v_chroma_intra_neon, export=1
ret
endfunc
function x264_deblock_h_chroma_intra_mbaff_neon, export=1
function deblock_h_chroma_intra_mbaff_neon, export=1
h264_loop_filter_start_intra
sub x4, x0, #4
......@@ -615,7 +615,7 @@ function x264_deblock_h_chroma_intra_mbaff_neon, export=1
ret
endfunc
function x264_deblock_h_chroma_intra_neon, export=1
function deblock_h_chroma_intra_neon, export=1
h264_loop_filter_start_intra
sub x4, x0, #4
......@@ -645,7 +645,7 @@ function x264_deblock_h_chroma_intra_neon, export=1
ret
endfunc
function x264_deblock_h_chroma_422_intra_neon, export=1
function deblock_h_chroma_422_intra_neon, export=1
h264_loop_filter_start_intra
sub x4, x0, #4
......@@ -697,12 +697,12 @@ function x264_deblock_h_chroma_422_intra_neon, export=1
ret
endfunc
//static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE],
// int8_t ref[2][X264_SCAN8_LUMA_SIZE],
// int16_t mv[2][X264_SCAN8_LUMA_SIZE][2],
// uint8_t bs[2][8][4], int mvy_limit,
// int bframe )
function x264_deblock_strength_neon, export=1
// void deblock_strength( uint8_t nnz[X264_SCAN8_SIZE],
// int8_t ref[2][X264_SCAN8_LUMA_SIZE],
// int16_t mv[2][X264_SCAN8_LUMA_SIZE][2],
// uint8_t bs[2][8][4], int mvy_limit,
// int bframe )
function deblock_strength_neon, export=1
movi v4.16b, #0
lsl w4, w4, #8
add x3, x3, #32
......
......@@ -31,7 +31,7 @@
// note: prefetch stuff assumes 64-byte cacheline
// void prefetch_ref( uint8_t *pix, intptr_t stride, int parity )
function x264_prefetch_ref_aarch64, export=1
function prefetch_ref_aarch64, export=1
cmp w2, #1
csel x2, xzr, x1, eq
add x0, x0, #64
......@@ -54,8 +54,8 @@ endfunc
// void prefetch_fenc( uint8_t *pix_y, intptr_t stride_y,
// uint8_t *pix_uv, intptr_t stride_uv, int mb_x )
.macro x264_prefetch_fenc sub
function x264_prefetch_fenc_\sub\()_aarch64, export=1
.macro prefetch_fenc sub
function prefetch_fenc_\sub\()_aarch64, export=1
and w6, w5, #3
and w7, w5, #3
mul x6, x6, x1
......@@ -82,14 +82,14 @@ function x264_prefetch_fenc_\sub\()_aarch64, export=1
endfunc
.endm
x264_prefetch_fenc 420
x264_prefetch_fenc 422
prefetch_fenc 420
prefetch_fenc 422
// void pixel_avg( uint8_t *dst, intptr_t dst_stride,
// uint8_t *src1, intptr_t src1_stride,
// uint8_t *src2, intptr_t src2_stride, int weight );
.macro AVGH w h
function x264_pixel_avg_\w\()x\h\()_neon, export=1
function pixel_avg_\w\()x\h\()_neon, export=1
mov w10, #64
cmp w6, #32
mov w9, #\h
......@@ -292,7 +292,7 @@ function pixel_avg_w16_neon
ret
endfunc
function x264_pixel_avg2_w4_neon, export=1
function pixel_avg2_w4_neon, export=1
1:
subs w5, w5, #2
ld1 {v0.s}[0], [x2], x3
......@@ -307,7 +307,7 @@ function x264_pixel_avg2_w4_neon, export=1
ret
endfunc
function x264_pixel_avg2_w8_neon, export=1
function pixel_avg2_w8_neon, export=1
1:
subs w5, w5, #2
ld1 {v0.8b}, [x2], x3
......@@ -322,7 +322,7 @@ function x264_pixel_avg2_w8_neon, export=1
ret
endfunc
function x264_pixel_avg2_w16_neon, export=1
function pixel_avg2_w16_neon, export=1
1:
subs w5, w5, #2
ld1 {v0.16b}, [x2], x3
......@@ -337,7 +337,7 @@ function x264_pixel_avg2_w16_neon, export=1
ret
endfunc
function x264_pixel_avg2_w20_neon, export=1
function pixel_avg2_w20_neon, export=1
sub x1, x1, #16
1:
subs w5, w5, #2
......@@ -373,7 +373,7 @@ endfunc
// void mc_weight( uint8_t *src, intptr_t src_stride, uint8_t *dst,
// intptr_t dst_stride, const x264_weight_t *weight, int h )
function x264_mc_weight_w20_neon, export=1
function mc_weight_w20_neon, export=1
weight_prologue full
sub x1, x1, #16
1:
......@@ -409,7 +409,7 @@ function x264_mc_weight_w20_neon, export=1
ret
endfunc
function x264_mc_weight_w16_neon, export=1
function mc_weight_w16_neon, export=1
weight_prologue full
weight16_loop:
1:
......@@ -438,7 +438,7 @@ weight16_loop:
ret
endfunc
function x264_mc_weight_w8_neon, export=1
function mc_weight_w8_neon, export=1
weight_prologue full
1:
subs w9, w9, #2
......@@ -458,7 +458,7 @@ function x264_mc_weight_w8_neon, export=1
ret
endfunc
function x264_mc_weight_w4_neon, export=1
function mc_weight_w4_neon, export=1
weight_prologue full
1:
subs w9, w9, #2
......@@ -474,7 +474,7 @@ function x264_mc_weight_w4_neon, export=1
ret
endfunc
function x264_mc_weight_w20_nodenom_neon, export=1
function mc_weight_w20_nodenom_neon, export=1
weight_prologue nodenom
sub x1, x1, #16
1:
......@@ -505,7 +505,7 @@ function x264_mc_weight_w20_nodenom_neon, export=1
ret
endfunc
function x264_mc_weight_w16_nodenom_neon, export=1
function mc_weight_w16_nodenom_neon, export=1
weight_prologue nodenom
1:
subs w9, w9, #2
......@@ -529,7 +529,7 @@ function x264_mc_weight_w16_nodenom_neon, export=1
ret
endfunc
function x264_mc_weight_w8_nodenom_neon, export=1
function mc_weight_w8_nodenom_neon, export=1
weight_prologue nodenom
1:
subs w9, w9, #2
......@@ -547,7 +547,7 @@ function x264_mc_weight_w8_nodenom_neon, export=1
ret
endfunc
function x264_mc_weight_w4_nodenom_neon, export=1
function mc_weight_w4_nodenom_neon, export=1
weight_prologue nodenom
1:
subs w9, w9, #2
......@@ -568,7 +568,7 @@ endfunc
.endm
.macro weight_simple name op
function x264_mc_weight_w20_\name\()_neon, export=1
function mc_weight_w20_\name\()_neon, export=1
weight_simple_prologue
1:
subs w5, w5, #2
......@@ -588,7 +588,7 @@ function x264_mc_weight_w20_\name\()_neon, export=1
ret
endfunc
function x264_mc_weight_w16_\name\()_neon, export=1
function mc_weight_w16_\name\()_neon, export=1
weight_simple_prologue
1:
subs w5, w5, #2
......@@ -602,7 +602,7 @@ function x264_mc_weight_w16_\name\()_neon, export=1
ret
endfunc
function x264_mc_weight_w8_\name\()_neon, export=1
function mc_weight_w8_\name\()_neon, export=1
weight_simple_prologue
1:
subs w5, w5, #2
......@@ -616,7 +616,7 @@ function x264_mc_weight_w8_\name\()_neon, export=1
ret
endfunc
function x264_mc_weight_w4_\name\()_neon, export=1
function mc_weight_w4_\name\()_neon, export=1
weight_simple_prologue
1:
subs w5, w5, #2
......@@ -635,7 +635,7 @@ weight_simple offsetsub, uqsub
// void mc_copy( uint8_t *dst, intptr_t dst_stride, uint8_t *src, intptr_t src_stride, int height )
function x264_mc_copy_w4_neon, export=1
function mc_copy_w4_neon, export=1
1:
subs w4, w4, #4
ld1 {v0.s}[0], [x2], x3
......@@ -650,7 +650,7 @@ function x264_mc_copy_w4_neon, export=1
ret
endfunc
function x264_mc_copy_w8_neon, export=1
function mc_copy_w8_neon, export=1
1: subs w4, w4, #4
ld1 {v0.8b}, [x2], x3
ld1 {v1.8b}, [x2], x3
......@@ -664,7 +664,7 @@ function x264_mc_copy_w8_neon, export=1
ret
endfunc
function x264_mc_copy_w16_neon, export=1
function mc_copy_w16_neon, export=1
1: subs w4, w4, #4
ld1 {v0.16b}, [x2], x3
ld1 {v1.16b}, [x2], x3
......@@ -678,11 +678,11 @@ function x264_mc_copy_w16_neon, export=1
ret
endfunc
// void x264_mc_chroma_neon( uint8_t *dst_u, uint8_t *dst_v,
// intptr_t i_dst_stride,
// uint8_t *src, intptr_t i_src_stride,
// int dx, int dy, int i_width, int i_height );
function x264_mc_chroma_neon, export=1
// void mc_chroma( uint8_t *dst_u, uint8_t *dst_v,
// intptr_t i_dst_stride,
// uint8_t *src, intptr_t i_src_stride,
// int dx, int dy, int i_width, int i_height );
function mc_chroma_neon, export=1
ldr w15, [sp] // height
sbfx x12, x6, #3, #29 // asr(3) and sign extend
sbfx x11, x5, #3, #29 // asr(3) and sign extend
......@@ -1016,9 +1016,9 @@ function mc_chroma_w8_neon
ret
endfunc
//void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
// intptr_t stride, int width, int height, int16_t *buf )
function x264_hpel_filter_neon, export=1
// void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
// intptr_t stride, int width, int height, int16_t *buf )
function hpel_filter_neon, export=1
ubfm x9, x3, #0, #3
add w15, w5, w9
sub x13, x3, x9 // align src
......@@ -1158,7 +1158,7 @@ endfunc
// frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth,
// uint8_t *dstv, uint8_t *dstc, intptr_t src_stride,
// intptr_t dst_stride, int width, int height )
function x264_frame_init_lowres_core_neon, export=1
function frame_init_lowres_core_neon, export=1
ldr w8, [sp]
sub x10, x6, w7, uxtw // dst_stride - width
and x10, x10, #~15
......@@ -1233,12 +1233,12 @@ function x264_frame_init_lowres_core_neon, export=1
ret
endfunc
function x264_load_deinterleave_chroma_fenc_neon, export=1
function load_deinterleave_chroma_fenc_neon, export=1
mov x4, #FENC_STRIDE/2
b load_deinterleave_chroma
endfunc
function x264_load_deinterleave_chroma_fdec_neon, export=1
function load_deinterleave_chroma_fdec_neon, export=1
mov x4, #FDEC_STRIDE/2
load_deinterleave_chroma:
ld2 {v0.8b,v1.8b}, [x1], x2
......@@ -1253,7 +1253,7 @@ load_deinterleave_chroma:
ret
endfunc
function x264_plane_copy_core_neon, export=1
function plane_copy_core_neon, export=1
add w8, w4, #15 // a 32-bit write clears the upper 32 bits of the register
and w4, w8, #~15
// safe use of the full reg since negative width makes no sense
......@@ -1282,7 +1282,7 @@ function x264_plane_copy_core_neon, export=1
ret
endfunc
function x264_plane_copy_swap_core_neon, export=1
function plane_copy_swap_core_neon, export=1
lsl w4, w4, #1
sub x1, x1, x4
sub x3, x3, x4
......@@ -1310,7 +1310,7 @@ function x264_plane_copy_swap_core_neon, export=1
ret
endfunc
function x264_plane_copy_deinterleave_neon, export=1
function plane_copy_deinterleave_neon, export=1
add w9, w6, #15
and w9, w9, #0xfffffff0
sub x1, x1, x9
......@@ -1349,7 +1349,7 @@ endfunc
b.gt 1b
.endm
function x264_plane_copy_deinterleave_rgb_neon, export=1
function plane_copy_deinterleave_rgb_neon, export=1
#if SYS_MACOSX
ldr w8, [sp]
ldp w9, w10, [sp, #4]
......@@ -1381,7 +1381,7 @@ function x264_plane_copy_deinterleave_rgb_neon, export=1
ret
endfunc
function x264_plane_copy_interleave_core_neon, export=1
function plane_copy_interleave_core_neon, export=1
add w9, w6, #15
and w9, w9, #0xfffffff0
sub x1, x1, x9, lsl #1
......@@ -1404,7 +1404,7 @@ function x264_plane_copy_interleave_core_neon, export=1
ret
endfunc
function x264_store_interleave_chroma_neon, export=1
function store_interleave_chroma_neon, export=1
mov x5, #FDEC_STRIDE
1:
ld1 {v0.8b}, [x2], x5
......@@ -1431,7 +1431,7 @@ endfunc
add v0.8h, v0.8h, v5.8h
.endm
function x264_integral_init4h_neon, export=1
function integral_init4h_neon, export=1
sub x3, x0, x2, lsl #1
ld1 {v6.8b,v7.8b}, [x1], #16
1:
......@@ -1466,7 +1466,7 @@ endfunc
add v0.8h, v0.8h, \s\().8h
.endm
function x264_integral_init8h_neon, export=1
function integral_init8h_neon, export=1
sub x3, x0, x2, lsl #1
ld1 {v16.8b,v17.8b}, [x1], #16
1:
......@@ -1483,7 +1483,7 @@ function x264_integral_init8h_neon, export=1
ret
endfunc
function x264_integral_init4v_neon, export=1
function integral_init4v_neon, export=1
mov x3, x0
add x4, x0, x2, lsl #3
add x8, x0, x2, lsl #4
......@@ -1518,7 +1518,7 @@ function x264_integral_init4v_neon, export=1
ret
endfunc
function x264_integral_init8v_neon, export=1
function integral_init8v_neon, export=1
add x2, x0, x1, lsl #4