Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
VideoLAN
x264
Commits
7839a9e1
Commit
7839a9e1
authored
Jan 17, 2017
by
Vittorio Giovara
Committed by
Anton Mitrofanov
Dec 24, 2017
Browse files
aarch64: Set the function symbol prefix in a single location
parent
498cca0b
Changes
10
Hide whitespace changes
Inline
Side-by-side
common/aarch64/asm.S
View file @
7839a9e1
...
...
@@ -28,9 +28,9 @@
#include "config.h"
#ifdef PREFIX
# define EXTERN_ASM _
# define EXTERN_ASM _
x264_
#else
# define EXTERN_ASM
# define EXTERN_ASM
x264_
#endif
#ifdef __ELF__
...
...
@@ -53,7 +53,11 @@
.
macro
function
name
,
export
=
0
,
align
=
2
.
macro
endfunc
.
if
\
export
ELF
.
size
EXTERN_ASM
\
name
,
.
-
EXTERN_ASM
\
name
.
else
ELF
.
size
\
name
,
.
-
\
name
.
endif
FUNC
.
endfunc
.
purgem
endfunc
.
endm
...
...
common/aarch64/bitstream-a.S
View file @
7839a9e1
...
...
@@ -25,7 +25,7 @@
#include "asm.S"
function
x264_
nal_escape_neon
,
export
=
1
function
nal_escape_neon
,
export
=
1
movi
v0
.16
b
,
#
0xff
movi
v4
.16
b
,
#
4
mov
w3
,
#
3
...
...
common/aarch64/cabac-a.S
View file @
7839a9e1
...
...
@@ -29,9 +29,9 @@
//
w11
holds
x264_cabac_t
.
i_low
//
w12
holds
x264_cabac_t
.
i_range
function
x264_
cabac_encode_decision_asm
,
export
=
1
movrel
x8
,
X
(
x264_
cabac_range_lps
)
movrel
x9
,
X
(
x264_
cabac_transition
)
function
cabac_encode_decision_asm
,
export
=
1
movrel
x8
,
X
(
cabac_range_lps
)
movrel
x9
,
X
(
cabac_transition
)
add
w10
,
w1
,
#
CABAC_STATE
ldrb
w3
,
[
x0
,
x10
]
//
i_state
ldr
w12
,
[
x0
,
#
CABAC_I_RANGE
]
...
...
@@ -101,7 +101,7 @@ cabac_putbyte:
ret
endfunc
function
x264_
cabac_encode_bypass_asm
,
export
=
1
function
cabac_encode_bypass_asm
,
export
=
1
ldr
w12
,
[
x0
,
#
CABAC_I_RANGE
]
ldr
w11
,
[
x0
,
#
CABAC_I_LOW
]
ldr
w2
,
[
x0
,
#
CABAC_I_QUEUE
]
...
...
@@ -114,7 +114,7 @@ function x264_cabac_encode_bypass_asm, export=1
ret
endfunc
function
x264_
cabac_encode_terminal_asm
,
export
=
1
function
cabac_encode_terminal_asm
,
export
=
1
ldr
w12
,
[
x0
,
#
CABAC_I_RANGE
]
ldr
w11
,
[
x0
,
#
CABAC_I_LOW
]
sub
w12
,
w12
,
#
2
...
...
common/aarch64/dct-a.S
View file @
7839a9e1
...
...
@@ -79,7 +79,7 @@ endconst
.
endm
function
x264_
dct4x4dc_neon
,
export
=
1
function
dct4x4dc_neon
,
export
=
1
ld1
{
v0
.4
h
,
v1
.4
h
,
v2
.4
h
,
v3
.4
h
},
[
x0
]
movi
v31
.4
h
,
#
1
SUMSUB_AB
v4
.4
h
,
v5
.4
h
,
v0
.4
h
,
v1
.4
h
...
...
@@ -102,7 +102,7 @@ function x264_dct4x4dc_neon, export=1
ret
endfunc
function
x264_
idct4x4dc_neon
,
export
=
1
function
idct4x4dc_neon
,
export
=
1
ld1
{
v0
.4
h
,
v1
.4
h
,
v2
.4
h
,
v3
.4
h
},
[
x0
]
SUMSUB_AB
v4
.4
h
,
v5
.4
h
,
v0
.4
h
,
v1
.4
h
SUMSUB_AB
v6
.4
h
,
v7
.4
h
,
v2
.4
h
,
v3
.4
h
...
...
@@ -131,7 +131,7 @@ endfunc
sub
\
v3
,
\
v7
,
\
v5
.
endm
function
x264_
sub4x4_dct_neon
,
export
=
1
function
sub4x4_dct_neon
,
export
=
1
mov
x3
,
#
FENC_STRIDE
mov
x4
,
#
FDEC_STRIDE
ld1
{
v0
.
s
}[
0
],
[
x1
],
x3
...
...
@@ -154,7 +154,7 @@ function x264_sub4x4_dct_neon, export=1
ret
endfunc
function
x264_
sub8x4_dct_neon
function
sub8x4_dct_neon
ld1
{
v0
.8
b
},
[
x1
],
x3
ld1
{
v1
.8
b
},
[
x2
],
x4
usubl
v16
.8
h
,
v0
.8
b
,
v1
.8
b
...
...
@@ -193,34 +193,34 @@ function x264_sub8x4_dct_neon
ret
endfunc
function
x264_
sub8x8_dct_neon
,
export
=
1
function
sub8x8_dct_neon
,
export
=
1
mov
x5
,
x30
mov
x3
,
#
FENC_STRIDE
mov
x4
,
#
FDEC_STRIDE
bl
x264_
sub8x4_dct_neon
bl
sub8x4_dct_neon
mov
x30
,
x5
b
x264_
sub8x4_dct_neon
b
sub8x4_dct_neon
endfunc
function
x264_
sub16x16_dct_neon
,
export
=
1
function
sub16x16_dct_neon
,
export
=
1
mov
x5
,
x30
mov
x3
,
#
FENC_STRIDE
mov
x4
,
#
FDEC_STRIDE
bl
x264_
sub8x4_dct_neon
bl
x264_
sub8x4_dct_neon
bl
sub8x4_dct_neon
bl
sub8x4_dct_neon
sub
x1
,
x1
,
#
8
*
FENC_STRIDE
-
8
sub
x2
,
x2
,
#
8
*
FDEC_STRIDE
-
8
bl
x264_
sub8x4_dct_neon
bl
x264_
sub8x4_dct_neon
bl
sub8x4_dct_neon
bl
sub8x4_dct_neon
sub
x1
,
x1
,
#
8
sub
x2
,
x2
,
#
8
bl
x264_
sub8x4_dct_neon
bl
x264_
sub8x4_dct_neon
bl
sub8x4_dct_neon
bl
sub8x4_dct_neon
sub
x1
,
x1
,
#
8
*
FENC_STRIDE
-
8
sub
x2
,
x2
,
#
8
*
FDEC_STRIDE
-
8
bl
x264_
sub8x4_dct_neon
bl
sub8x4_dct_neon
mov
x30
,
x5
b
x264_
sub8x4_dct_neon
b
sub8x4_dct_neon
endfunc
...
...
@@ -255,7 +255,7 @@ endfunc
SUMSUB_SHR2
2
,
v3
.8
h
,
v5
.8
h
,
v30
.8
h
,
v29
.8
h
,
v20
.8
h
,
v21
.8
h
.
endm
function
x264_
sub8x8_dct8_neon
,
export
=
1
function
sub8x8_dct8_neon
,
export
=
1
mov
x3
,
#
FENC_STRIDE
mov
x4
,
#
FDEC_STRIDE
ld1
{
v16
.8
b
},
[
x1
],
x3
...
...
@@ -292,19 +292,19 @@ function x264_sub8x8_dct8_neon, export=1
ret
endfunc
function
x264_
sub16x16_dct8_neon
,
export
=
1
function
sub16x16_dct8_neon
,
export
=
1
mov
x7
,
x30
bl
X
(
x264_
sub8x8_dct8_neon
)
bl
X
(
sub8x8_dct8_neon
)
sub
x1
,
x1
,
#
FENC_STRIDE
*
8
-
8
sub
x2
,
x2
,
#
FDEC_STRIDE
*
8
-
8
bl
X
(
x264_
sub8x8_dct8_neon
)
bl
X
(
sub8x8_dct8_neon
)
sub
x1
,
x1
,
#
8
sub
x2
,
x2
,
#
8
bl
X
(
x264_
sub8x8_dct8_neon
)
bl
X
(
sub8x8_dct8_neon
)
mov
x30
,
x7
sub
x1
,
x1
,
#
FENC_STRIDE
*
8
-
8
sub
x2
,
x2
,
#
FDEC_STRIDE
*
8
-
8
b
X
(
x264_
sub8x8_dct8_neon
)
b
X
(
sub8x8_dct8_neon
)
endfunc
...
...
@@ -317,7 +317,7 @@ endfunc
add
\
d6
,
\
d6
,
\
d1
.
endm
function
x264_
add4x4_idct_neon
,
export
=
1
function
add4x4_idct_neon
,
export
=
1
mov
x2
,
#
FDEC_STRIDE
ld1
{
v0
.4
h
,
v1
.4
h
,
v2
.4
h
,
v3
.4
h
},
[
x1
]
...
...
@@ -357,7 +357,7 @@ function x264_add4x4_idct_neon, export=1
ret
endfunc
function
x264_
add8x4_idct_neon
,
export
=
1
function
add8x4_idct_neon
,
export
=
1
ld1
{
v0
.8
h
,
v1
.8
h
},
[
x1
],
#
32
ld1
{
v2
.8
h
,
v3
.8
h
},
[
x1
],
#
32
transpose
v20
.2
d
,
v21
.2
d
,
v0
.2
d
,
v2
.2
d
...
...
@@ -398,29 +398,29 @@ function x264_add8x4_idct_neon, export=1
ret
endfunc
function
x264_
add8x8_idct_neon
,
export
=
1
function
add8x8_idct_neon
,
export
=
1
mov
x2
,
#
FDEC_STRIDE
mov
x5
,
x30
bl
X
(
x264_
add8x4_idct_neon
)
bl
X
(
add8x4_idct_neon
)
mov
x30
,
x5
b
X
(
x264_
add8x4_idct_neon
)
b
X
(
add8x4_idct_neon
)
endfunc
function
x264_
add16x16_idct_neon
,
export
=
1
function
add16x16_idct_neon
,
export
=
1
mov
x2
,
#
FDEC_STRIDE
mov
x5
,
x30
bl
X
(
x264_
add8x4_idct_neon
)
bl
X
(
x264_
add8x4_idct_neon
)
bl
X
(
add8x4_idct_neon
)
bl
X
(
add8x4_idct_neon
)
sub
x0
,
x0
,
#
8
*
FDEC_STRIDE
-
8
bl
X
(
x264_
add8x4_idct_neon
)
bl
X
(
x264_
add8x4_idct_neon
)
bl
X
(
add8x4_idct_neon
)
bl
X
(
add8x4_idct_neon
)
sub
x0
,
x0
,
#
8
bl
X
(
x264_
add8x4_idct_neon
)
bl
X
(
x264_
add8x4_idct_neon
)
bl
X
(
add8x4_idct_neon
)
bl
X
(
add8x4_idct_neon
)
sub
x0
,
x0
,
#
8
*
FDEC_STRIDE
-
8
bl
X
(
x264_
add8x4_idct_neon
)
bl
X
(
add8x4_idct_neon
)
mov
x30
,
x5
b
X
(
x264_
add8x4_idct_neon
)
b
X
(
add8x4_idct_neon
)
endfunc
.
macro
IDCT8_1D
type
...
...
@@ -446,7 +446,7 @@ endfunc
SUMSUB_AB
v19
.8
h
,
v20
.8
h
,
v2
.8
h
,
v20
.8
h
.
endm
function
x264_
add8x8_idct8_neon
,
export
=
1
function
add8x8_idct8_neon
,
export
=
1
mov
x2
,
#
FDEC_STRIDE
ld1
{
v16
.8
h
,
v17
.8
h
},
[
x1
],
#
32
ld1
{
v18
.8
h
,
v19
.8
h
},
[
x1
],
#
32
...
...
@@ -503,19 +503,19 @@ function x264_add8x8_idct8_neon, export=1
ret
endfunc
function
x264_
add16x16_idct8_neon
,
export
=
1
function
add16x16_idct8_neon
,
export
=
1
mov
x7
,
x30
bl
X
(
x264_
add8x8_idct8_neon
)
bl
X
(
add8x8_idct8_neon
)
sub
x0
,
x0
,
#
8
*
FDEC_STRIDE
-
8
bl
X
(
x264_
add8x8_idct8_neon
)
bl
X
(
add8x8_idct8_neon
)
sub
x0
,
x0
,
#
8
bl
X
(
x264_
add8x8_idct8_neon
)
bl
X
(
add8x8_idct8_neon
)
sub
x0
,
x0
,
#
8
*
FDEC_STRIDE
-
8
mov
x30
,
x7
b
X
(
x264_
add8x8_idct8_neon
)
b
X
(
add8x8_idct8_neon
)
endfunc
function
x264_
add8x8_idct_dc_neon
,
export
=
1
function
add8x8_idct_dc_neon
,
export
=
1
mov
x2
,
#
FDEC_STRIDE
ld1
{
v16
.4
h
},
[
x1
]
ld1
{
v0
.8
b
},
[
x0
],
x2
...
...
@@ -605,7 +605,7 @@ endfunc
st1
{
v7
.16
b
},
[
x2
],
x3
.
endm
function
x264_
add16x16_idct_dc_neon
,
export
=
1
function
add16x16_idct_dc_neon
,
export
=
1
mov
x2
,
x0
mov
x3
,
#
FDEC_STRIDE
...
...
@@ -640,7 +640,7 @@ endfunc
add
\
dst
\
()
.8
h
,
\
dst
\
()
.8
h
,
\
t3
\
()
.8
h
.
endm
function
x264_
sub8x8_dct_dc_neon
,
export
=
1
function
sub8x8_dct_dc_neon
,
export
=
1
mov
x3
,
#
FENC_STRIDE
mov
x4
,
#
FDEC_STRIDE
...
...
@@ -660,7 +660,7 @@ function x264_sub8x8_dct_dc_neon, export=1
ret
endfunc
function
x264_
sub8x16_dct_dc_neon
,
export
=
1
function
sub8x16_dct_dc_neon
,
export
=
1
mov
x3
,
#
FENC_STRIDE
mov
x4
,
#
FDEC_STRIDE
sub4x4x2_dct_dc
v0
,
v16
,
v17
,
v18
,
v19
,
v20
,
v21
,
v22
,
v23
...
...
@@ -689,7 +689,7 @@ function x264_sub8x16_dct_dc_neon, export=1
ret
endfunc
function
x264_
zigzag_interleave_8x8_cavlc_neon
,
export
=
1
function
zigzag_interleave_8x8_cavlc_neon
,
export
=
1
mov
x3
,
#
7
movi
v31
.4
s
,
#
1
ld4
{
v0
.8
h
,
v1
.8
h
,
v2
.8
h
,
v3
.8
h
},
[
x1
],
#
64
...
...
@@ -718,7 +718,7 @@ function x264_zigzag_interleave_8x8_cavlc_neon, export=1
ret
endfunc
function
x264_
zigzag_scan_4x4_frame_neon
,
export
=
1
function
zigzag_scan_4x4_frame_neon
,
export
=
1
movrel
x2
,
scan4x4_frame
ld1
{
v0
.16
b
,
v1
.16
b
},
[
x1
]
ld1
{
v16
.16
b
,
v17
.16
b
},
[
x2
]
...
...
@@ -729,7 +729,7 @@ function x264_zigzag_scan_4x4_frame_neon, export=1
endfunc
.
macro
zigzag_sub_4x4
f
ac
function
x264_
zigzag_sub_4x4
\
ac
\()
_
\
f
\()
_neon
,
export
=
1
function
zigzag_sub_4x4
\
ac
\()
_
\
f
\()
_neon
,
export
=
1
mov
x9
,
#
FENC_STRIDE
mov
x4
,
#
FDEC_STRIDE
movrel
x5
,
sub4x4_
\
f
...
...
@@ -772,7 +772,7 @@ zigzag_sub_4x4 field, ac
zigzag_sub_4x4
frame
zigzag_sub_4x4
frame
,
ac
function
x264_
zigzag_scan_4x4_field_neon
,
export
=
1
function
zigzag_scan_4x4_field_neon
,
export
=
1
movrel
x2
,
scan4x4_field
ld1
{
v0
.8
h
,
v1
.8
h
},
[
x1
]
ld1
{
v16
.16
b
},
[
x2
]
...
...
@@ -781,7 +781,7 @@ function x264_zigzag_scan_4x4_field_neon, export=1
ret
endfunc
function
x264_
zigzag_scan_8x8_frame_neon
,
export
=
1
function
zigzag_scan_8x8_frame_neon
,
export
=
1
movrel
x2
,
scan8x8_frame
ld1
{
v0
.8
h
,
v1
.8
h
},
[
x1
],
#
32
ld1
{
v2
.8
h
,
v3
.8
h
},
[
x1
],
#
32
...
...
@@ -841,7 +841,7 @@ const scan8x8_frame, align=5
.
byte
T
(
7
,
5
),
T
(
7
,
6
),
T
(
6
,
7
),
T
(
7
,
7
)
endconst
function
x264_
zigzag_scan_8x8_field_neon
,
export
=
1
function
zigzag_scan_8x8_field_neon
,
export
=
1
movrel
x2
,
scan8x8_field
ld1
{
v0
.8
h
,
v1
.8
h
},
[
x1
],
#
32
ld1
{
v2
.8
h
,
v3
.8
h
},
[
x1
],
#
32
...
...
@@ -868,7 +868,7 @@ function x264_zigzag_scan_8x8_field_neon, export=1
endfunc
.
macro
zigzag_sub8x8
f
function
x264_
zigzag_sub_8x8_
\
f
\()
_neon
,
export
=
1
function
zigzag_sub_8x8_
\
f
\()
_neon
,
export
=
1
movrel
x4
,
sub8x8_
\
f
mov
x5
,
#
FENC_STRIDE
mov
x6
,
#
FDEC_STRIDE
...
...
common/aarch64/deblock-a.S
View file @
7839a9e1
...
...
@@ -108,7 +108,7 @@
sqxtun2
v0
.16
b
,
v24
.8
h
.
endm
function
x264_
deblock_v_luma_neon
,
export
=
1
function
deblock_v_luma_neon
,
export
=
1
h264_loop_filter_start
ld1
{
v0
.16
b
},
[
x0
],
x1
...
...
@@ -131,7 +131,7 @@ function x264_deblock_v_luma_neon, export=1
ret
endfunc
function
x264_
deblock_h_luma_neon
,
export
=
1
function
deblock_h_luma_neon
,
export
=
1
h264_loop_filter_start
sub
x0
,
x0
,
#
4
...
...
@@ -302,7 +302,7 @@ endfunc
bit
v2
.16
b
,
v26
.16
b
,
v18
.16
b
//
q2
'_2
.
endm
function
x264_
deblock_v_luma_intra_neon
,
export
=
1
function
deblock_v_luma_intra_neon
,
export
=
1
h264_loop_filter_start_intra
ld1
{
v0
.16
b
},
[
x0
],
x1
//
q0
...
...
@@ -328,7 +328,7 @@ function x264_deblock_v_luma_intra_neon, export=1
ret
endfunc
function
x264_
deblock_h_luma_intra_neon
,
export
=
1
function
deblock_h_luma_intra_neon
,
export
=
1
h264_loop_filter_start_intra
sub
x0
,
x0
,
#
4
...
...
@@ -421,7 +421,7 @@ endfunc
sqxtun2
v0
.16
b
,
v23
.8
h
.
endm
function
x264_
deblock_v_chroma_neon
,
export
=
1
function
deblock_v_chroma_neon
,
export
=
1
h264_loop_filter_start
sub
x0
,
x0
,
x1
,
lsl
#
1
...
...
@@ -439,7 +439,7 @@ function x264_deblock_v_chroma_neon, export=1
ret
endfunc
function
x264_
deblock_h_chroma_neon
,
export
=
1
function
deblock_h_chroma_neon
,
export
=
1
h264_loop_filter_start
sub
x0
,
x0
,
#
4
...
...
@@ -472,7 +472,7 @@ deblock_h_chroma:
ret
endfunc
function
x264_
deblock_h_chroma_422_neon
,
export
=
1
function
deblock_h_chroma_422_neon
,
export
=
1
add
x5
,
x0
,
x1
sub
x0
,
x0
,
#
4
add
x1
,
x1
,
x1
...
...
@@ -516,7 +516,7 @@ endfunc
sqxtun
v17
.8
b
,
v22
.8
h
.
endm
function
x264_
deblock_h_chroma_mbaff_neon
,
export
=
1
function
deblock_h_chroma_mbaff_neon
,
export
=
1
h264_loop_filter_start
sub
x4
,
x0
,
#
4
...
...
@@ -575,7 +575,7 @@ endfunc
bit
v17
.16
b
,
v25
.16
b
,
v26
.16
b
.
endm
function
x264_
deblock_v_chroma_intra_neon
,
export
=
1
function
deblock_v_chroma_intra_neon
,
export
=
1
h264_loop_filter_start_intra
sub
x0
,
x0
,
x1
,
lsl
#
1
...
...
@@ -593,7 +593,7 @@ function x264_deblock_v_chroma_intra_neon, export=1
ret
endfunc
function
x264_
deblock_h_chroma_intra_mbaff_neon
,
export
=
1
function
deblock_h_chroma_intra_mbaff_neon
,
export
=
1
h264_loop_filter_start_intra
sub
x4
,
x0
,
#
4
...
...
@@ -615,7 +615,7 @@ function x264_deblock_h_chroma_intra_mbaff_neon, export=1
ret
endfunc
function
x264_
deblock_h_chroma_intra_neon
,
export
=
1
function
deblock_h_chroma_intra_neon
,
export
=
1
h264_loop_filter_start_intra
sub
x4
,
x0
,
#
4
...
...
@@ -645,7 +645,7 @@ function x264_deblock_h_chroma_intra_neon, export=1
ret
endfunc
function
x264_
deblock_h_chroma_422_intra_neon
,
export
=
1
function
deblock_h_chroma_422_intra_neon
,
export
=
1
h264_loop_filter_start_intra
sub
x4
,
x0
,
#
4
...
...
@@ -697,12 +697,12 @@ function x264_deblock_h_chroma_422_intra_neon, export=1
ret
endfunc
//
static
void
deblock_strength
_c
(
uint8_t
nnz
[
X264_SCAN8_SIZE
],
//
int8_t
ref
[
2
][
X264_SCAN8_LUMA_SIZE
],
//
int16_t
mv
[
2
][
X264_SCAN8_LUMA_SIZE
][
2
],
//
uint8_t
bs
[
2
][
8
][
4
],
int
mvy_limit
,
//
int
bframe
)
function
x264_
deblock_strength_neon
,
export
=
1
//
void
deblock_strength
(
uint8_t
nnz
[
X264_SCAN8_SIZE
],
//
int8_t
ref
[
2
][
X264_SCAN8_LUMA_SIZE
],
//
int16_t
mv
[
2
][
X264_SCAN8_LUMA_SIZE
][
2
],
//
uint8_t
bs
[
2
][
8
][
4
],
int
mvy_limit
,
//
int
bframe
)
function
deblock_strength_neon
,
export
=
1
movi
v4
.16
b
,
#
0
lsl
w4
,
w4
,
#
8
add
x3
,
x3
,
#
32
...
...
common/aarch64/mc-a.S
View file @
7839a9e1
...
...
@@ -31,7 +31,7 @@
//
note
:
prefetch
stuff
assumes
64
-
byte
cacheline
//
void
prefetch_ref
(
uint8_t
*
pix
,
intptr_t
stride
,
int
parity
)
function
x264_
prefetch_ref_aarch64
,
export
=
1
function
prefetch_ref_aarch64
,
export
=
1
cmp
w2
,
#
1
csel
x2
,
xzr
,
x1
,
eq
add
x0
,
x0
,
#
64
...
...
@@ -54,8 +54,8 @@ endfunc
//
void
prefetch_fenc
(
uint8_t
*
pix_y
,
intptr_t
stride_y
,
//
uint8_t
*
pix_uv
,
intptr_t
stride_uv
,
int
mb_x
)
.
macro
x264_
prefetch_fenc
sub
function
x264_
prefetch_fenc_
\
sub
\()
_aarch64
,
export
=
1
.
macro
prefetch_fenc
sub
function
prefetch_fenc_
\
sub
\()
_aarch64
,
export
=
1
and
w6
,
w5
,
#
3
and
w7
,
w5
,
#
3
mul
x6
,
x6
,
x1
...
...
@@ -82,14 +82,14 @@ function x264_prefetch_fenc_\sub\()_aarch64, export=1
endfunc
.
endm
x264_
prefetch_fenc
420
x264_
prefetch_fenc
422
prefetch_fenc
420
prefetch_fenc
422
//
void
pixel_avg
(
uint8_t
*
dst
,
intptr_t
dst_stride
,
//
uint8_t
*
src1
,
intptr_t
src1_stride
,
//
uint8_t
*
src2
,
intptr_t
src2_stride
,
int
weight
)
;
.
macro
AVGH
w
h
function
x264_
pixel_avg_
\
w
\()
x
\
h
\()
_neon
,
export
=
1
function
pixel_avg_
\
w
\()
x
\
h
\()
_neon
,
export
=
1
mov
w10
,
#
64
cmp
w6
,
#
32
mov
w9
,
#
\
h
...
...
@@ -292,7 +292,7 @@ function pixel_avg_w16_neon
ret
endfunc
function
x264_
pixel_avg2_w4_neon
,
export
=
1
function
pixel_avg2_w4_neon
,
export
=
1
1
:
subs
w5
,
w5
,
#
2
ld1
{
v0
.
s
}[
0
],
[
x2
],
x3
...
...
@@ -307,7 +307,7 @@ function x264_pixel_avg2_w4_neon, export=1
ret
endfunc
function
x264_
pixel_avg2_w8_neon
,
export
=
1
function
pixel_avg2_w8_neon
,
export
=
1
1
:
subs
w5
,
w5
,
#
2
ld1
{
v0
.8
b
},
[
x2
],
x3
...
...
@@ -322,7 +322,7 @@ function x264_pixel_avg2_w8_neon, export=1
ret
endfunc
function
x264_
pixel_avg2_w16_neon
,
export
=
1
function
pixel_avg2_w16_neon
,
export
=
1
1
:
subs
w5
,
w5
,
#
2
ld1
{
v0
.16
b
},
[
x2
],
x3
...
...
@@ -337,7 +337,7 @@ function x264_pixel_avg2_w16_neon, export=1
ret
endfunc
function
x264_
pixel_avg2_w20_neon
,
export
=
1
function
pixel_avg2_w20_neon
,
export
=
1
sub
x1
,
x1
,
#
16
1
:
subs
w5
,
w5
,
#
2
...
...
@@ -373,7 +373,7 @@ endfunc
//
void
mc_weight
(
uint8_t
*
src
,
intptr_t
src_stride
,
uint8_t
*
dst
,
//
intptr_t
dst_stride
,
const
x264_weight_t
*
weight
,
int
h
)
function
x264_
mc_weight_w20_neon
,
export
=
1
function
mc_weight_w20_neon
,
export
=
1
weight_prologue
full
sub
x1
,
x1
,
#
16
1
:
...
...
@@ -409,7 +409,7 @@ function x264_mc_weight_w20_neon, export=1
ret
endfunc
function
x264_
mc_weight_w16_neon
,
export
=
1
function
mc_weight_w16_neon
,
export
=
1
weight_prologue
full
weight16_loop
:
1
:
...
...
@@ -438,7 +438,7 @@ weight16_loop:
ret
endfunc
function
x264_
mc_weight_w8_neon
,
export
=
1
function
mc_weight_w8_neon
,
export
=
1
weight_prologue
full
1
:
subs
w9
,
w9
,
#
2
...
...
@@ -458,7 +458,7 @@ function x264_mc_weight_w8_neon, export=1
ret
endfunc
function
x264_
mc_weight_w4_neon
,
export
=
1
function
mc_weight_w4_neon
,
export
=
1
weight_prologue
full
1
:
subs
w9
,
w9
,
#
2
...
...
@@ -474,7 +474,7 @@ function x264_mc_weight_w4_neon, export=1
ret
endfunc
function
x264_
mc_weight_w20_nodenom_neon
,
export
=
1
function
mc_weight_w20_nodenom_neon
,
export
=
1
weight_prologue
nodenom
sub
x1
,
x1
,
#
16
1
:
...
...
@@ -505,7 +505,7 @@ function x264_mc_weight_w20_nodenom_neon, export=1
ret
endfunc
function
x264_
mc_weight_w16_nodenom_neon
,
export
=
1
function
mc_weight_w16_nodenom_neon
,
export
=
1
weight_prologue
nodenom
1
:
subs
w9
,
w9
,
#
2
...
...
@@ -529,7 +529,7 @@ function x264_mc_weight_w16_nodenom_neon, export=1
ret
endfunc
function
x264_
mc_weight_w8_nodenom_neon
,
export
=
1
function
mc_weight_w8_nodenom_neon
,
export
=
1
weight_prologue
nodenom
1
:
subs
w9
,
w9
,
#
2
...
...
@@ -547,7 +547,7 @@ function x264_mc_weight_w8_nodenom_neon, export=1
ret
endfunc
function
x264_
mc_weight_w4_nodenom_neon
,
export
=
1
function
mc_weight_w4_nodenom_neon
,
export
=
1
weight_prologue
nodenom
1
:
subs
w9
,
w9
,
#
2
...
...
@@ -568,7 +568,7 @@ endfunc
.
endm
.
macro
weight_simple
name
op
function
x264_
mc_weight_w20_
\
name
\()
_neon
,
export
=
1
function
mc_weight_w20_
\
name
\()
_neon
,
export
=
1
weight_simple_prologue
1
:
subs
w5
,
w5
,
#
2
...
...
@@ -588,7 +588,7 @@ function x264_mc_weight_w20_\name\()_neon, export=1
ret
endfunc
function
x264_
mc_weight_w16_
\
name
\()
_neon
,
export
=
1