Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
VideoLAN
x264
Commits
498cca0b
Commit
498cca0b
authored
Jan 17, 2017
by
Vittorio Giovara
Committed by
Anton Mitrofanov
Dec 24, 2017
Browse files
arm: Set the function symbol prefix in a single location
parent
8f2437d3
Changes
10
Hide whitespace changes
Inline
Side-by-side
common/arm/asm.S
View file @
498cca0b
...
...
@@ -34,9 +34,9 @@
#endif
#ifdef PREFIX
# define EXTERN_ASM _
# define EXTERN_ASM _
x264_
#else
# define EXTERN_ASM
# define EXTERN_ASM
x264_
#endif
#ifdef __ELF__
...
...
@@ -75,7 +75,11 @@ ELF .eabi_attribute 25, \val
.
macro
function
name
,
export
=
1
.
macro
endfunc
.
if
\
export
ELF
.
size
EXTERN_ASM
\
name
,
.
-
EXTERN_ASM
\
name
.
else
ELF
.
size
\
name
,
.
-
\
name
.
endif
FUNC
.
endfunc
.
purgem
endfunc
.
endm
...
...
common/arm/bitstream-a.S
View file @
498cca0b
...
...
@@ -25,7 +25,7 @@
#include "asm.S"
function
x264_
nal_escape_neon
function
nal_escape_neon
push
{
r4
-
r5
,
lr
}
vmov.u8
q0
,
#
0xff
vmov.u8
q8
,
#
4
...
...
common/arm/cpu-a.S
View file @
498cca0b
...
...
@@ -29,7 +29,7 @@
//
done
in
gas
because
.
fpu
neon
overrides
the
refusal
to
assemble
//
instructions
the
selected
-
march
/-
mcpu
doesn
't support
function
x264_
cpu_neon_test
function
cpu_neon_test
vadd.i16
q0
,
q0
,
q0
bx
lr
endfunc
...
...
@@ -37,7 +37,7 @@ endfunc
//
return
:
0
on
success
//
1
if
counters
were
already
enabled
//
9
if
lo
-
res
counters
were
already
enabled
function
x264_
cpu_enable_armv7_counter
,
export
=
0
function
cpu_enable_armv7_counter
,
export
=
0
mrc
p15
,
0
,
r2
,
c9
,
c12
,
0
//
read
PMNC
ands
r0
,
r2
,
#
1
andne
r0
,
r2
,
#
9
...
...
@@ -50,7 +50,7 @@ function x264_cpu_enable_armv7_counter, export=0
bx
lr
endfunc
function
x264_
cpu_disable_armv7_counter
,
export
=
0
function
cpu_disable_armv7_counter
,
export
=
0
mrc
p15
,
0
,
r0
,
c9
,
c12
,
0
//
read
PMNC
bic
r0
,
r0
,
#
1
//
disable
counters
mcr
p15
,
0
,
r0
,
c9
,
c12
,
0
//
write
PMNC
...
...
@@ -64,14 +64,14 @@ endfunc
//
return
:
0
if
transfers
neon
->
arm
transfers
take
more
than
10
cycles
//
nonzero
otherwise
function
x264_
cpu_fast_neon_mrc_test
function
cpu_fast_neon_mrc_test
//
check
for
user
access
to
performance
counters
mrc
p15
,
0
,
r0
,
c9
,
c14
,
0
cmp
r0
,
#
0
bxeq
lr
push
{
r4
-
r6
,
lr
}
bl
x264_
cpu_enable_armv7_counter
bl
cpu_enable_armv7_counter
ands
r1
,
r0
,
#
8
mov
r3
,
#
0
mov
ip
,
#
4
...
...
@@ -99,7 +99,7 @@ average_loop:
//
disable
counters
if
we
enabled
them
ands
r0
,
r0
,
#
1
bleq
x264_
cpu_disable_armv7_counter
bleq
cpu_disable_armv7_counter
lsr
r0
,
r3
,
#
5
cmp
r0
,
#
10
...
...
common/arm/dct-a.S
View file @
498cca0b
...
...
@@ -62,7 +62,7 @@ endconst
.
endm
function
x264_
dct4x4dc_neon
function
dct4x4dc_neon
vld1.64
{
d0
-
d3
},
[
r0
,:
128
]
SUMSUB_ABCD
d4
,
d5
,
d6
,
d7
,
d0
,
d1
,
d2
,
d3
SUMSUB_ABCD
d0
,
d2
,
d3
,
d1
,
d4
,
d6
,
d5
,
d7
...
...
@@ -81,7 +81,7 @@ function x264_dct4x4dc_neon
bx
lr
endfunc
function
x264_
idct4x4dc_neon
function
idct4x4dc_neon
vld1.64
{
d0
-
d3
},
[
r0
,:
128
]
SUMSUB_ABCD
d4
,
d5
,
d6
,
d7
,
d0
,
d1
,
d2
,
d3
SUMSUB_ABCD
d0
,
d2
,
d3
,
d1
,
d4
,
d6
,
d5
,
d7
...
...
@@ -105,7 +105,7 @@ endfunc
vsub.s16
\
d3
,
\
d7
,
\
d5
.
endm
function
x264_
sub4x4_dct_neon
function
sub4x4_dct_neon
mov
r3
,
#
FENC_STRIDE
mov
ip
,
#
FDEC_STRIDE
vld1.32
{
d0
[]},
[
r1
,:
32
],
r3
...
...
@@ -128,7 +128,7 @@ function x264_sub4x4_dct_neon
bx
lr
endfunc
function
x264_
sub8x4_dct_neon
,
export
=
0
function
sub8x4_dct_neon
,
export
=
0
vld1.64
{
d0
},
[
r1
,:
64
],
r3
vld1.64
{
d1
},
[
r2
,:
64
],
ip
vsubl.u8
q8
,
d0
,
d1
...
...
@@ -164,34 +164,34 @@ function x264_sub8x4_dct_neon, export=0
bx
lr
endfunc
function
x264_
sub8x8_dct_neon
function
sub8x8_dct_neon
push
{
lr
}
mov
r3
,
#
FENC_STRIDE
mov
ip
,
#
FDEC_STRIDE
bl
x264_
sub8x4_dct_neon
bl
sub8x4_dct_neon
pop
{
lr
}
b
x264_
sub8x4_dct_neon
b
sub8x4_dct_neon
endfunc
function
x264_
sub16x16_dct_neon
function
sub16x16_dct_neon
push
{
lr
}
mov
r3
,
#
FENC_STRIDE
mov
ip
,
#
FDEC_STRIDE
bl
x264_
sub8x4_dct_neon
bl
x264_
sub8x4_dct_neon
bl
sub8x4_dct_neon
bl
sub8x4_dct_neon
sub
r1
,
r1
,
#
8
*
FENC_STRIDE
-
8
sub
r2
,
r2
,
#
8
*
FDEC_STRIDE
-
8
bl
x264_
sub8x4_dct_neon
bl
x264_
sub8x4_dct_neon
bl
sub8x4_dct_neon
bl
sub8x4_dct_neon
sub
r1
,
r1
,
#
8
sub
r2
,
r2
,
#
8
bl
x264_
sub8x4_dct_neon
bl
x264_
sub8x4_dct_neon
bl
sub8x4_dct_neon
bl
sub8x4_dct_neon
sub
r1
,
r1
,
#
8
*
FENC_STRIDE
-
8
sub
r2
,
r2
,
#
8
*
FDEC_STRIDE
-
8
bl
x264_
sub8x4_dct_neon
bl
sub8x4_dct_neon
pop
{
lr
}
b
x264_
sub8x4_dct_neon
b
sub8x4_dct_neon
endfunc
...
...
@@ -226,7 +226,7 @@ endfunc
SUMSUB_SHR2
2
,
q11
,
q13
,
q3
,
q13
,
q0
,
q1
.
endm
function
x264_
sub8x8_dct8_neon
function
sub8x8_dct8_neon
mov
r3
,
#
FENC_STRIDE
mov
ip
,
#
FDEC_STRIDE
vld1.64
{
d16
},
[
r1
,:
64
],
r3
...
...
@@ -278,19 +278,19 @@ function x264_sub8x8_dct8_neon
bx
lr
endfunc
function
x264_
sub16x16_dct8_neon
function
sub16x16_dct8_neon
push
{
lr
}
bl
X
(
x264_
sub8x8_dct8_neon
)
bl
X
(
sub8x8_dct8_neon
)
sub
r1
,
r1
,
#
FENC_STRIDE
*
8
-
8
sub
r2
,
r2
,
#
FDEC_STRIDE
*
8
-
8
bl
X
(
x264_
sub8x8_dct8_neon
)
bl
X
(
sub8x8_dct8_neon
)
sub
r1
,
r1
,
#
8
sub
r2
,
r2
,
#
8
bl
X
(
x264_
sub8x8_dct8_neon
)
bl
X
(
sub8x8_dct8_neon
)
pop
{
lr
}
sub
r1
,
r1
,
#
FENC_STRIDE
*
8
-
8
sub
r2
,
r2
,
#
FDEC_STRIDE
*
8
-
8
b
X
(
x264_
sub8x8_dct8_neon
)
b
X
(
sub8x8_dct8_neon
)
endfunc
...
...
@@ -303,7 +303,7 @@ endfunc
vadd.s16
\
d6
,
\
d6
,
\
d1
.
endm
function
x264_
add4x4_idct_neon
function
add4x4_idct_neon
mov
r2
,
#
FDEC_STRIDE
vld1.64
{
d0
-
d3
},
[
r1
,:
128
]
...
...
@@ -335,7 +335,7 @@ function x264_add4x4_idct_neon
bx
lr
endfunc
function
x264_
add8x4_idct_neon
,
export
=
0
function
add8x4_idct_neon
,
export
=
0
vld1.64
{
d0
-
d3
},
[
r1
,:
128
]!
IDCT_1D
d16
,
d18
,
d20
,
d22
,
d0
,
d1
,
d2
,
d3
vld1.64
{
d4
-
d7
},
[
r1
,:
128
]!
...
...
@@ -375,29 +375,29 @@ function x264_add8x4_idct_neon, export=0
bx
lr
endfunc
function
x264_
add8x8_idct_neon
function
add8x8_idct_neon
mov
r2
,
#
FDEC_STRIDE
mov
ip
,
lr
bl
x264_
add8x4_idct_neon
bl
add8x4_idct_neon
mov
lr
,
ip
b
x264_
add8x4_idct_neon
b
add8x4_idct_neon
endfunc
function
x264_
add16x16_idct_neon
function
add16x16_idct_neon
mov
r2
,
#
FDEC_STRIDE
mov
ip
,
lr
bl
x264_
add8x4_idct_neon
bl
x264_
add8x4_idct_neon
bl
add8x4_idct_neon
bl
add8x4_idct_neon
sub
r0
,
r0
,
#
8
*
FDEC_STRIDE
-
8
bl
x264_
add8x4_idct_neon
bl
x264_
add8x4_idct_neon
bl
add8x4_idct_neon
bl
add8x4_idct_neon
sub
r0
,
r0
,
#
8
bl
x264_
add8x4_idct_neon
bl
x264_
add8x4_idct_neon
bl
add8x4_idct_neon
bl
add8x4_idct_neon
sub
r0
,
r0
,
#
8
*
FDEC_STRIDE
-
8
bl
x264_
add8x4_idct_neon
bl
add8x4_idct_neon
mov
lr
,
ip
b
x264_
add8x4_idct_neon
b
add8x4_idct_neon
endfunc
...
...
@@ -435,7 +435,7 @@ endfunc
SUMSUB_AB
q11
,
q12
,
q2
,
q12
.
endm
function
x264_
add8x8_idct8_neon
function
add8x8_idct8_neon
mov
r2
,
#
FDEC_STRIDE
vld1.64
{
d16
-
d19
},
[
r1
,:
128
]!
vld1.64
{
d20
-
d23
},
[
r1
,:
128
]!
...
...
@@ -497,20 +497,20 @@ function x264_add8x8_idct8_neon
bx
lr
endfunc
function
x264_
add16x16_idct8_neon
function
add16x16_idct8_neon
mov
ip
,
lr
bl
X
(
x264_
add8x8_idct8_neon
)
bl
X
(
add8x8_idct8_neon
)
sub
r0
,
r0
,
#
8
*
FDEC_STRIDE
-
8
bl
X
(
x264_
add8x8_idct8_neon
)
bl
X
(
add8x8_idct8_neon
)
sub
r0
,
r0
,
#
8
bl
X
(
x264_
add8x8_idct8_neon
)
bl
X
(
add8x8_idct8_neon
)
sub
r0
,
r0
,
#
8
*
FDEC_STRIDE
-
8
mov
lr
,
ip
b
X
(
x264_
add8x8_idct8_neon
)
b
X
(
add8x8_idct8_neon
)
endfunc
function
x264_
add8x8_idct_dc_neon
function
add8x8_idct_dc_neon
mov
r2
,
#
FDEC_STRIDE
vld1.64
{
d16
},
[
r1
,:
64
]
vrshr.s16
d16
,
d16
,
#
6
...
...
@@ -593,7 +593,7 @@ endfunc
vst1.64
{
d22
-
d23
},
[
r2
,:
128
],
r3
.
endm
function
x264_
add16x16_idct_dc_neon
function
add16x16_idct_dc_neon
mov
r2
,
r0
mov
r3
,
#
FDEC_STRIDE
vmov.i16
q15
,
#
0
...
...
@@ -609,7 +609,7 @@ function x264_add16x16_idct_dc_neon
bx
lr
endfunc
function
x264_
sub8x8_dct_dc_neon
function
sub8x8_dct_dc_neon
mov
r3
,
#
FENC_STRIDE
mov
ip
,
#
FDEC_STRIDE
vld1.64
{
d16
},
[
r1
,:
64
],
r3
...
...
@@ -657,7 +657,7 @@ function x264_sub8x8_dct_dc_neon
bx
lr
endfunc
function
x264_
sub8x16_dct_dc_neon
function
sub8x16_dct_dc_neon
mov
r3
,
#
FENC_STRIDE
mov
ip
,
#
FDEC_STRIDE
vld1.64
{
d16
},
[
r1
,:
64
],
r3
...
...
@@ -751,7 +751,7 @@ function x264_sub8x16_dct_dc_neon
endfunc
function
x264_
zigzag_scan_4x4_frame_neon
function
zigzag_scan_4x4_frame_neon
movrel
r2
,
scan4x4_frame
vld1.64
{
d0
-
d3
},
[
r1
,:
128
]
vld1.64
{
d16
-
d19
},
[
r2
,:
128
]
...
...
common/arm/deblock-a.S
View file @
498cca0b
...
...
@@ -117,7 +117,7 @@
vqmovun.s16
d1
,
q12
.
endm
function
x264_
deblock_v_luma_neon
function
deblock_v_luma_neon
h264_loop_filter_start
vld1.64
{
d0
,
d1
},
[
r0
,:
128
],
r1
...
...
@@ -143,7 +143,7 @@ function x264_deblock_v_luma_neon
bx
lr
endfunc
function
x264_
deblock_h_luma_neon
function
deblock_h_luma_neon
h264_loop_filter_start
sub
r0
,
r0
,
#
4
...
...
@@ -324,7 +324,7 @@ endfunc
.
endm
function
x264_
deblock_v_luma_intra_neon
function
deblock_v_luma_intra_neon
push
{
lr
}
vld1.64
{
d0
,
d1
},
[
r0
,:
128
],
r1
vld1.64
{
d2
,
d3
},
[
r0
,:
128
],
r1
...
...
@@ -352,7 +352,7 @@ function x264_deblock_v_luma_intra_neon
pop
{
pc
}
endfunc
function
x264_
deblock_h_luma_intra_neon
function
deblock_h_luma_intra_neon
push
{
lr
}
sub
r0
,
r0
,
#
4
vld1.64
{
d22
},
[
r0
],
r1
...
...
@@ -447,7 +447,7 @@ endfunc
vqmovun.s16
d1
,
q12
.
endm
function
x264_
deblock_v_chroma_neon
function
deblock_v_chroma_neon
h264_loop_filter_start
sub
r0
,
r0
,
r1
,
lsl
#
1
...
...
@@ -465,7 +465,7 @@ function x264_deblock_v_chroma_neon
bx
lr
endfunc
function
x264_
deblock_h_chroma_neon
function
deblock_h_chroma_neon
h264_loop_filter_start
sub
r0
,
r0
,
#
4
...
...
@@ -499,7 +499,7 @@ deblock_h_chroma:
bx
lr
endfunc
function
x264_
deblock_h_chroma_422_neon
function
deblock_h_chroma_422_neon
h264_loop_filter_start
push
{
lr
}
sub
r0
,
r0
,
#
4
...
...
@@ -547,7 +547,7 @@ endfunc
vqmovun.s16
d0
,
q11
.
endm
function
x264_
deblock_h_chroma_mbaff_neon
function
deblock_h_chroma_mbaff_neon
h264_loop_filter_start
sub
r0
,
r0
,
#
4
...
...
@@ -610,7 +610,7 @@ endfunc
vbit
q0
,
q2
,
q13
.
endm
function
x264_
deblock_v_chroma_intra_neon
function
deblock_v_chroma_intra_neon
sub
r0
,
r0
,
r1
,
lsl
#
1
vld2.8
{
d18
,
d19
},
[
r0
,:
128
],
r1
vld2.8
{
d16
,
d17
},
[
r0
,:
128
],
r1
...
...
@@ -626,7 +626,7 @@ function x264_deblock_v_chroma_intra_neon
bx
lr
endfunc
function
x264_
deblock_h_chroma_intra_neon
function
deblock_h_chroma_intra_neon
sub
r0
,
r0
,
#
4
vld1.8
{
d18
},
[
r0
],
r1
vld1.8
{
d16
},
[
r0
],
r1
...
...
@@ -657,15 +657,15 @@ function x264_deblock_h_chroma_intra_neon
bx
lr
endfunc
function
x264_
deblock_h_chroma_422_intra_neon
function
deblock_h_chroma_422_intra_neon
push
{
lr
}
bl
X
(
x264_
deblock_h_chroma_intra_neon
)
bl
X
(
deblock_h_chroma_intra_neon
)
add
r0
,
r0
,
#
2
pop
{
lr
}
b
X
(
x264_
deblock_h_chroma_intra_neon
)
b
X
(
deblock_h_chroma_intra_neon
)
endfunc
function
x264_
deblock_h_chroma_intra_mbaff_neon
function
deblock_h_chroma_intra_mbaff_neon
sub
r0
,
r0
,
#
4
vld1.8
{
d18
},
[
r0
],
r1
vld1.8
{
d16
},
[
r0
],
r1
...
...
@@ -688,7 +688,7 @@ function x264_deblock_h_chroma_intra_mbaff_neon
bx
lr
endfunc
function
x264_
deblock_strength_neon
function
deblock_strength_neon
ldr
ip
,
[
sp
]
vmov.i8
q8
,
#
0
lsl
ip
,
ip
,
#
8
...
...
common/arm/mc-a.S
View file @
498cca0b
...
...
@@ -38,7 +38,7 @@ endconst
//
They
also
use
nothing
above
armv5te
,
but
we
don
't care about pre-armv6
//
void
prefetch_ref
(
uint8_t
*
pix
,
intptr_t
stride
,
int
parity
)
function
x264_
prefetch_ref_arm
function
prefetch_ref_arm
sub
r2
,
r2
,
#
1
add
r0
,
r0
,
#
64
and
r2
,
r2
,
r1
...
...
@@ -58,7 +58,7 @@ endfunc
//
void
prefetch_fenc
(
uint8_t
*
pix_y
,
intptr_t
stride_y
,
//
uint8_t
*
pix_uv
,
intptr_t
stride_uv
,
int
mb_x
)
function
x264_
prefetch_fenc_arm
function
prefetch_fenc_arm
ldr
ip
,
[
sp
]
push
{
lr
}
and
lr
,
ip
,
#
3
...
...
@@ -83,8 +83,8 @@ function x264_prefetch_fenc_arm
endfunc
//
void
*
x264_
memcpy_aligned
(
void
*
dst
,
const
void
*
src
,
size_t
n
)
function
x264_
memcpy_aligned_neon
//
void
*
memcpy_aligned
(
void
*
dst
,
const
void
*
src
,
size_t
n
)
function
memcpy_aligned_neon
orr
r3
,
r0
,
r1
,
lsr
#
1
movrel
ip
,
memcpy_table
and
r3
,
r3
,
#
0xc
...
...
@@ -150,8 +150,8 @@ endconst
.
ltorg
//
void
x264_
memzero_aligned
(
void
*
dst
,
size_t
n
)
function
x264_
memzero_aligned_neon
//
void
memzero_aligned
(
void
*
dst
,
size_t
n
)
function
memzero_aligned_neon
vmov.i8
q0
,
#
0
vmov.i8
q1
,
#
0
memzero_loop
:
...
...
@@ -168,18 +168,18 @@ endfunc
//
uint8_t
*
src1
,
intptr_t
src1_stride
,
//
uint8_t
*
src2
,
intptr_t
src2_stride
,
int
weight
)
;
.
macro
AVGH
w
h
function
x264_
pixel_avg_
\
w
\()
x
\
h
\()
_neon
function
pixel_avg_
\
w
\()
x
\
h
\()
_neon
ldr
ip
,
[
sp
,
#
8
]
push
{
r4
-
r6
,
lr
}
cmp
ip
,
#
32
ldrd
r4
,
r5
,
[
sp
,
#
16
]
mov
lr
,
#
\
h
beq
x264_
pixel_avg_w
\
w
\
()
_neon
beq
pixel_avg_w
\
w
\
()
_neon
rsbs
r6
,
ip
,
#
64
blt
x264_
pixel_avg_weight_w
\
w
\
()
_add_sub_neon
//
weight
>
64
blt
pixel_avg_weight_w
\
w
\
()
_add_sub_neon
//
weight
>
64
cmp
ip
,
#
0
bge
x264_
pixel_avg_weight_w
\
w
\
()
_add_add_neon
b
x264_
pixel_avg_weight_w
\
w
\
()
_sub_add_neon
//
weight
<
0
bge
pixel_avg_weight_w
\
w
\
()
_add_add_neon
b
pixel_avg_weight_w
\
w
\
()
_sub_add_neon
//
weight
<
0
endfunc
.
endm
...
...
@@ -244,7 +244,7 @@ AVGH 16, 16
.
endm
.
macro
AVG_WEIGHT
ext
function
x264_
pixel_avg_weight_w4_
\
ext
\()
_neon
,
export
=
0
function
pixel_avg_weight_w4_
\
ext
\()
_neon
,
export
=
0
load_weights_
\
ext
1
:
//
height
loop
subs
lr
,
lr
,
#
2
...
...
@@ -260,7 +260,7 @@ function x264_pixel_avg_weight_w4_\ext\()_neon, export=0
pop
{
r4
-
r6
,
pc
}
endfunc
function
x264_
pixel_avg_weight_w8_
\
ext
\()
_neon
,
export
=
0
function
pixel_avg_weight_w8_
\
ext
\()
_neon
,
export
=
0
load_weights_
\
ext
1
:
//
height
loop
subs
lr
,
lr
,
#
4
...
...
@@ -284,7 +284,7 @@ function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
pop
{
r4
-
r6
,
pc
}
endfunc
function
x264_
pixel_avg_weight_w16_
\
ext
\()
_neon
,
export
=
0
function
pixel_avg_weight_w16_
\
ext
\()
_neon
,
export
=
0
load_weights_
\
ext
1
:
//
height
loop
subs
lr
,
lr
,
#
2
...
...
@@ -309,7 +309,7 @@ AVG_WEIGHT add_add
AVG_WEIGHT
add_sub
AVG_WEIGHT
sub_add
function
x264_
pixel_avg_w4_neon
,
export
=
0
function
pixel_avg_w4_neon
,
export
=
0
subs
lr
,
lr
,
#
2
vld1.32
{
d0
[]},
[
r2
],
r3
vld1.32
{
d2
[]},
[
r4
],
r5
...
...
@@ -319,11 +319,11 @@ function x264_pixel_avg_w4_neon, export=0
vrhadd.u8
d1
,
d1
,
d3
vst1.32
{
d0
[
0
]},
[
r0
,:
32
],
r1
vst1.32
{
d1
[
0
]},
[
r0
,:
32
],
r1
bgt
x264_
pixel_avg_w4_neon
bgt
pixel_avg_w4_neon
pop
{
r4
-
r6
,
pc
}
endfunc
function
x264_
pixel_avg_w8_neon
,
export
=
0
function
pixel_avg_w8_neon
,
export
=
0
subs
lr
,
lr
,
#
4
vld1.64
{
d0
},
[
r2
],
r3
vld1.64
{
d2
},
[
r4
],
r5
...
...
@@ -341,11 +341,11 @@ function x264_pixel_avg_w8_neon, export=0
vrhadd.u8
d3
,
d3
,
d5
vst1.64
{
d2
},
[
r0
,:
64
],
r1
vst1.64
{
d3
},
[
r0
,:
64
],
r1
bgt
x264_
pixel_avg_w8_neon
bgt
pixel_avg_w8_neon
pop
{
r4
-
r6
,
pc
}
endfunc
function
x264_
pixel_avg_w16_neon
,
export
=
0
function
pixel_avg_w16_neon
,
export
=
0
subs
lr
,
lr
,
#
4
vld1.64
{
d0
-
d1
},
[
r2
],
r3
vld1.64
{
d2
-
d3
},
[
r4
],
r5
...
...
@@ -363,12 +363,12 @@ function x264_pixel_avg_w16_neon, export=0
vrhadd.u8
q3
,
q3
,
q0
vst1.64
{
d4
-
d5
},
[
r0
,:
128
],
r1
vst1.64
{
d6
-
d7
},
[
r0
,:
128
],
r1
bgt
x264_
pixel_avg_w16_neon
bgt
pixel_avg_w16_neon
pop
{
r4
-
r6
,
pc
}
endfunc
function
x264_
pixel_avg2_w4_neon
function
pixel_avg2_w4_neon
ldr
ip
,
[
sp
,
#
4
]
push
{
lr
}
ldr
lr
,
[
sp
,
#
4
]
...
...
@@ -386,7 +386,7 @@ avg2_w4_loop:
pop
{
pc
}
endfunc
function
x264_
pixel_avg2_w8_neon
function
pixel_avg2_w8_neon
ldr
ip
,
[
sp
,
#
4
]
push
{
lr
}
ldr
lr
,
[
sp
,
#
4
]
...
...
@@ -404,7 +404,7 @@ avg2_w8_loop:
pop
{
pc
}
endfunc
function
x264_
pixel_avg2_w16_neon
function
pixel_avg2_w16_neon
ldr
ip
,
[
sp
,
#
4
]
push
{
lr
}
ldr
lr
,
[
sp
,
#
4
]
...
...
@@ -422,7 +422,7 @@ avg2_w16_loop:
pop
{
pc
}
endfunc
function
x264_
pixel_avg2_w20_neon
function
pixel_avg2_w20_neon
ldr
ip
,
[
sp
,
#
4
]
push
{
lr
}
sub
r1
,
r1
,
#
16
...
...
@@ -464,7 +464,7 @@ endfunc
//
void
mc_weight
(
uint8_t
*
src
,
intptr_t
src_stride
,
uint8_t
*
dst
,
intptr_t
dst_stride
,
//
const
x264_weight_t
*
weight
,
int
height
)
function
x264_
mc_weight_w20_neon
function
mc_weight_w20_neon
weight_prologue
full
sub
r1
,
#
16
weight20_loop
:
...
...
@@ -500,7 +500,7 @@ weight20_loop:
pop
{
r4
-
r5
,
pc
}
endfunc
function
x264_
mc_weight_w16_neon
function
mc_weight_w16_neon
weight_prologue
full
weight16_loop
:
subs
ip
,
#
2
...
...
@@ -528,7 +528,7 @@ weight16_loop:
pop
{
r4
-
r5
,
pc
}
endfunc
function
x264_
mc_weight_w8_neon
function
mc_weight_w8_neon
weight_prologue
full
weight8_loop
:
subs
ip
,
#
2
...
...
@@ -548,7 +548,7 @@ weight8_loop:
pop
{
r4
-
r5
,
pc
}
endfunc
function
x264_
mc_weight_w4_neon
function
mc_weight_w4_neon
weight_prologue
full
weight4_loop
: