Commit dc0fe736 authored by Janne Grunau, committed by Anton Mitrofanov

aarch64: implement x264_plane_copy_swap_neon

plane_copy_swap_c: 27054
plane_copy_swap_neon: 4152
parent eaf2fc20
...@@ -1281,6 +1281,34 @@ function x264_plane_copy_core_neon, export=1 ...@@ -1281,6 +1281,34 @@ function x264_plane_copy_core_neon, export=1
ret ret
endfunc endfunc
// void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
//                                      pixel *src, intptr_t i_src, int w, int h )
// Copies rows from src to dst while swapping the two bytes of every 16-bit
// pair (rev16). Each row is 2*w bytes wide; i_dst/i_src are used as byte
// distances between consecutive row starts.
// In (AAPCS64): x0 = dst, x1 = i_dst, x2 = src, x3 = i_src, w4 = w, w5 = h
// Clobbers:     x0-x5, w8, v0, v1, condition flags
// NOTE(review): rows are processed only in 16- and 32-byte chunks and both
// loops are bottom-tested, so 2*w is presumably a non-zero multiple of 16 and
// h >= 1 -- confirm against the C wrapper that calls this core routine.
function x264_plane_copy_swap_core_neon, export=1
// w4 = row width in bytes (2*w); writing w4 also zeroes the upper half of x4,
// so x4 can be used in the 64-bit subtractions below.
lsl w4, w4, #1
// The post-indexed loads/stores advance x2/x0 by one row width, so pre-subtract
// that width from both strides: adding the remainder lands on the next row.
sub x1, x1, x4
sub x3, x3, x4
// Row loop.
1:
// w8 = bytes left in the current row.
mov w8, w4
// If bit 4 of the row width is clear there is no odd 16-byte chunk: go
// straight to the 32-byte loop.
tbz w4, #4, 32f
// Odd 16-byte chunk. The flags set by this subs are consumed by the b.eq
// below; the ld1/rev16/st1 in between do not modify them.
subs w8, w8, #16
ld1 {v0.16b}, [x2], #16
rev16 v0.16b, v0.16b
st1 {v0.16b}, [x0], #16
// Row was exactly 16 bytes wide: nothing left for the 32-byte loop.
b.eq 0f
// Main loop: 32 bytes per iteration until the remaining count is <= 0.
32:
subs w8, w8, #32
ld1 {v0.16b,v1.16b}, [x2], #32
rev16 v0.16b, v0.16b
rev16 v1.16b, v1.16b
st1 {v0.16b,v1.16b}, [x0], #32
b.gt 32b
// End of row: count down the rows and step both pointers to the next row.
// The adds do not touch the flags, so b.gt still tests the result of subs.
0:
subs w5, w5, #1
add x2, x2, x3
add x0, x0, x1
b.gt 1b
ret
endfunc
function x264_plane_copy_deinterleave_neon, export=1 function x264_plane_copy_deinterleave_neon, export=1
add w9, w6, #15 add w9, w6, #15
and w9, w9, #0xfffffff0 and w9, w9, #0xfffffff0
......
...@@ -51,6 +51,8 @@ void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t ...@@ -51,6 +51,8 @@ void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst, void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
pixel *src, intptr_t i_src, int w, int h ); pixel *src, intptr_t i_src, int w, int h );
void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst,
pixel *src, intptr_t i_src, int w, int h );
void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu, void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu,
pixel *dstv, intptr_t i_dstv, pixel *dstv, intptr_t i_dstv,
pixel *src, intptr_t i_src, int w, int h ); pixel *src, intptr_t i_src, int w, int h );
...@@ -208,6 +210,7 @@ void x264_hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, ...@@ -208,6 +210,7 @@ void x264_hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,
int height, int16_t *buf ); int height, int16_t *buf );
PLANE_COPY(16, neon) PLANE_COPY(16, neon)
PLANE_COPY_SWAP(16, neon)
PLANE_INTERLEAVE(neon) PLANE_INTERLEAVE(neon)
#endif // !HIGH_BIT_DEPTH #endif // !HIGH_BIT_DEPTH
...@@ -232,6 +235,7 @@ void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf ) ...@@ -232,6 +235,7 @@ void x264_mc_init_aarch64( int cpu, x264_mc_functions_t *pf )
pf->copy[PIXEL_4x4] = x264_mc_copy_w4_neon; pf->copy[PIXEL_4x4] = x264_mc_copy_w4_neon;
pf->plane_copy = x264_plane_copy_neon; pf->plane_copy = x264_plane_copy_neon;
pf->plane_copy_swap = x264_plane_copy_swap_neon;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon; pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon; pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
pf->plane_copy_interleave = x264_plane_copy_interleave_neon; pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment