Commit 0122fd23 authored by Janne Grunau, committed by Anton Mitrofanov

aarch64: x264_deblock_h_chroma_422_neon

Makes deblock_h_chroma_422 2.5 times faster.
parent 44cb1dcd
...@@ -247,6 +247,7 @@ function x264_deblock_h_chroma_neon, export=1 ...@@ -247,6 +247,7 @@ function x264_deblock_h_chroma_neon, export=1
h264_loop_filter_start h264_loop_filter_start
sub x0, x0, #4 sub x0, x0, #4
deblock_h_chroma:
ld1 {v18.d}[0], [x0], x1 ld1 {v18.d}[0], [x0], x1
ld1 {v16.d}[0], [x0], x1 ld1 {v16.d}[0], [x0], x1
ld1 {v0.d}[0], [x0], x1 ld1 {v0.d}[0], [x0], x1
...@@ -275,6 +276,18 @@ function x264_deblock_h_chroma_neon, export=1 ...@@ -275,6 +276,18 @@ function x264_deblock_h_chroma_neon, export=1
ret ret
endfunc endfunc
// void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride,
//                                      int alpha, int beta, int8_t *tc0 )
//
// Runs the existing horizontal chroma deblock code twice with a doubled
// stride: once via a call to x264_deblock_h_chroma_neon starting at pix, and
// once by branching to its deblock_h_chroma label starting at pix + stride.
//
// In (AAPCS64): x0 = pix, x1 = stride, w2 = alpha, w3 = beta, x4 = tc0
// Scratch used here: x5, x6, x7, v24
//
// NOTE(review): this relies on x264_deblock_h_chroma_neon leaving x1, x4, x5
// and x7 intact across the call -- its full body is not visible here; confirm.
function x264_deblock_h_chroma_422_neon, export=1
// x5 = pix + stride: start address for the second pass (computed before the
// stride is doubled).
add x5, x0, x1
// Double the stride so each pass steps over every second row.
add x1, x1, x1
// bl overwrites x30 (LR); keep the caller's return address in x7.
mov x7, x30
// First pass: full 4:2:0 horizontal chroma filter starting at pix.
bl X(x264_deblock_h_chroma_neon)
// Re-read 4 bytes from tc0 for the second pass.
ldr w6, [x4]
// Restore the caller's return address so the ret reached through
// deblock_h_chroma returns directly to our caller.
mov x30, x7
// x0 = (pix + stride) - 4, the same -4 adjustment x264_deblock_h_chroma_neon
// applies to x0 just before the deblock_h_chroma label.
sub x0, x5, #4
// Place the tc0 bytes in lane 0 of v24.
// NOTE(review): presumably this mirrors what h264_loop_filter_start sets up,
// since the branch below enters after that macro; anything else the macro
// does is not repeated for the second pass -- confirm.
mov v24.s[0], w6
// Second pass: jump into the shared body, past h264_loop_filter_start and
// the x0 adjustment.
b deblock_h_chroma
endfunc
.macro h264_loop_filter_chroma8 .macro h264_loop_filter_chroma8
dup v22.8b, w2 // alpha dup v22.8b, w2 // alpha
uxtl v24.8h, v24.8b uxtl v24.8h, v24.8b
......
...@@ -738,6 +738,7 @@ void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X26 ...@@ -738,6 +738,7 @@ void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X26
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4], int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
int mvy_limit, int bframe ); int mvy_limit, int bframe );
#if ARCH_AARCH64 #if ARCH_AARCH64
void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 ); void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta ); void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
...@@ -856,6 +857,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff ) ...@@ -856,6 +857,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
pf->deblock_chroma_420_mbaff = x264_deblock_h_chroma_mbaff_neon; pf->deblock_chroma_420_mbaff = x264_deblock_h_chroma_mbaff_neon;
pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_neon; pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_neon;
pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_neon; pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_neon;
pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_neon;
pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_neon; pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_neon;
pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_neon; pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_neon;
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment