Skip to content
Snippets Groups Projects
Commit 8578bd9f authored by Janne Grunau's avatar Janne Grunau
Browse files

aarch64/asm: optimize cabac_encode_terminal with extrinsic knowledge

The approach is taken from the x86 asm. The overall encoder speedup is negligible.
cabac_encode_terminal is on average twice as fast on Cortex-A53 when
encoding with the following command:
./x264 --threads 1 --profile high --preset veryfast --crf 15 -o /dev/null park_joy_420_720p50.y4m

The relative speedup is smaller on Cortex-A72/A73.
parent 4121277b
No related branches found
No related tags found
1 merge request: !39 "aarch64 cabac optimizations"
......@@ -26,11 +26,19 @@
#include "common/common.h"
#include "asm-offsets.h"
/*
 * Compile-time assertion usable as a struct member declaration: declares an
 * int array member named assert_<name> whose length is 1 when x is non-zero
 * and -1 (a compile error) when x is zero.
 */
#define STATIC_ASSERT(name, x) int assert_##name[2 * !!(x) - 1]

/*
 * Fail the build unless member m of type s is located at byte offset o.
 * Expands to the definition of struct check_<s>_<m>; terminate the invocation
 * with ';'. s must be a single identifier (e.g. a typedef name) because it is
 * token-pasted into the struct tag. o is parenthesized so that an offset
 * expression containing low-precedence operators is compared as a whole.
 */
#define X264_CHECK_OFFSET(s, m, o) struct check_##s##_##m \
{ \
    STATIC_ASSERT(offset_##m, offsetof(s, m) == (o)); \
}

/*
 * Fail the build unless member b of type s starts exactly sizeof(type) bytes
 * after the start of member a, i.e. b directly follows a when a has the given
 * type. Expands to the definition of struct check_<s>_<a>_<b>.
 */
#define X264_CHECK_REL_OFFSET(s, a, type, b) struct check_##s##_##a##_##b \
{ \
    STATIC_ASSERT(rel_offset_##a##_##b, offsetof(s, a) + sizeof(type) == offsetof(s, b)); \
}
// Compile-time checks that the CABAC_* offset constants match the actual
// byte offsets of the corresponding x264_cabac_t members; a mismatch breaks
// the build instead of silently corrupting state.
X264_CHECK_OFFSET(x264_cabac_t, i_low, CABAC_I_LOW);
X264_CHECK_OFFSET(x264_cabac_t, i_range, CABAC_I_RANGE);
X264_CHECK_OFFSET(x264_cabac_t, i_queue, CABAC_I_QUEUE);
......@@ -40,3 +48,9 @@ X264_CHECK_OFFSET(x264_cabac_t, p, CABAC_P);
X264_CHECK_OFFSET(x264_cabac_t, p_end, CABAC_P_END);
X264_CHECK_OFFSET(x264_cabac_t, f8_bits_encoded, CABAC_F8_BITS_ENCODED);
X264_CHECK_OFFSET(x264_cabac_t, state, CABAC_STATE);
// The aarch64 asm makes the following additional assumptions about the
// x264_cabac_t memory layout: in each pair below the second member must start
// exactly sizeof(int) bytes after the first. cabac_encode_terminal_asm relies
// on this for i_low/i_range, which it writes with a single stp.
// NOTE(review): i_queue/i_bytes_outstanding are presumably accessed as a pair
// elsewhere in the asm -- confirm against the other cabac functions.
X264_CHECK_REL_OFFSET(x264_cabac_t, i_low, int, i_range);
X264_CHECK_REL_OFFSET(x264_cabac_t, i_queue, int, i_bytes_outstanding);
......@@ -116,7 +116,20 @@ endfunc
// cabac_encode_terminal_asm
//   x0: pointer to the x264_cabac_t being updated.
//
// Subtracts 2 from i_range and then renormalizes inline instead of branching
// to the generic cabac_encode_renorm.
// NOTE(review): testing only bit 8 assumes i_range obeys the CABAC range
// invariant on entry, so that at most a single 1-bit shift is ever required
// here (the "extrinsic knowledge" of the commit message) -- confirm against
// the C reference implementation.
function cabac_encode_terminal_asm, export=1
    ldr         w12, [x0, #CABAC_I_RANGE]
    sub         w12, w12, #2                    // i_range -= 2
    tbz         w12, #8, 1f                     // bit 8 clear: a renorm shift is needed
    str         w12, [x0, #CABAC_I_RANGE]       // bit 8 set: only i_range changed
    ret
1:
    ldr         w2,  [x0, #CABAC_I_QUEUE]
    ldr         w11, [x0, #CABAC_I_LOW]
    lsl         w12, w12, #1                    // i_range <<= 1
    adds        w2,  w2,  #1                    // i_queue += 1, sets the flags for b.ge
    lsl         w11, w11, #1                    // i_low <<= 1 (lsl leaves the flags intact)
    // Taken when the incremented i_queue is >= 0.
    // NOTE(review): cabac_putbyte presumably expects i_low in w11, i_range in
    // w12 and i_queue in w2 and stores them itself -- verify at its definition.
    b.ge        cabac_putbyte
    stp         w11, w12, [x0, #CABAC_I_LOW]    // store i_low, i_range (adjacent fields)
    str         w2,  [x0, #CABAC_I_QUEUE]
    ret
endfunc
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment