Commit 59b9c252 authored by Janne Grunau's avatar Janne Grunau Committed by Anton Mitrofanov

aarch64: cabac_encode_{decision,bypass,terminal}_asm

benchmarks on a Nexus 9 (nvidia denver):
101.3 cycles in x264_cabac_encode_decision_c,   67105369 runs, 3495 skips
 97.3 cycles in x264_cabac_encode_decision_asm, 67105493 runs, 3371 skips
132.8 cycles in x264_cabac_encode_terminal_c,    1046950 runs, 1626 skips
116.1 cycles in x264_cabac_encode_terminal_asm,  1048424 runs, 152 skips
 92.4 cycles in x264_cabac_encode_bypass_c,     16776192 runs, 1024 skips
 89.6 cycles in x264_cabac_encode_bypass_asm,   16776453 runs, 763 skips

Cycle counts are not as stable as one would like. The dynamic code
optimisation seems to produce different results for small chnages in a
binary. Repeated runs with the same binary produce stable results
though (ignoring the first run).
parent a6ec4249
......@@ -129,13 +129,15 @@ endif
ifeq ($(ARCH),AARCH64)
ifneq ($(AS),)
ASMSRC += common/aarch64/bitstream-a.S \
common/aarch64/cabac-a.S \
common/aarch64/dct-a.S \
common/aarch64/deblock-a.S \
common/aarch64/mc-a.S \
common/aarch64/pixel-a.S \
common/aarch64/predict-a.S \
common/aarch64/quant-a.S
SRCS += common/aarch64/mc-c.c \
SRCS += common/aarch64/asm-offsets.c \
common/aarch64/mc-c.c \
common/aarch64/predict-c.c
OBJASM = $(ASMSRC:%.S=%.o)
endif
......
/*****************************************************************************
* asm-offsets.c: check asm offsets for aarch64
*****************************************************************************
* Copyright (C) 2014 x264 project
*
* Authors: Janne Grunau <janne-x264@jannau.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
#include "common/common.h"
#include "asm-offsets.h"
#define X264_CHECK_OFFSET(s, m, o) struct check_##s##_##m \
{ \
int m_##m[2 * (offsetof(s, m) == o) - 1]; \
}
X264_CHECK_OFFSET(x264_cabac_t, i_low, CABAC_I_LOW);
X264_CHECK_OFFSET(x264_cabac_t, i_range, CABAC_I_RANGE);
X264_CHECK_OFFSET(x264_cabac_t, i_queue, CABAC_I_QUEUE);
X264_CHECK_OFFSET(x264_cabac_t, i_bytes_outstanding, CABAC_I_BYTES_OUTSTANDING);
X264_CHECK_OFFSET(x264_cabac_t, p_start, CABAC_P_START);
X264_CHECK_OFFSET(x264_cabac_t, p, CABAC_P);
X264_CHECK_OFFSET(x264_cabac_t, p_end, CABAC_P_END);
X264_CHECK_OFFSET(x264_cabac_t, f8_bits_encoded, CABAC_F8_BITS_ENCODED);
X264_CHECK_OFFSET(x264_cabac_t, state, CABAC_STATE);
/*****************************************************************************
* asm-offsets.h: asm offsets for aarch64
*****************************************************************************
* Copyright (C) 2014 x264 project
*
* Authors: Janne Grunau <janne-x264@jannau.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
#ifndef X264_AARCH64_ASM_OFFSETS_H
#define X264_AARCH64_ASM_OFFSETS_H
#define CABAC_I_LOW 0x00
#define CABAC_I_RANGE 0x04
#define CABAC_I_QUEUE 0x08
#define CABAC_I_BYTES_OUTSTANDING 0x0c
#define CABAC_P_START 0x10
#define CABAC_P 0x18
#define CABAC_P_END 0x20
#define CABAC_F8_BITS_ENCODED 0x30
#define CABAC_STATE 0x34
#endif
/*****************************************************************************
* cabac-a.S: aarch64 cabac
*****************************************************************************
* Copyright (C) 2014 x264 project
*
* Authors: Janne Grunau <janne-x264@jannau.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
#include "asm.S"
#include "asm-offsets.h"
// w11 holds x264_cabac_t.i_low
// w12 holds x264_cabac_t.i_range
function x264_cabac_encode_decision_asm, export=1
movrel x8, X(x264_cabac_range_lps)
movrel x9, X(x264_cabac_transition)
add w10, w1, #CABAC_STATE
ldrb w3, [x0, x10] // i_state
ldr w12, [x0, #CABAC_I_RANGE]
and x4, x3, #~1
asr w5, w12, #6
add x8, x8, x4, lsl #1
sub w5, w5, #4
eor w6, w2, w3 // b ^ i_state
ldrb w4, [x8, x5] // i_range_lps
ldr w11, [x0, #CABAC_I_LOW]
sub w12, w12, w4
tbz w6, #0, 1f // (b ^ i_state) & 1
add w11, w11, w12
mov w12, w4
1:
orr w4, w2, w3, lsl #1
ldrb w9, [x9, x4]
strb w9, [x0, x10] // i_state
cabac_encode_renorm:
clz w5, w12
ldr w2, [x0, #CABAC_I_QUEUE]
sub w5, w5, #23
lsl w12, w12, w5
lsl w11, w11, w5
2:
adds w2, w2, w5
str w12, [x0, #CABAC_I_RANGE]
b.lt 0f
cabac_putbyte:
mov w13, #0x400
add w12, w2, #10
lsl w13, w13, w2
asr w4, w11, w12 // out
sub w2, w2, #8
sub w13, w13, #1
subs w5, w4, #0xff
and w11, w11, w13
ldr w6, [x0, #CABAC_I_BYTES_OUTSTANDING]
str w2, [x0, #CABAC_I_QUEUE]
b.ne 1f
add w6, w6, #1
str w11, [x0, #CABAC_I_LOW]
str w6, [x0, #CABAC_I_BYTES_OUTSTANDING]
ret
1:
ldr x7, [x0, #CABAC_P]
asr w5, w4, #8 // carry
ldrb w8, [x7, #-1]
add w8, w8, w5
sub w5, w5, #1
strb w8, [x7, #-1]
cbz w6, 3f
2:
subs w6, w6, #1
strb w5, [x7], #1
b.gt 2b
3:
strb w4, [x7], #1
str wzr, [x0, #CABAC_I_BYTES_OUTSTANDING]
str x7, [x0, #CABAC_P]
0:
str w11, [x0, #CABAC_I_LOW]
str w2, [x0, #CABAC_I_QUEUE]
ret
endfunc
function x264_cabac_encode_bypass_asm, export=1
ldr w12, [x0, #CABAC_I_RANGE]
ldr w11, [x0, #CABAC_I_LOW]
ldr w2, [x0, #CABAC_I_QUEUE]
and w1, w1, w12
add w11, w1, w11, lsl #1
adds w2, w2, #1
b.ge cabac_putbyte
str w11, [x0, #CABAC_I_LOW]
str w2, [x0, #CABAC_I_QUEUE]
ret
endfunc
function x264_cabac_encode_terminal_asm, export=1
ldr w12, [x0, #CABAC_I_RANGE]
ldr w11, [x0, #CABAC_I_LOW]
sub w12, w12, #2
b cabac_encode_renorm
endfunc
......@@ -72,6 +72,10 @@ void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb );
#define x264_cabac_encode_decision x264_cabac_encode_decision_asm
#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
#elif defined(ARCH_AARCH64)
#define x264_cabac_encode_decision x264_cabac_encode_decision_asm
#define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm
#define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm
#else
#define x264_cabac_encode_decision x264_cabac_encode_decision_c
#define x264_cabac_encode_bypass x264_cabac_encode_bypass_c
......
......@@ -2437,6 +2437,8 @@ static void run_cabac_terminal_##cpu( x264_t *h, uint8_t *dst )\
DECL_CABAC(c)
#if HAVE_MMX
DECL_CABAC(asm)
#elif defined(ARCH_AARCH64)
DECL_CABAC(asm)
#else
#define run_cabac_decision_asm run_cabac_decision_c
#define run_cabac_bypass_asm run_cabac_bypass_c
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment