Commit e29fd5c0 authored by Henrik Gramner's avatar Henrik Gramner Committed by Henrik Gramner

Add msac optimizations

 * Eliminate the trailing zero after the CDF probabilities. We can
   reuse the count value as a terminator instead. This reduces the
   size of the CDF context by around 8%.

 * Align the CDF arrays.

 * Various other minor optimizations.
parent a819653e
......@@ -148,7 +148,7 @@ function msac_decode_symbol_adapt4_neon, export=1
add x8, x0, #RNG
ld1_n v0, v1, x1, \sz, \n // cdf
ld1r {v4\sz}, [x8] // rng
movrel x9, coeffs, 32
movrel x9, coeffs, 30
sub x9, x9, x2, lsl #1
ushr_n v2, v3, v0, v1, #6, \sz, \n // cdf >> EC_PROB_SHIFT
str h4, [sp, #14] // store original u = s->rng
......@@ -183,16 +183,24 @@ function msac_decode_symbol_adapt4_neon, export=1
// update_cdf
ldrh w3, [x1, x2, lsl #1] // count = cdf[n_symbols]
movi v5\szb, #0xff
cmp x2, #4 // set C if n_symbols >= 4 (n_symbols > 3)
mov w14, #4
lsr w4, w3, #4 // count >> 4
.if \n == 16
mov w4, #-5
.else
mvn w14, w2
mov w4, #-4
cmn w14, #3 // set C if n_symbols <= 2
.endif
urhadd_n v4, v5, v5, v5, v2, v3, \sz, \n // i >= val ? -1 : 32768
adc w4, w4, w14 // (count >> 4) + (n_symbols > 3) + 4
neg w4, w4 // -rate
.if \n == 16
sub w4, w4, w3, lsr #4 // -((count >> 4) + 5)
.else
lsr w14, w3, #4 // count >> 4
sbc w4, w4, w14 // -((count >> 4) + (n_symbols > 2) + 4)
.endif
sub_n v4, v5, v4, v5, v0, v1, \sz, \n // (32768 - cdf[i]) or (-1 - cdf[i])
dup v6.8h, w4 // -rate
sub w3, w3, w3, lsr #5 // count - (count >= 32)
sub w3, w3, w3, lsr #5 // count - (count == 32)
sub_n v0, v1, v0, v1, v2, v3, \sz, \n // cdf + (i >= val ? 1 : 0)
sshl_n v4, v5, v4, v5, v6, v6, \sz, \n // ({32768,-1} - cdf[i]) >> rate
add w3, w3, #1 // count + (count < 32)
......@@ -224,8 +232,7 @@ L(renorm2):
b.ge 9f
// refill
ldr x3, [x0, #BUF_POS]
ldr x4, [x0, #BUF_END]
ldp x3, x4, [x0] // BUF_POS, BUF_END
add x5, x3, #8
cmp x5, x4
b.gt 2f
......
......@@ -34,6 +34,7 @@
#include "common/intops.h"
#include "src/cdf.h"
#include "src/tables.h"
#define AOM_ICDF(x) (32768-(x))
......@@ -752,12 +753,11 @@ static const CdfMvComponent default_mv_component_cdf = {
}
};
static const uint16_t default_mv_joint_cdf[N_MV_JOINTS + 1] = {
static const uint16_t ALIGN(default_mv_joint_cdf[N_MV_JOINTS], 8) = {
AOM_CDF4(4096, 11264, 19328)
};
static const uint16_t default_kf_y_mode_cdf[5][5][N_INTRA_PRED_MODES + 1 + 2] = {
static const uint16_t ALIGN(default_kf_y_mode_cdf[5][5][N_INTRA_PRED_MODES + 3], 32) = {
{
{ AOM_CDF13(15588, 17027, 19338, 20218, 20682, 21110, 21825, 23244,
24189, 28165, 29093, 30466) },
......@@ -3927,25 +3927,18 @@ void dav1d_cdf_thread_update(const Dav1dFrameHeader *const hdr,
CdfContext *const dst,
const CdfContext *const src)
{
int i, j, k, l;
#define update_cdf_1d(n1d, name) \
do { \
memcpy(dst->name, src->name, sizeof(*dst->name) * n1d); \
assert(!dst->name[n1d - 1]); \
memcpy(dst->name, src->name, sizeof(dst->name)); \
dst->name[n1d] = 0; \
} while (0)
#define update_cdf_2d(n1d, n2d, name) \
for (j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
for (int j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
#define update_cdf_3d(n1d, n2d, n3d, name) \
for (k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
for (int k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
#define update_cdf_4d(n1d, n2d, n3d, n4d, name) \
for (l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
#define update_cdf_6d(n1d, n2d, n3d, n4d, n5d, n6d, name) \
for (n = 0; n < (n1d); n++) \
for (m = 0; m < (n2d); m++) \
update_cdf_4d(n3d, n4d, n5d, n6d, name[n][m])
for (int l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
#define update_bit_0d(name) \
do { \
......@@ -3954,65 +3947,57 @@ void dav1d_cdf_thread_update(const Dav1dFrameHeader *const hdr,
} while (0)
#define update_bit_1d(n1d, name) \
for (i = 0; i < (n1d); i++) update_bit_0d(name[i])
for (int i = 0; i < (n1d); i++) update_bit_0d(name[i])
#define update_bit_2d(n1d, n2d, name) \
for (j = 0; j < (n1d); j++) update_bit_1d(n2d, name[j])
for (int j = 0; j < (n1d); j++) update_bit_1d(n2d, name[j])
#define update_bit_3d(n1d, n2d, n3d, name) \
for (k = 0; k < (n1d); k++) update_bit_2d(n2d, n3d, name[k])
for (int k = 0; k < (n1d); k++) update_bit_2d(n2d, n3d, name[k])
update_bit_1d(N_BS_SIZES, m.use_filter_intra);
update_cdf_1d(5, m.filter_intra);
update_cdf_3d(2, N_INTRA_PRED_MODES, N_UV_INTRA_PRED_MODES - !k, m.uv_mode);
update_cdf_2d(8, 7, m.angle_delta);
update_cdf_3d(N_TX_SIZES - 1, 3, imin(k + 2, 3), m.txsz);
update_cdf_3d(2, N_INTRA_PRED_MODES, 7, m.txtp_intra1);
update_cdf_3d(3, N_INTRA_PRED_MODES, 5, m.txtp_intra2);
update_cdf_1d(4, m.filter_intra);
update_cdf_3d(2, N_INTRA_PRED_MODES, N_UV_INTRA_PRED_MODES - 1 - !k, m.uv_mode);
update_cdf_2d(8, 6, m.angle_delta);
update_cdf_3d(N_TX_SIZES - 1, 3, imin(k + 1, 2), m.txsz);
update_cdf_3d(2, N_INTRA_PRED_MODES, 6, m.txtp_intra1);
update_cdf_3d(3, N_INTRA_PRED_MODES, 4, m.txtp_intra2);
update_bit_1d(3, m.skip);
static const uint8_t n_partitions[N_BL_LEVELS] = {
[BL_128X128] = N_PARTITIONS - 2,
[BL_64X64] = N_PARTITIONS,
[BL_32X32] = N_PARTITIONS,
[BL_16X16] = N_PARTITIONS,
[BL_8X8] = N_SUB8X8_PARTITIONS,
};
update_cdf_3d(N_BL_LEVELS, 4, n_partitions[k], m.partition);
update_cdf_3d(N_BL_LEVELS, 4, dav1d_partition_type_count[k], m.partition);
update_bit_2d(N_TX_SIZES, 13, coef.skip);
update_cdf_3d(2, 2, 5, coef.eob_bin_16);
update_cdf_3d(2, 2, 6, coef.eob_bin_32);
update_cdf_3d(2, 2, 7, coef.eob_bin_64);
update_cdf_3d(2, 2, 8, coef.eob_bin_128);
update_cdf_3d(2, 2, 9, coef.eob_bin_256);
update_cdf_2d(2, 10, coef.eob_bin_512);
update_cdf_2d(2, 11, coef.eob_bin_1024);
update_cdf_3d(2, 2, 4, coef.eob_bin_16);
update_cdf_3d(2, 2, 5, coef.eob_bin_32);
update_cdf_3d(2, 2, 6, coef.eob_bin_64);
update_cdf_3d(2, 2, 7, coef.eob_bin_128);
update_cdf_3d(2, 2, 8, coef.eob_bin_256);
update_cdf_2d(2, 9, coef.eob_bin_512);
update_cdf_2d(2, 10, coef.eob_bin_1024);
update_bit_3d(N_TX_SIZES, 2, 11 /*22*/, coef.eob_hi_bit);
update_cdf_4d(N_TX_SIZES, 2, 4, 3, coef.eob_base_tok);
update_cdf_4d(N_TX_SIZES, 2, 41 /*42*/, 4, coef.base_tok);
update_cdf_4d(N_TX_SIZES, 2, 4, 2, coef.eob_base_tok);
update_cdf_4d(N_TX_SIZES, 2, 41 /*42*/, 3, coef.base_tok);
update_bit_2d(2, 3, coef.dc_sign);
update_cdf_4d(4, 2, 21, 4, coef.br_tok);
update_cdf_2d(3, DAV1D_MAX_SEGMENTS, m.seg_id);
update_cdf_1d(8, m.cfl_sign);
update_cdf_2d(6, 16, m.cfl_alpha);
update_cdf_4d(4, 2, 21, 3, coef.br_tok);
update_cdf_2d(3, DAV1D_MAX_SEGMENTS - 1, m.seg_id);
update_cdf_1d(7, m.cfl_sign);
update_cdf_2d(6, 15, m.cfl_alpha);
update_bit_0d(m.restore_wiener);
update_bit_0d(m.restore_sgrproj);
update_cdf_1d(3, m.restore_switchable);
update_cdf_1d(4, m.delta_q);
update_cdf_2d(5, 4, m.delta_lf);
update_cdf_1d(2, m.restore_switchable);
update_cdf_1d(3, m.delta_q);
update_cdf_2d(5, 3, m.delta_lf);
update_bit_2d(7, 3, m.pal_y);
update_bit_1d(2, m.pal_uv);
update_cdf_3d(2, 7, 7, m.pal_sz);
update_cdf_4d(2, 7, 5, k + 2, m.color_map);
update_cdf_3d(2, 7, 6, m.pal_sz);
update_cdf_4d(2, 7, 5, k + 1, m.color_map);
update_bit_2d(7, 3, m.txpart);
update_cdf_2d(2, 16, m.txtp_inter1);
update_cdf_1d(12, m.txtp_inter2);
update_cdf_2d(2, 15, m.txtp_inter1);
update_cdf_1d(11, m.txtp_inter2);
update_bit_1d(4, m.txtp_inter3);
if (!(hdr->frame_type & 1)) {
update_bit_0d(m.intrabc);
update_cdf_1d(N_MV_JOINTS, dmv.joint);
for (k = 0; k < 2; k++) {
update_cdf_1d(11, dmv.comp[k].classes);
update_cdf_1d(N_MV_JOINTS - 1, dmv.joint);
for (int k = 0; k < 2; k++) {
update_cdf_1d(10, dmv.comp[k].classes);
update_bit_0d(dmv.comp[k].class0);
update_bit_1d(10, dmv.comp[k].classN);
update_bit_0d(dmv.comp[k].sign);
......@@ -4021,20 +4006,20 @@ void dav1d_cdf_thread_update(const Dav1dFrameHeader *const hdr,
}
update_bit_1d(3, m.skip_mode);
update_cdf_2d(4, N_INTRA_PRED_MODES, m.y_mode);
update_cdf_3d(2, 8, DAV1D_N_SWITCHABLE_FILTERS, m.filter);
update_cdf_2d(4, N_INTRA_PRED_MODES - 1, m.y_mode);
update_cdf_3d(2, 8, DAV1D_N_SWITCHABLE_FILTERS - 1, m.filter);
update_bit_1d(6, m.newmv_mode);
update_bit_1d(2, m.globalmv_mode);
update_bit_1d(6, m.refmv_mode);
update_bit_1d(3, m.drl_bit);
update_cdf_2d(8, N_COMP_INTER_PRED_MODES, m.comp_inter_mode);
update_cdf_2d(8, N_COMP_INTER_PRED_MODES - 1, m.comp_inter_mode);
update_bit_1d(4, m.intra);
update_bit_1d(5, m.comp);
update_bit_1d(5, m.comp_dir);
update_bit_1d(6, m.jnt_comp);
update_bit_1d(6, m.mask_comp);
update_bit_1d(9, m.wedge_comp);
update_cdf_2d(9, 16, m.wedge_idx);
update_cdf_2d(9, 15, m.wedge_idx);
update_bit_2d(6, 3, m.ref);
update_bit_2d(3, 3, m.comp_fwd_ref);
update_bit_2d(2, 3, m.comp_bwd_ref);
......@@ -4042,17 +4027,17 @@ void dav1d_cdf_thread_update(const Dav1dFrameHeader *const hdr,
update_bit_1d(3, m.seg_pred);
update_bit_1d(4, m.interintra);
update_bit_1d(7, m.interintra_wedge);
update_cdf_2d(4, 4, m.interintra_mode);
update_cdf_2d(N_BS_SIZES, 3, m.motion_mode);
update_cdf_2d(4, 3, m.interintra_mode);
update_cdf_2d(N_BS_SIZES, 2, m.motion_mode);
update_bit_1d(N_BS_SIZES, m.obmc);
update_cdf_1d(N_MV_JOINTS, mv.joint);
for (k = 0; k < 2; k++) {
update_cdf_1d(11, mv.comp[k].classes);
update_cdf_1d(N_MV_JOINTS - 1, mv.joint);
for (int k = 0; k < 2; k++) {
update_cdf_1d(10, mv.comp[k].classes);
update_bit_0d(mv.comp[k].class0);
update_bit_1d(10, mv.comp[k].classN);
update_cdf_2d(2, 4, mv.comp[k].class0_fp);
update_cdf_1d(4, mv.comp[k].classN_fp);
update_cdf_2d(2, 3, mv.comp[k].class0_fp);
update_cdf_1d(3, mv.comp[k].classN_fp);
update_bit_0d(mv.comp[k].class0_hp);
update_bit_0d(mv.comp[k].classN_hp);
update_bit_0d(mv.comp[k].sign);
......@@ -4062,7 +4047,7 @@ void dav1d_cdf_thread_update(const Dav1dFrameHeader *const hdr,
/*
* CDF threading wrappers.
*/
static inline int get_qcat_idx(int q) {
static inline int get_qcat_idx(const int q) {
if (q <= 20) return 0;
if (q <= 60) return 1;
if (q <= 120) return 2;
......@@ -4089,7 +4074,7 @@ void dav1d_cdf_thread_copy(CdfContext *const dst, const CdfThreadContext *const
}
int dav1d_cdf_thread_alloc(CdfThreadContext *const cdf,
struct thread_data *const t)
struct thread_data *const t)
{
cdf->ref = dav1d_ref_create(sizeof(CdfContext) +
(t != NULL) * sizeof(atomic_uint));
......
......@@ -37,94 +37,94 @@
/* Buffers padded to [8] or [16] for SIMD where needed. */
typedef struct CdfModeContext {
uint16_t y_mode[4][N_INTRA_PRED_MODES + 1 + 2];
uint16_t use_filter_intra[N_BS_SIZES][2];
uint16_t filter_intra[5 + 1];
uint16_t uv_mode[2][N_INTRA_PRED_MODES][N_UV_INTRA_PRED_MODES + 1 + 1];
uint16_t angle_delta[8][8];
uint16_t filter[2][8][DAV1D_N_SWITCHABLE_FILTERS + 1];
uint16_t newmv_mode[6][2];
uint16_t globalmv_mode[2][2];
uint16_t refmv_mode[6][2];
uint16_t drl_bit[3][2];
uint16_t comp_inter_mode[8][N_COMP_INTER_PRED_MODES + 1];
uint16_t intra[4][2];
uint16_t comp[5][2];
uint16_t comp_dir[5][2];
uint16_t jnt_comp[6][2];
uint16_t mask_comp[6][2];
uint16_t wedge_comp[9][2];
uint16_t wedge_idx[9][16 + 1];
uint16_t interintra[7][2];
uint16_t interintra_mode[4][5];
uint16_t interintra_wedge[7][2];
uint16_t ref[6][3][2];
uint16_t comp_fwd_ref[3][3][2];
uint16_t comp_bwd_ref[2][3][2];
uint16_t comp_uni_ref[3][3][2];
uint16_t txsz[N_TX_SIZES - 1][3][4];
uint16_t txpart[7][3][2];
uint16_t txtp_inter1[2][16 + 1];
uint16_t txtp_inter2[12 + 1 + 3];
uint16_t txtp_inter3[4][2];
uint16_t txtp_intra1[2][N_INTRA_PRED_MODES][7 + 1];
uint16_t txtp_intra2[3][N_INTRA_PRED_MODES][5 + 1 + 2];
uint16_t skip[3][2];
uint16_t skip_mode[3][2];
uint16_t partition[N_BL_LEVELS][4][N_PARTITIONS + 1 + 5];
uint16_t seg_pred[3][2];
uint16_t seg_id[3][DAV1D_MAX_SEGMENTS + 1];
uint16_t cfl_sign[8 + 1];
uint16_t cfl_alpha[6][16 + 1];
uint16_t restore_wiener[2];
uint16_t restore_sgrproj[2];
uint16_t restore_switchable[3 + 1];
uint16_t delta_q[4 + 1];
uint16_t delta_lf[5][4 + 1];
uint16_t obmc[N_BS_SIZES][2];
uint16_t motion_mode[N_BS_SIZES][3 + 1];
uint16_t pal_y[7][3][2];
uint16_t pal_uv[2][2];
uint16_t pal_sz[2][7][7 + 1];
uint16_t color_map[2][7][5][8 + 1];
uint16_t intrabc[2];
ALIGN(uint16_t y_mode[4][N_INTRA_PRED_MODES + 3], 32);
ALIGN(uint16_t uv_mode[2][N_INTRA_PRED_MODES][N_UV_INTRA_PRED_MODES + 2], 32);
ALIGN(uint16_t wedge_idx[9][16], 32);
ALIGN(uint16_t partition[N_BL_LEVELS][4][N_PARTITIONS + 6], 32);
ALIGN(uint16_t cfl_alpha[6][16], 32);
ALIGN(uint16_t txtp_inter1[2][16], 32);
ALIGN(uint16_t txtp_inter2[12 + 4], 32);
ALIGN(uint16_t txtp_intra1[2][N_INTRA_PRED_MODES][7 + 1], 16);
ALIGN(uint16_t txtp_intra2[3][N_INTRA_PRED_MODES][5 + 3], 16);
ALIGN(uint16_t cfl_sign[8], 16);
ALIGN(uint16_t angle_delta[8][8], 16);
ALIGN(uint16_t filter_intra[5 + 3], 16);
ALIGN(uint16_t comp_inter_mode[8][N_COMP_INTER_PRED_MODES], 16);
ALIGN(uint16_t seg_id[3][DAV1D_MAX_SEGMENTS], 16);
ALIGN(uint16_t pal_sz[2][7][7 + 1], 16);
ALIGN(uint16_t color_map[2][7][5][8], 16);
ALIGN(uint16_t filter[2][8][DAV1D_N_SWITCHABLE_FILTERS + 1], 8);
ALIGN(uint16_t txsz[N_TX_SIZES - 1][3][4], 8);
ALIGN(uint16_t motion_mode[N_BS_SIZES][3 + 1], 8);
ALIGN(uint16_t delta_q[4], 8);
ALIGN(uint16_t delta_lf[5][4], 8);
ALIGN(uint16_t interintra_mode[4][4], 8);
ALIGN(uint16_t restore_switchable[3 + 1], 8);
ALIGN(uint16_t restore_wiener[2], 4);
ALIGN(uint16_t restore_sgrproj[2], 4);
ALIGN(uint16_t interintra[7][2], 4);
ALIGN(uint16_t interintra_wedge[7][2], 4);
ALIGN(uint16_t txtp_inter3[4][2], 4);
ALIGN(uint16_t use_filter_intra[N_BS_SIZES][2], 4);
ALIGN(uint16_t newmv_mode[6][2], 4);
ALIGN(uint16_t globalmv_mode[2][2], 4);
ALIGN(uint16_t refmv_mode[6][2], 4);
ALIGN(uint16_t drl_bit[3][2], 4);
ALIGN(uint16_t intra[4][2], 4);
ALIGN(uint16_t comp[5][2], 4);
ALIGN(uint16_t comp_dir[5][2], 4);
ALIGN(uint16_t jnt_comp[6][2], 4);
ALIGN(uint16_t mask_comp[6][2], 4);
ALIGN(uint16_t wedge_comp[9][2], 4);
ALIGN(uint16_t ref[6][3][2], 4);
ALIGN(uint16_t comp_fwd_ref[3][3][2], 4);
ALIGN(uint16_t comp_bwd_ref[2][3][2], 4);
ALIGN(uint16_t comp_uni_ref[3][3][2], 4);
ALIGN(uint16_t txpart[7][3][2], 4);
ALIGN(uint16_t skip[3][2], 4);
ALIGN(uint16_t skip_mode[3][2], 4);
ALIGN(uint16_t seg_pred[3][2], 4);
ALIGN(uint16_t obmc[N_BS_SIZES][2], 4);
ALIGN(uint16_t pal_y[7][3][2], 4);
ALIGN(uint16_t pal_uv[2][2], 4);
ALIGN(uint16_t intrabc[2], 4);
} CdfModeContext;
typedef struct CdfCoefContext {
uint16_t skip[N_TX_SIZES][13][2];
uint16_t eob_bin_16[2][2][6];
uint16_t eob_bin_32[2][2][7 + 1];
uint16_t eob_bin_64[2][2][8];
uint16_t eob_bin_128[2][2][9];
uint16_t eob_bin_256[2][2][10 + 6];
uint16_t eob_bin_512[2][11 + 5];
uint16_t eob_bin_1024[2][12 + 4];
uint16_t eob_hi_bit[N_TX_SIZES][2][11 /*22*/][2];
uint16_t eob_base_tok[N_TX_SIZES][2][4][4];
uint16_t base_tok[N_TX_SIZES][2][41][5];
uint16_t dc_sign[2][3][2];
uint16_t br_tok[4 /*5*/][2][21][5];
ALIGN(uint16_t eob_bin_16[2][2][5 + 3], 16);
ALIGN(uint16_t eob_bin_32[2][2][6 + 2], 16);
ALIGN(uint16_t eob_bin_64[2][2][7 + 1], 16);
ALIGN(uint16_t eob_bin_128[2][2][8 + 0], 16);
ALIGN(uint16_t eob_bin_256[2][2][9 + 7], 32);
ALIGN(uint16_t eob_bin_512[2][10 + 6], 32);
ALIGN(uint16_t eob_bin_1024[2][11 + 5], 32);
ALIGN(uint16_t eob_base_tok[N_TX_SIZES][2][4][4], 8);
ALIGN(uint16_t base_tok[N_TX_SIZES][2][41][4], 8);
ALIGN(uint16_t br_tok[4 /*5*/][2][21][4], 8);
ALIGN(uint16_t eob_hi_bit[N_TX_SIZES][2][11 /*22*/][2], 4);
ALIGN(uint16_t skip[N_TX_SIZES][13][2], 4);
ALIGN(uint16_t dc_sign[2][3][2], 4);
} CdfCoefContext;
typedef struct CdfMvComponent {
uint16_t classes[11 + 1 + 4];
uint16_t class0[2];
uint16_t classN[10][2];
uint16_t class0_fp[2][4 + 1];
uint16_t classN_fp[4 + 1];
uint16_t class0_hp[2];
uint16_t classN_hp[2];
uint16_t sign[2];
ALIGN(uint16_t classes[11 + 5], 32);
ALIGN(uint16_t class0_fp[2][4], 8);
ALIGN(uint16_t classN_fp[4], 8);
ALIGN(uint16_t class0_hp[2], 4);
ALIGN(uint16_t classN_hp[2], 4);
ALIGN(uint16_t class0[2], 4);
ALIGN(uint16_t classN[10][2], 4);
ALIGN(uint16_t sign[2], 4);
} CdfMvComponent;
typedef struct CdfMvContext {
CdfMvComponent comp[2];
uint16_t joint[N_MV_JOINTS + 1];
ALIGN(uint16_t joint[N_MV_JOINTS], 8);
} CdfMvContext;
typedef struct CdfContext {
CdfModeContext m;
uint16_t kfym[5][5][N_INTRA_PRED_MODES + 1 + 2];
ALIGN(uint16_t kfym[5][5][N_INTRA_PRED_MODES + 3], 32);
CdfCoefContext coef;
CdfMvContext mv, dmv;
} CdfContext;
......
......@@ -81,14 +81,14 @@ static int read_mv_component_diff(Dav1dTileContext *const t,
const int have_hp = f->frame_hdr->hp;
const int sign = dav1d_msac_decode_bool_adapt(&ts->msac, mv_comp->sign);
const int cl = dav1d_msac_decode_symbol_adapt16(&ts->msac,
mv_comp->classes, 11);
mv_comp->classes, 10);
int up, fp, hp;
if (!cl) {
up = dav1d_msac_decode_bool_adapt(&ts->msac, mv_comp->class0);
if (have_fp) {
fp = dav1d_msac_decode_symbol_adapt4(&ts->msac,
mv_comp->class0_fp[up], 4);
mv_comp->class0_fp[up], 3);
hp = have_hp ? dav1d_msac_decode_bool_adapt(&ts->msac,
mv_comp->class0_hp) : 1;
} else {
......@@ -102,7 +102,7 @@ static int read_mv_component_diff(Dav1dTileContext *const t,
mv_comp->classN[n]) << n;
if (have_fp) {
fp = dav1d_msac_decode_symbol_adapt4(&ts->msac,
mv_comp->classN_fp, 4);
mv_comp->classN_fp, 3);
hp = have_hp ? dav1d_msac_decode_bool_adapt(&ts->msac,
mv_comp->classN_hp) : 1;
} else {
......@@ -120,7 +120,7 @@ static void read_mv_residual(Dav1dTileContext *const t, mv *const ref_mv,
CdfMvContext *const mv_cdf, const int have_fp)
{
switch (dav1d_msac_decode_symbol_adapt4(&t->ts->msac, t->ts->cdf.mv.joint,
N_MV_JOINTS))
N_MV_JOINTS - 1))
{
case MV_JOINT_HV:
ref_mv->y += read_mv_component_diff(t, &mv_cdf->comp[0], have_fp);
......@@ -380,7 +380,7 @@ static void read_pal_plane(Dav1dTileContext *const t, Av1Block *const b,
Dav1dTileState *const ts = t->ts;
const Dav1dFrameContext *const f = t->f;
const int pal_sz = b->pal_sz[pl] = dav1d_msac_decode_symbol_adapt8(&ts->msac,
ts->cdf.m.pal_sz[pl][sz_ctx], 7) + 2;
ts->cdf.m.pal_sz[pl][sz_ctx], 6) + 2;
uint16_t cache[16], used_cache[8];
int l_cache = pl ? t->pal_sz_uv[1][by4] : t->l.pal_sz[by4];
int n_cache = 0;
......@@ -586,7 +586,7 @@ static void read_pal_indices(Dav1dTileContext *const t,
Dav1dTileState *const ts = t->ts;
const ptrdiff_t stride = bw4 * 4;
pal_idx[0] = dav1d_msac_decode_uniform(&ts->msac, b->pal_sz[pl]);
uint16_t (*const color_map_cdf)[8 + 1] =
uint16_t (*const color_map_cdf)[8] =
ts->cdf.m.color_map[pl][b->pal_sz[pl] - 2];
uint8_t (*const order)[8] = t->scratch.pal_order;
uint8_t *const ctx = t->scratch.pal_ctx;
......@@ -597,7 +597,7 @@ static void read_pal_indices(Dav1dTileContext *const t,
order_palette(pal_idx, stride, i, first, last, order, ctx);
for (int j = first, m = 0; j >= last; j--, m++) {
const int color_idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
color_map_cdf[ctx[m]], b->pal_sz[pl]);
color_map_cdf[ctx[m]], b->pal_sz[pl] - 1);
pal_idx[(i - j) * stride + j] = order[m][color_idx];
}
}
......@@ -813,7 +813,7 @@ static int decode_b(Dav1dTileContext *const t,
&seg_ctx, f->cur_segmap, f->b4_stride);
const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
ts->cdf.m.seg_id[seg_ctx],
DAV1D_MAX_SEGMENTS);
DAV1D_MAX_SEGMENTS - 1);
const unsigned last_active_seg_id =
f->frame_hdr->segmentation.seg_data.last_active_segid;
b->seg_id = neg_deinterleave(diff, pred_seg_id,
......@@ -885,7 +885,7 @@ static int decode_b(Dav1dTileContext *const t,
} else {
const unsigned diff = dav1d_msac_decode_symbol_adapt8(&ts->msac,
ts->cdf.m.seg_id[seg_ctx],
DAV1D_MAX_SEGMENTS);
DAV1D_MAX_SEGMENTS - 1);
const unsigned last_active_seg_id =
f->frame_hdr->segmentation.seg_data.last_active_segid;
b->seg_id = neg_deinterleave(diff, pred_seg_id,
......@@ -933,7 +933,7 @@ static int decode_b(Dav1dTileContext *const t,
if (have_delta_q) {
int delta_q = dav1d_msac_decode_symbol_adapt4(&ts->msac,
ts->cdf.m.delta_q, 4);
ts->cdf.m.delta_q, 3);
if (delta_q == 3) {
const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
delta_q = dav1d_msac_decode_bools(&ts->msac, n_bits) +
......@@ -954,7 +954,7 @@ static int decode_b(Dav1dTileContext *const t,
for (int i = 0; i < n_lfs; i++) {
int delta_lf = dav1d_msac_decode_symbol_adapt4(&ts->msac,
ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 4);
ts->cdf.m.delta_lf[i + f->frame_hdr->delta.lf.multi], 3);
if (delta_lf == 3) {
const int n_bits = 1 + dav1d_msac_decode_bools(&ts->msac, 3);
delta_lf = dav1d_msac_decode_bools(&ts->msac, n_bits) +
......@@ -1019,7 +1019,7 @@ static int decode_b(Dav1dTileContext *const t,
ts->cdf.kfym[dav1d_intra_mode_context[t->a->mode[bx4]]]
[dav1d_intra_mode_context[t->l.mode[by4]]];
b->y_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, ymode_cdf,
N_INTRA_PRED_MODES);
N_INTRA_PRED_MODES - 1);
if (DEBUG_BLOCK_INFO)
printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng);
......@@ -1028,7 +1028,7 @@ static int decode_b(Dav1dTileContext *const t,
b->y_mode <= VERT_LEFT_PRED)
{
uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED];
const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 7);
const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
b->y_angle = angle - 3;
} else {
b->y_angle = 0;
......@@ -1039,20 +1039,20 @@ static int decode_b(Dav1dTileContext *const t,
cbw4 == 1 && cbh4 == 1 : !!(cfl_allowed_mask & (1 << bs));
uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode];
b->uv_mode = dav1d_msac_decode_symbol_adapt16(&ts->msac, uvmode_cdf,
N_UV_INTRA_PRED_MODES - !cfl_allowed);
N_UV_INTRA_PRED_MODES - 1 - !cfl_allowed);
if (DEBUG_BLOCK_INFO)
printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng);
if (b->uv_mode == CFL_PRED) {
#define SIGN(a) (!!(a) + ((a) > 0))
const int sign = dav1d_msac_decode_symbol_adapt8(&ts->msac,
ts->cdf.m.cfl_sign, 8) + 1;
ts->cdf.m.cfl_sign, 7) + 1;
const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3;
assert(sign_u == sign / 3);
if (sign_u) {
const int ctx = (sign_u == 2) * 3 + sign_v;
b->cfl_alpha[0] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
ts->cdf.m.cfl_alpha[ctx], 16) + 1;
ts->cdf.m.cfl_alpha[ctx], 15) + 1;
if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0];
} else {
b->cfl_alpha[0] = 0;
......@@ -1060,7 +1060,7 @@ static int decode_b(Dav1dTileContext *const t,
if (sign_v) {
const int ctx = (sign_v == 2) * 3 + sign_u;
b->cfl_alpha[1] = dav1d_msac_decode_symbol_adapt16(&ts->msac,
ts->cdf.m.cfl_alpha[ctx], 16) + 1;
ts->cdf.m.cfl_alpha[ctx], 15) + 1;
if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1];
} else {
b->cfl_alpha[1] = 0;
......@@ -1073,7 +1073,7 @@ static int decode_b(Dav1dTileContext *const t,
b->uv_mode <= VERT_LEFT_PRED)
{
uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED];
const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 7);
const int angle = dav1d_msac_decode_symbol_adapt8(&ts->msac, acdf, 6);
b->uv_angle = angle - 3;
} else {
b->uv_angle = 0;
......@@ -1114,7 +1114,7 @@ static int decode_b(Dav1dTileContext *const t,
if (is_filter) {
b->y_mode = FILTER_PRED;
b->y_angle = dav1d_msac_decode_symbol_adapt4(&ts->msac,
ts->cdf.m.filter_intra, 5);
ts->cdf.m.filter_intra, 4);
}
if (DEBUG_BLOCK_INFO)
printf("Post-filterintramode[%d/%d]: r=%d\n",
......@@ -1157,7 +1157,7 @@ static int decode_b(Dav1dTileContext *const t,
const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx];
int depth = dav1d_msac_decode_symbol_adapt4(&ts->msac, tx_cdf,
imin(t_dim->max + 1, 3));
imin(t_dim->max, 2));
while (depth--) {
b->tx = t_dim->sub;
......@@ -1479,7 +1479,7 @@ static int decode_b(Dav1dTileContext *const t,
b->inter_mode = dav1d_msac_decode_symbol_adapt8(&ts->msac,
ts->cdf.m.comp_inter_mode[ctx],
N_COMP_INTER_PRED_MODES);
N_COMP_INTER_PRED_MODES - 1);
if (DEBUG_BLOCK_INFO)
printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n",
b->inter_mode, ctx, n_mvs, ts->msac.rng);
......@@ -1587,7 +1587,7 @@ static int decode_b(Dav1dTileContext *const t,
ts->cdf.m.wedge_comp[ctx]);
if (b->comp_type == COMP_INTER_WEDGE)
b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
ts->cdf.m.wedge_idx[ctx], 16);
ts->cdf.m.wedge_idx[ctx], 15);
} else {
b->comp_type = COMP_INTER_SEG;
}
......@@ -1742,14 +1742,14 @@ static int decode_b(Dav1dTileContext *const t,
{
b->interintra_mode = dav1d_msac_decode_symbol_adapt4(&ts->msac,
ts->cdf.m.interintra_mode[ii_sz_grp],
N_INTER_INTRA_PRED_MODES);
N_INTER_INTRA_PRED_MODES - 1);
const int wedge_ctx = dav1d_wedge_ctx_lut[bs];
b->interintra_type = INTER_INTRA_BLEND +
dav1d_msac_decode_bool_adapt(&ts->msac,
ts->cdf.m.interintra_wedge[wedge_ctx]);
if (b->interintra_type == INTER_INTRA_WEDGE)
b->wedge_idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
ts->cdf.m.wedge_idx[wedge_ctx], 16);
ts->cdf.m.wedge_idx[wedge_ctx], 15);
} else {
b->interintra_type = INTER_INTRA_NONE;
}
......@@ -1782,7 +1782,7 @@ static int decode_b(Dav1dTileContext *const t,
b->motion_mode = allow_warp ?
dav1d_msac_decode_symbol_adapt4(&ts->msac,
ts->cdf.m.motion_mode[bs], 3) :
ts->cdf.m.motion_mode[bs], 2) :
dav1d_msac_decode_bool_adapt(&ts->msac, ts->cdf.m.obmc[bs]);
if (b->motion_mode == MM_WARP) {
has_subpel_filter = 0;
......@@ -1822,7 +1822,7 @@ static int decode_b(Dav1dTileContext *const t,
by4, bx4);
filter[0] = dav1d_msac_decode_symbol_adapt4(&ts->msac,
ts->cdf.m.filter[0][ctx1],
DAV1D_N_SWITCHABLE_FILTERS);
DAV1D_N_SWITCHABLE_FILTERS - 1);
if (f->seq_hdr->dual_filter) {
const int ctx2 = get_filter_ctx(t->a, &t->l, comp, 1,