Commit 0fdee4da authored by Ronald S. Bultje

Add support for super-res

Fixes #172.
parent d27598e4
...@@ -89,7 +89,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, ...@@ -89,7 +89,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
const int sbsz = 16; const int sbsz = 16;
const int sb64w = f->sb128w << 1; const int sb64w = f->sb128w << 1;
const int damping = f->frame_hdr.cdef.damping + BITDEPTH - 8; const int damping = f->frame_hdr.cdef.damping + BITDEPTH - 8;
const enum Dav1dPixelLayout layout = f->cur.p.p.layout; const enum Dav1dPixelLayout layout = f->cur.p.layout;
const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout; const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400; const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400;
const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420; const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
...@@ -106,7 +106,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, ...@@ -106,7 +106,7 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
if (edges & HAVE_BOTTOM) { if (edges & HAVE_BOTTOM) {
// backup pre-filter data for next iteration // backup pre-filter data for next iteration
backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.p.stride, backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.stride,
8, f->bw * 4, layout); 8, f->bw * 4, layout);
} }
...@@ -148,11 +148,11 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, ...@@ -148,11 +148,11 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
if (last_skip && edges & HAVE_LEFT) { if (last_skip && edges & HAVE_LEFT) {
// we didn't backup the prefilter data because it wasn't // we didn't backup the prefilter data because it wasn't
// there, so do it here instead // there, so do it here instead
backup2x8(lr_bak[bit], bptrs, f->cur.p.stride, 0, layout); backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout);
} }
if (edges & HAVE_RIGHT) { if (edges & HAVE_RIGHT) {
// backup pre-filter data for next iteration // backup pre-filter data for next iteration
backup2x8(lr_bak[!bit], bptrs, f->cur.p.stride, 8, layout); backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout);
} }
// the actual filter // the actual filter
...@@ -165,10 +165,10 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, ...@@ -165,10 +165,10 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
uv_sec_lvl += uv_sec_lvl == 3; uv_sec_lvl += uv_sec_lvl == 3;
uv_sec_lvl <<= BITDEPTH - 8; uv_sec_lvl <<= BITDEPTH - 8;
unsigned variance; unsigned variance;
const int dir = dsp->cdef.dir(bptrs[0], f->cur.p.stride[0], const int dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],
&variance); &variance);
if (y_lvl) { if (y_lvl) {
dsp->cdef.fb[0](bptrs[0], f->cur.p.stride[0], lr_bak[bit][0], dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],
(pixel *const [2]) { (pixel *const [2]) {
&f->lf.cdef_line_ptr[tf][0][0][bx * 4], &f->lf.cdef_line_ptr[tf][0][0][bx * 4],
&f->lf.cdef_line_ptr[tf][0][1][bx * 4], &f->lf.cdef_line_ptr[tf][0][1][bx * 4],
...@@ -179,10 +179,10 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, ...@@ -179,10 +179,10 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
} }
if (uv_lvl && has_chroma) { if (uv_lvl && has_chroma) {
const int uvdir = const int uvdir =
f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I422 ? dir : f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I422 ? dir :
((uint8_t[]) { 7, 0, 2, 4, 5, 6, 6, 6 })[dir]; ((uint8_t[]) { 7, 0, 2, 4, 5, 6, 6, 6 })[dir];
for (int pl = 1; pl <= 2; pl++) { for (int pl = 1; pl <= 2; pl++) {
dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.p.stride[1], dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],
lr_bak[bit][pl], lr_bak[bit][pl],
(pixel *const [2]) { (pixel *const [2]) {
&f->lf.cdef_line_ptr[tf][pl][0][bx * 4 >> ss_hor], &f->lf.cdef_line_ptr[tf][pl][0][bx * 4 >> ss_hor],
...@@ -209,9 +209,9 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f, ...@@ -209,9 +209,9 @@ void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
iptrs[2] += sbsz * 4 >> ss_hor; iptrs[2] += sbsz * 4 >> ss_hor;
} }
ptrs[0] += 8 * PXSTRIDE(f->cur.p.stride[0]); ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);
ptrs[1] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver; ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
ptrs[2] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver; ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;
f->lf.top_pre_cdef_toggle ^= 1; f->lf.top_pre_cdef_toggle ^= 1;
} }
} }
...@@ -422,11 +422,11 @@ static void read_pal_plane(Dav1dTileContext *const t, Av1Block *const b, ...@@ -422,11 +422,11 @@ static void read_pal_plane(Dav1dTileContext *const t, Av1Block *const b,
f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) + f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
((t->bx >> 1) + (t->by & 1))][pl] : t->pal[pl]; ((t->bx >> 1) + (t->by & 1))][pl] : t->pal[pl];
if (i < pal_sz) { if (i < pal_sz) {
int prev = pal[i++] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc); int prev = pal[i++] = msac_decode_bools(&ts->msac, f->cur.p.bpc);
if (i < pal_sz) { if (i < pal_sz) {
int bits = f->cur.p.p.bpc - 3 + msac_decode_bools(&ts->msac, 2); int bits = f->cur.p.bpc - 3 + msac_decode_bools(&ts->msac, 2);
const int max = (1 << f->cur.p.p.bpc) - 1; const int max = (1 << f->cur.p.bpc) - 1;
do { do {
const int delta = msac_decode_bools(&ts->msac, bits); const int delta = msac_decode_bools(&ts->msac, bits);
...@@ -478,9 +478,9 @@ static void read_pal_uv(Dav1dTileContext *const t, Av1Block *const b, ...@@ -478,9 +478,9 @@ static void read_pal_uv(Dav1dTileContext *const t, Av1Block *const b,
f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) + f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
((t->bx >> 1) + (t->by & 1))][2] : t->pal[2]; ((t->bx >> 1) + (t->by & 1))][2] : t->pal[2];
if (msac_decode_bool(&ts->msac, EC_BOOL_EPROB)) { if (msac_decode_bool(&ts->msac, EC_BOOL_EPROB)) {
const int bits = f->cur.p.p.bpc - 4 + msac_decode_bools(&ts->msac, 2); const int bits = f->cur.p.bpc - 4 + msac_decode_bools(&ts->msac, 2);
int prev = pal[0] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc); int prev = pal[0] = msac_decode_bools(&ts->msac, f->cur.p.bpc);
const int max = (1 << f->cur.p.p.bpc) - 1; const int max = (1 << f->cur.p.bpc) - 1;
for (int i = 1; i < b->pal_sz[1]; i++) { for (int i = 1; i < b->pal_sz[1]; i++) {
int delta = msac_decode_bools(&ts->msac, bits); int delta = msac_decode_bools(&ts->msac, bits);
if (delta && msac_decode_bool(&ts->msac, EC_BOOL_EPROB)) delta = -delta; if (delta && msac_decode_bool(&ts->msac, EC_BOOL_EPROB)) delta = -delta;
...@@ -488,7 +488,7 @@ static void read_pal_uv(Dav1dTileContext *const t, Av1Block *const b, ...@@ -488,7 +488,7 @@ static void read_pal_uv(Dav1dTileContext *const t, Av1Block *const b,
} }
} else { } else {
for (int i = 0; i < b->pal_sz[1]; i++) for (int i = 0; i < b->pal_sz[1]; i++)
pal[i] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc); pal[i] = msac_decode_bools(&ts->msac, f->cur.p.bpc);
} }
if (DEBUG_BLOCK_INFO) { if (DEBUG_BLOCK_INFO) {
printf("Post-pal[pl=2]: r=%d ", ts->msac.rng); printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
...@@ -634,7 +634,7 @@ static void read_vartx_tree(Dav1dTileContext *const t, ...@@ -634,7 +634,7 @@ static void read_vartx_tree(Dav1dTileContext *const t,
} else { } else {
assert(f->frame_hdr.txfm_mode == TX_LARGEST); assert(f->frame_hdr.txfm_mode == TX_LARGEST);
} }
b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.p.layout]; b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
} else { } else {
assert(imin(bw4, bh4) <= 16 || b->max_ytx == TX_64X64); assert(imin(bw4, bh4) <= 16 || b->max_ytx == TX_64X64);
int y, x, y_off, x_off; int y, x, y_off, x_off;
...@@ -652,7 +652,7 @@ static void read_vartx_tree(Dav1dTileContext *const t, ...@@ -652,7 +652,7 @@ static void read_vartx_tree(Dav1dTileContext *const t,
if (DEBUG_BLOCK_INFO) if (DEBUG_BLOCK_INFO)
printf("Post-vartxtree[%x/%x]: r=%d\n", printf("Post-vartxtree[%x/%x]: r=%d\n",
b->tx_split[0], b->tx_split[1], t->ts->msac.rng); b->tx_split[0], b->tx_split[1], t->ts->msac.rng);
b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.p.layout]; b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
} }
} }
...@@ -694,8 +694,8 @@ static int decode_b(Dav1dTileContext *const t, ...@@ -694,8 +694,8 @@ static int decode_b(Dav1dTileContext *const t,
&f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem; &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
const uint8_t *const b_dim = dav1d_block_dimensions[bs]; const uint8_t *const b_dim = dav1d_block_dimensions[bs];
const int bx4 = t->bx & 31, by4 = t->by & 31; const int bx4 = t->bx & 31, by4 = t->by & 31;
const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420; const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444; const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver; const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
const int bw4 = b_dim[0], bh4 = b_dim[1]; const int bw4 = b_dim[0], bh4 = b_dim[1];
const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by); const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
...@@ -1138,7 +1138,7 @@ static int decode_b(Dav1dTileContext *const t, ...@@ -1138,7 +1138,7 @@ static int decode_b(Dav1dTileContext *const t,
t_dim = &dav1d_txfm_dimensions[TX_4X4]; t_dim = &dav1d_txfm_dimensions[TX_4X4];
} else { } else {
b->tx = dav1d_max_txfm_size_for_bs[bs][0]; b->tx = dav1d_max_txfm_size_for_bs[bs][0];
b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.p.layout]; b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];
t_dim = &dav1d_txfm_dimensions[b->tx]; t_dim = &dav1d_txfm_dimensions[b->tx];
if (f->frame_hdr.txfm_mode == TX_SWITCHABLE && t_dim->max > TX_4X4) { if (f->frame_hdr.txfm_mode == TX_SWITCHABLE && t_dim->max > TX_4X4) {
const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4); const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);
...@@ -1166,7 +1166,7 @@ static int decode_b(Dav1dTileContext *const t, ...@@ -1166,7 +1166,7 @@ static int decode_b(Dav1dTileContext *const t,
&f->frame_hdr, (const uint8_t (*)[8][2]) &f->frame_hdr, (const uint8_t (*)[8][2])
&ts->lflvl[b->seg_id][0][0][0], &ts->lflvl[b->seg_id][0][0][0],
t->bx, t->by, f->w4, f->h4, bs, t->bx, t->by, f->w4, f->h4, bs,
b->tx, b->uvtx, f->cur.p.p.layout, b->tx, b->uvtx, f->cur.p.layout,
&t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4], &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL, has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL); has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
...@@ -1543,7 +1543,7 @@ static int decode_b(Dav1dTileContext *const t, ...@@ -1543,7 +1543,7 @@ static int decode_b(Dav1dTileContext *const t,
if (f->seq_hdr.jnt_comp) { if (f->seq_hdr.jnt_comp) {
const int jnt_ctx = const int jnt_ctx =
get_jnt_comp_ctx(f->seq_hdr.order_hint_n_bits, get_jnt_comp_ctx(f->seq_hdr.order_hint_n_bits,
f->cur.p.poc, f->refp[b->ref[0]].p.poc, f->cur.poc, f->refp[b->ref[0]].p.poc,
f->refp[b->ref[1]].p.poc, t->a, &t->l, f->refp[b->ref[1]].p.poc, t->a, &t->l,
by4, bx4); by4, bx4);
b->comp_type = COMP_INTER_WEIGHTED_AVG + b->comp_type = COMP_INTER_WEIGHTED_AVG +
...@@ -1833,7 +1833,7 @@ static int decode_b(Dav1dTileContext *const t, ...@@ -1833,7 +1833,7 @@ static int decode_b(Dav1dTileContext *const t,
dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride, dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride,
&f->frame_hdr, lf_lvls, t->bx, t->by, &f->frame_hdr, lf_lvls, t->bx, t->by,
f->w4, f->h4, b->skip, bs, b->tx_split, f->w4, f->h4, b->skip, bs, b->tx_split,
b->uvtx, f->cur.p.p.layout, b->uvtx, f->cur.p.layout,
&t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4], &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL, has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL); has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
...@@ -1938,7 +1938,7 @@ static int decode_sb(Dav1dTileContext *const t, const enum BlockLevel bl, ...@@ -1938,7 +1938,7 @@ static int decode_sb(Dav1dTileContext *const t, const enum BlockLevel bl,
const unsigned n_part = bl == BL_8X8 ? N_SUB8X8_PARTITIONS : const unsigned n_part = bl == BL_8X8 ? N_SUB8X8_PARTITIONS :
bl == BL_128X128 ? N_PARTITIONS - 2 : N_PARTITIONS; bl == BL_128X128 ? N_PARTITIONS - 2 : N_PARTITIONS;
bp = msac_decode_symbol_adapt(&t->ts->msac, pc, n_part); bp = msac_decode_symbol_adapt(&t->ts->msac, pc, n_part);
if (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I422 && if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&
(bp == PARTITION_V || bp == PARTITION_V4 || (bp == PARTITION_V || bp == PARTITION_V4 ||
bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT)) bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT))
{ {
...@@ -2139,7 +2139,7 @@ static int decode_sb(Dav1dTileContext *const t, const enum BlockLevel bl, ...@@ -2139,7 +2139,7 @@ static int decode_sb(Dav1dTileContext *const t, const enum BlockLevel bl,
} else { } else {
uint16_t cdf[2] = { gather_left_partition_prob(pc, bl), 0 }; uint16_t cdf[2] = { gather_left_partition_prob(pc, bl), 0 };
is_split = msac_decode_symbol(&t->ts->msac, cdf, 2); is_split = msac_decode_symbol(&t->ts->msac, cdf, 2);
if (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split) if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)
return 1; return 1;
if (DEBUG_BLOCK_INFO) if (DEBUG_BLOCK_INFO)
printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n", printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
...@@ -2230,12 +2230,30 @@ static void setup_tile(Dav1dTileState *const ts, ...@@ -2230,12 +2230,30 @@ static void setup_tile(Dav1dTileState *const ts,
ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh); ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh);
// Reference Restoration Unit (used for exp coding) // Reference Restoration Unit (used for exp coding)
Av1Filter *const lf_mask = int sb_idx, unit_idx;
f->lf.mask + (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start; if (f->frame_hdr.super_res.enabled) {
const int unit_idx = ((ts->tiling.row_start & 16) >> 3) + // vertical components only
((ts->tiling.col_start & 16) >> 4); sb_idx = (ts->tiling.row_start >> 5) * f->sr_sb128w;
unit_idx = (ts->tiling.row_start & 16) >> 3;
} else {
sb_idx = (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start;
unit_idx = ((ts->tiling.row_start & 16) >> 3) +
((ts->tiling.col_start & 16) >> 4);
}
for (int p = 0; p < 3; p++) { for (int p = 0; p < 3; p++) {
ts->lr_ref[p] = &lf_mask->lr[p][unit_idx]; if (f->frame_hdr.super_res.enabled) {
// Super-res branch: the reference restoration unit for this tile is looked
// up from the tile's start column after applying the super-res denominator.
const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int d = f->frame_hdr.super_res.width_scale_denominator;
const int unit_size_log2 = f->frame_hdr.restoration.unit_size[!!p];
// rnd/shift divide by (8 << unit_size_log2), rounding up to a whole unit.
const int rnd = (8 << unit_size_log2) - 1, shift = unit_size_log2 + 3;
const int x = ((4 * ts->tiling.col_start * d >> ss_hor) + rnd) >> shift;
const int px_x = x << (unit_size_log2 + ss_hor);
// Bit 6 of px_x selects the horizontal half and must contribute 0 or 1 to
// the unit index, exactly as the per-superblock lookup in
// dav1d_decode_tile_sbrow() does. Shifting by 1 instead added 32, pushing
// the index far beyond the 0..3 range used everywhere else, so the writes
// through ts->lr_ref[p] just below landed outside the intended unit.
const int u_idx = unit_idx + ((px_x & 64) >> 6);
ts->lr_ref[p] = &f->lf.lr_mask[sb_idx + (px_x >> 7)].lr[p][u_idx];
} else {
ts->lr_ref[p] = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
}
ts->lr_ref[p]->filter_v[0] = 3; ts->lr_ref[p]->filter_v[0] = 3;
ts->lr_ref[p]->filter_v[1] = -7; ts->lr_ref[p]->filter_v[1] = -7;
ts->lr_ref[p]->filter_v[2] = 15; ts->lr_ref[p]->filter_v[2] = 15;
...@@ -2250,6 +2268,87 @@ static void setup_tile(Dav1dTileState *const ts, ...@@ -2250,6 +2268,87 @@ static void setup_tile(Dav1dTileState *const ts,
atomic_init(&ts->progress, row_sb_start); atomic_init(&ts->progress, row_sb_start);
} }
/*
 * Reads the loop-restoration parameters of a single restoration unit of
 * plane `p` from the tile's entropy decoder and stores them in `*lr`.
 *
 * t          - tile context; supplies the frame context and the tile state
 *              (entropy decoder, adaptive CDFs, per-plane reference unit).
 * lr         - restoration unit to fill in.
 * p          - plane index; for planes other than 0 the first Wiener tap is
 *              not read from the bitstream and is set to 0.
 * frame_type - frame-level restoration type of this plane. With
 *              RESTORATION_SWITCHABLE a 3-way symbol picks the unit's type;
 *              otherwise one adaptive bool chooses between `frame_type` and
 *              RESTORATION_NONE.
 *
 * The values of ts->lr_ref[p] are handed to msac_decode_subexp() as the
 * reference for each coefficient. When the unit is Wiener or SGRPROJ, the
 * parameters of the other filter kind are copied over from the reference
 * and ts->lr_ref[p] is then pointed at `lr`. A RESTORATION_NONE unit leaves
 * ts->lr_ref[p] untouched.
 */
static void read_restoration_info(Dav1dTileContext *const t,
                                  Av1RestorationUnit *const lr, const int p,
                                  const enum RestorationType frame_type)
{
    // NOTE(review): `f` is not referenced by name below; presumably the
    // DEBUG_BLOCK_INFO macro expands to an expression that uses it - confirm
    // before removing.
    const Dav1dFrameContext *const f = t->f;
    Dav1dTileState *const ts = t->ts;
    // Reference unit for this plane; ts->lr_ref[p] is only reassigned at the
    // very end of each branch, so reading it once up front is equivalent.
    const Av1RestorationUnit *const ref = ts->lr_ref[p];

    /* Step 1: per-unit restoration type. */
    if (frame_type == RESTORATION_SWITCHABLE) {
        const int filter =
            msac_decode_symbol_adapt(&ts->msac, ts->cdf.m.restore_switchable, 3);
        if (!filter)
            lr->type = RESTORATION_NONE;
        else if (filter == 2)
            lr->type = RESTORATION_SGRPROJ;
        else
            lr->type = RESTORATION_WIENER;
    } else {
        const unsigned type =
            msac_decode_bool_adapt(&ts->msac,
                                   frame_type == RESTORATION_WIENER ?
                                       ts->cdf.m.restore_wiener :
                                       ts->cdf.m.restore_sgrproj);
        if (type)
            lr->type = frame_type;
        else
            lr->type = RESTORATION_NONE;
    }

    /* Step 2: filter parameters, in bitstream order. */
    if (lr->type == RESTORATION_WIENER) {
        // Vertical taps first, then horizontal taps.
        if (!p)
            lr->filter_v[0] =
                msac_decode_subexp(&ts->msac, ref->filter_v[0] + 5, 16, 1) - 5;
        else
            lr->filter_v[0] = 0;
        lr->filter_v[1] =
            msac_decode_subexp(&ts->msac, ref->filter_v[1] + 23, 32, 2) - 23;
        lr->filter_v[2] =
            msac_decode_subexp(&ts->msac, ref->filter_v[2] + 17, 64, 3) - 17;

        if (!p)
            lr->filter_h[0] =
                msac_decode_subexp(&ts->msac, ref->filter_h[0] + 5, 16, 1) - 5;
        else
            lr->filter_h[0] = 0;
        lr->filter_h[1] =
            msac_decode_subexp(&ts->msac, ref->filter_h[1] + 23, 32, 2) - 23;
        lr->filter_h[2] =
            msac_decode_subexp(&ts->msac, ref->filter_h[2] + 17, 64, 3) - 17;

        // Carry the SGR weights along so this unit can serve as the next
        // reference regardless of the type that follows.
        memcpy(lr->sgr_weights, ref->sgr_weights, sizeof(lr->sgr_weights));
        ts->lr_ref[p] = lr;

        if (DEBUG_BLOCK_INFO)
            printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
                   p, lr->filter_v[0], lr->filter_v[1], lr->filter_v[2],
                   lr->filter_h[0], lr->filter_h[1], lr->filter_h[2],
                   ts->msac.rng);
        return;
    }

    if (lr->type != RESTORATION_SGRPROJ)
        return;

    const unsigned idx = msac_decode_bools(&ts->msac, 4);
    lr->sgr_idx = idx;

    // A weight is only coded when the matching dav1d_sgr_params entry is
    // non-zero.
    if (dav1d_sgr_params[idx][0])
        lr->sgr_weights[0] =
            msac_decode_subexp(&ts->msac, ref->sgr_weights[0] + 96, 128, 4) - 96;
    else
        lr->sgr_weights[0] = 0;

    if (dav1d_sgr_params[idx][1])
        lr->sgr_weights[1] =
            msac_decode_subexp(&ts->msac, ref->sgr_weights[1] + 32, 128, 4) - 32;
    else
        // Not coded: derived from the first weight and clipped.
        lr->sgr_weights[1] = iclip(128 - lr->sgr_weights[0], -32, 95);

    // Carry the Wiener taps along, mirroring the Wiener branch above.
    memcpy(lr->filter_v, ref->filter_v, sizeof(lr->filter_v));
    memcpy(lr->filter_h, ref->filter_h, sizeof(lr->filter_h));
    ts->lr_ref[p] = lr;

    if (DEBUG_BLOCK_INFO)
        printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
               p, lr->sgr_idx, lr->sgr_weights[0],
               lr->sgr_weights[1], ts->msac.rng);
}
int dav1d_decode_tile_sbrow(Dav1dTileContext *const t) { int dav1d_decode_tile_sbrow(Dav1dTileContext *const t) {
const Dav1dFrameContext *const f = t->f; const Dav1dFrameContext *const f = t->f;
const enum BlockLevel root_bl = f->seq_hdr.sb128 ? BL_128X128 : BL_64X64; const enum BlockLevel root_bl = f->seq_hdr.sb128 ? BL_128X128 : BL_64X64;
...@@ -2275,9 +2374,6 @@ int dav1d_decode_tile_sbrow(Dav1dTileContext *const t) { ...@@ -2275,9 +2374,6 @@ int dav1d_decode_tile_sbrow(Dav1dTileContext *const t) {
return 0; return 0;
} }
const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
if (c->n_fc > 1 && f->frame_hdr.use_ref_frame_mvs) { if (c->n_fc > 1 && f->frame_hdr.use_ref_frame_mvs) {
for (int n = 0; n < 7; n++) for (int n = 0; n < 7; n++)
if (dav1d_thread_picture_wait(&f->refp[n], 4 * (t->by + sb_step), if (dav1d_thread_picture_wait(&f->refp[n], 4 * (t->by + sb_step),
...@@ -2311,99 +2407,52 @@ int dav1d_decode_tile_sbrow(Dav1dTileContext *const t) { ...@@ -2311,99 +2407,52 @@ int dav1d_decode_tile_sbrow(Dav1dTileContext *const t) {
for (int p = 0; p < 3; p++) { for (int p = 0; p < 3; p++) {
if (f->frame_hdr.restoration.type[p] == RESTORATION_NONE) if (f->frame_hdr.restoration.type[p] == RESTORATION_NONE)
continue; continue;
const int by = t->by >> (ss_ver & !!p);
const int bx = t->bx >> (ss_hor & !!p); const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
const int bh = f->bh >> (ss_ver & !!p); const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
const int bw = f->bw >> (ss_hor & !!p); const int unit_size_log2 = f->frame_hdr.restoration.unit_size[!!p];
const int y = t->by * 4 >> ss_ver;
const int unit_size_log2 = const int h = (f->cur.p.h + ss_ver) >> ss_ver;
f->frame_hdr.restoration.unit_size[!!p];
// 4pel unit size const int unit_size = 1 << unit_size_log2;
const int b_unit_size = 1 << (unit_size_log2 - 2); const unsigned mask = unit_size - 1;
const unsigned mask = b_unit_size - 1; if (y & mask) continue;
if (by & mask || bx & mask) continue; const int half_unit = unit_size >> 1;
const int half_unit = b_unit_size >> 1;
// Round half up at frame boundaries, if there's more than one // Round half up at frame boundaries, if there's more than one
// restoration unit // restoration unit
const int bottom_round = by && by + half_unit > bh; if (y && y + half_unit > h) continue;
const int right_round = bx && bx + half_unit > bw;
if (bottom_round || right_round) continue;
const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);
Av1RestorationUnit *const lr = &t->lf_mask->lr[p][unit_idx];
const enum RestorationType frame_type =
f->frame_hdr.restoration.type[p];
if (frame_type == RESTORATION_SWITCHABLE) {
const int filter =
msac_decode_symbol_adapt(&ts->msac,
ts->cdf.m.restore_switchable, 3);
lr->type = filter ? filter == 2 ? RESTORATION_SGRPROJ :
RESTORATION_WIENER :
RESTORATION_NONE;
} else {
const unsigned type =
msac_decode_bool_adapt(&ts->msac,
frame_type == RESTORATION_WIENER ?
ts->cdf.m.restore_wiener :
ts->cdf.m.restore_sgrproj);
lr->type = type ? frame_type : RESTORATION_NONE;
}
if (lr->type == RESTORATION_WIENER) { const enum RestorationType frame_type = f->frame_hdr.restoration.type[p];
lr->filter_v[0] =
!p ? msac_decode_subexp(&ts->msac, if (f->frame_hdr.super_res.enabled) {
ts->lr_ref[p]->filter_v[0] + 5, 16, const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
1) - 5: const int n_units = imax(1, (w + half_unit) >> unit_size_log2);
0;
lr->filter_v[1] = const int d = f->frame_hdr.super_res.width_scale_denominator;
msac_decode_subexp(&ts->msac, const int rnd = unit_size * 8 - 1, shift = unit_size_log2 + 3;
ts->lr_ref[p]->filter_v[1] + 23, 32, const int x0 = ((4 * t->bx * d >> ss_hor) + rnd) >> shift;
2) - 23; const int x1 = ((4 * (t->bx + sb_step) * d >> ss_hor) + rnd) >> shift;
lr->filter_v[2] =
msac_decode_subexp(&ts->msac, for (int x = x0; x < imin(x1, n_units); x++) {
ts->lr_ref[p]->filter_v[2] + 17, 64, const int px_x = x << (unit_size_log2 + ss_hor);
3) - 17; const int sb_idx = (t->by >> 5) * f->sr_sb128w + (px_x >> 7);
const int unit_idx = ((t->by & 16) >> 3) + ((px_x & 64) >> 6);
lr->filter_h[0] = Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
!p ? msac_decode_subexp(&ts->msac,
ts->lr_ref[p]->filter_h[0] + 5, 16, read_restoration_info(t, lr, p, frame_type);
1) - 5: }
0; } else {
lr->filter_h[1] = const int x = 4 * t->bx >> ss_hor;
msac_decode_subexp(&ts->msac, if (x & mask) continue;
ts->lr_ref[p]->filter_h[1] + 23, 32, const int w = (f->cur.p.w + ss_hor) >> ss_hor;
2) - 23; // Round half up at frame boundaries, if there's more than one
lr->filter_h[2] = // restoration unit
msac_decode_subexp(&ts->msac, if (x && x + half_unit > w) continue;
ts->lr_ref[p]->filter_h[2] + 17, 64, const int sb_idx = (t->by >> 5) * f->sr_sb128w + (t->bx >> 5);
3) - 17; const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);
memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights)); Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
ts->lr_ref[p] = lr;
if (DEBUG_BLOCK_INFO) read_restoration_info(t, lr, p, frame_type);
printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
p, lr->filter_v[0], lr->filter_v[1],
lr->filter_v[2], lr->filter_h[0],
lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
} else if (lr->type == RESTORATION_SGRPROJ) {
const unsigned idx = msac_decode_bools(&ts->msac, 4);
lr->sgr_idx = idx;
lr->sgr_weights[0] = dav1d_sgr_params[idx][0] ?
msac_decode_subexp(&ts->msac,
ts->lr_ref[p]->sgr_weights[0] + 96, 128,
4) - 96 :
0;
lr->sgr_weights[1] = dav1d_sgr_params[idx][1] ?
msac_decode_subexp(&ts->msac,
ts->lr_ref[p]->sgr_weights[1] + 32, 128,
4) - 32 :
iclip(128 - lr->sgr_weights[0], -32, 95);
memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v));
memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
ts->lr_ref[p] = lr;
if (DEBUG_BLOCK_INFO)
printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
p, lr->sgr_idx, lr->sgr_weights[0],
lr->sgr_weights[1], ts->msac.rng);
} }
} }
if (decode_sb(t, root_bl, c->intra_edge.root[root_bl])) if (decode_sb(t, root_bl, c->intra_edge.root[root_bl]))
...@@ -2423,8 +2472,8 @@ int dav1d_decode_tile_sbrow(Dav1dTileContext *const t) { ...@@ -2423,8 +2472,8 @@ int dav1d_decode_tile_sbrow(Dav1dTileContext *const t) {
int align_h = (f->bh + 31) & ~31; int align_h = (f->bh + 31) & ~31;
memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by], memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by],
&t->l.tx_lpf_y[t->by & 16], sb_step); &t->l.tx_lpf_y[t->by & 16], sb_step);
const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
align_h >>= ss_ver; align_h >>= ss_ver;
memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)], memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)],
&t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver); &t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver);
...@@ -2512,21 +2561,12 @@ int dav1d_decode_frame(Dav1dFrameContext *const f) { ...@@ -2512,21 +2561,12 @@ int dav1d_decode_frame(Dav1dFrameContext *const f) {
// update allocation of block contexts for above // update allocation of block contexts for above
if (f->sb128w > f->lf.line_sz) { if (f->sb128w > f->lf.line_sz) {
dav1d_freep_aligned(&f->lf.cdef_line); dav1d_freep_aligned(&f->lf.cdef_line);
dav1d_freep_aligned(&f->lf.lr_lpf_line);
// note that we allocate all pixel arrays as if we were dealing with