/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#include <assert.h>
#include <string.h>

#include "common/intops.h"

#include "src/ctx.h"
#include "src/levels.h"
#include "src/lf_mask.h"
#include "src/tables.h"

40
static void decomp_tx(uint8_t (*const txa)[2 /* txsz, step */][32 /* y */][32 /* x */],
41 42 43 44 45
                      const enum RectTxfmSize from,
                      const int depth,
                      const int y_off, const int x_off,
                      const uint16_t *const tx_masks)
{
46
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[from];
47 48
    const int is_split =
        depth > 1 ? 0 : (tx_masks[depth] >> (y_off * 4 + x_off)) & 1;
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67

    if (is_split) {
        const enum RectTxfmSize sub = t_dim->sub;
        const int htw4 = t_dim->w >> 1, hth4 = t_dim->h >> 1;

        decomp_tx(txa, sub, depth + 1, y_off * 2 + 0, x_off * 2 + 0, tx_masks);
        if (t_dim->w >= t_dim->h)
            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][0][htw4],
                      sub, depth + 1, y_off * 2 + 0, x_off * 2 + 1, tx_masks);
        if (t_dim->h >= t_dim->w) {
            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][0],
                      sub, depth + 1, y_off * 2 + 1, x_off * 2 + 0, tx_masks);
            if (t_dim->w >= t_dim->h)
                decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][htw4],
                          sub, depth + 1, y_off * 2 + 1, x_off * 2 + 1, tx_masks);
        }
    } else {
        const int lw = imin(2, t_dim->lw), lh = imin(2, t_dim->lh);

68 69 70 71 72
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
        for (int y = 0; y < t_dim->h; y++) { \
            rep_macro(type, txa[0][0][y], off, mul * lw); \
            rep_macro(type, txa[1][0][y], off, mul * lh); \
            txa[0][1][y][0] = t_dim->w; \
73
        }
74 75 76 77 78 79
        case_set_upto16(t_dim->w,,, 0);
#undef set_ctx
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
        rep_macro(type, txa[1][1][0], off, mul * t_dim->h)
        case_set_upto16(t_dim->w,,, 0);
#undef set_ctx
80 81 82
    }
}

83
static inline void mask_edges_inter(uint16_t (*const masks)[32][3][2],
84 85 86 87 88 89
                                    const int by4, const int bx4,
                                    const int w4, const int h4, const int skip,
                                    const enum RectTxfmSize max_tx,
                                    const uint16_t *const tx_masks,
                                    uint8_t *const a, uint8_t *const l)
{
90
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[max_tx];
91 92 93
    int y, x;

    uint8_t txa[2 /* edge */][2 /* txsz, step */][32 /* y */][32 /* x */];
94 95
    for (int y_off = 0, y = 0; y < h4; y += t_dim->h, y_off++)
        for (int x_off = 0, x = 0; x < w4; x += t_dim->w, x_off++)
96 97 98 99
            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][y][x],
                      max_tx, 0, y_off, x_off, tx_masks);

    // left block edge
100
    unsigned mask = 1U << by4;
101 102 103 104 105
    for (y = 0; y < h4; y++, mask <<= 1) {
        const int sidx = mask >= 0x10000;
        const unsigned smask = mask >> (sidx << 4);
        masks[0][bx4][imin(txa[0][0][y][0], l[y])][sidx] |= smask;
    }
106 107

    // top block edge
108 109 110 111 112
    for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
        const int sidx = mask >= 0x10000;
        const unsigned smask = mask >> (sidx << 4);
        masks[1][by4][imin(txa[1][0][0][x], a[x])][sidx] |= smask;
    }
113 114 115

    if (!skip) {
        // inner (tx) left|right edges
116
        for (y = 0, mask = 1U << by4; y < h4; y++, mask <<= 1) {
117 118
            const int sidx = mask >= 0x10000U;
            const unsigned smask = mask >> (sidx << 4);
119 120
            int ltx = txa[0][0][y][0];
            int step = txa[0][1][y][0];
121 122
            for (x = step; x < w4; x += step) {
                const int rtx = txa[0][0][y][x];
123
                masks[0][bx4 + x][imin(rtx, ltx)][sidx] |= smask;
124 125
                ltx = rtx;
                step = txa[0][1][y][x];
126 127 128 129 130 131 132
            }
        }

        //            top
        // inner (tx) --- edges
        //           bottom
        for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
133 134
            const int sidx = mask >= 0x10000U;
            const unsigned smask = mask >> (sidx << 4);
135 136 137 138
            int ttx = txa[1][0][0][x];
            int step = txa[1][1][0][x];
            for (y = step; y < h4; y += step) {
                const int btx = txa[1][0][y][x];
139
                masks[1][by4 + y][imin(ttx, btx)][sidx] |= smask;
140 141 142 143 144 145 146 147 148 149 150
                ttx = btx;
                step = txa[1][1][y][x];
            }
        }
    }

    for (y = 0; y < h4; y++)
        l[y] = txa[0][0][y][w4 - 1];
    memcpy(a, txa[1][0][h4 - 1], w4);
}

151
static inline void mask_edges_intra(uint16_t (*const masks)[32][3][2],
152 153 154 155 156
                                    const int by4, const int bx4,
                                    const int w4, const int h4,
                                    const enum RectTxfmSize tx,
                                    uint8_t *const a, uint8_t *const l)
{
157
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
158 159 160 161 162
    const int twl4 = t_dim->lw, thl4 = t_dim->lh;
    const int twl4c = imin(2, twl4), thl4c = imin(2, thl4);
    int y, x;

    // left block edge
163
    unsigned mask = 1U << by4;
164 165 166 167 168
    for (y = 0; y < h4; y++, mask <<= 1) {
        const int sidx = mask >= 0x10000;
        const unsigned smask = mask >> (sidx << 4);
        masks[0][bx4][imin(twl4c, l[y])][sidx] |= smask;
    }
169 170

    // top block edge
171 172 173 174 175
    for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
        const int sidx = mask >= 0x10000;
        const unsigned smask = mask >> (sidx << 4);
        masks[1][by4][imin(thl4c, a[x])][sidx] |= smask;
    }
176 177

    // inner (tx) left|right edges
178 179
    const int hstep = t_dim->w;
    unsigned t = 1U << by4;
180
    unsigned inner = (unsigned) ((((uint64_t) t) << h4) - t);
181 182 183 184 185
    unsigned inner1 = inner & 0xffff, inner2 = inner >> 16;
    for (x = hstep; x < w4; x += hstep) {
        if (inner1) masks[0][bx4 + x][twl4c][0] |= inner1;
        if (inner2) masks[0][bx4 + x][twl4c][1] |= inner2;
    }
186 187 188 189 190

    //            top
    // inner (tx) --- edges
    //           bottom
    const int vstep = t_dim->h;
191
    t = 1U << bx4;
192
    inner = (unsigned) ((((uint64_t) t) << w4) - t);
193 194 195 196 197 198
    inner1 = inner & 0xffff;
    inner2 = inner >> 16;
    for (y = vstep; y < h4; y += vstep) {
        if (inner1) masks[1][by4 + y][thl4c][0] |= inner1;
        if (inner2) masks[1][by4 + y][thl4c][1] |= inner2;
    }
199

200 201 202 203 204 205 206 207 208 209 210 211 212 213
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, a, off, mul * thl4c)
#define default_memset(dir, diridx, off, var) \
    memset(a, thl4c, var)
    case_set_upto32_with_default(w4,,, 0);
#undef default_memset
#undef set_ctx
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, l, off, mul * twl4c)
#define default_memset(dir, diridx, off, var) \
    memset(l, twl4c, var)
    case_set_upto32_with_default(h4,,, 0);
#undef default_memset
#undef set_ctx
214 215
}

216
static inline void mask_edges_chroma(uint16_t (*const masks)[32][2][2],
217 218 219 220
                                     const int cby4, const int cbx4,
                                     const int cw4, const int ch4,
                                     const int skip_inter,
                                     const enum RectTxfmSize tx,
221 222
                                     uint8_t *const a, uint8_t *const l,
                                     const int ss_hor, const int ss_ver)
223
{
224
    const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
225 226 227
    const int twl4 = t_dim->lw, thl4 = t_dim->lh;
    const int twl4c = !!twl4, thl4c = !!thl4;
    int y, x;
228 229 230
    const int vbits = 4 - ss_ver, hbits = 4 - ss_hor;
    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
    const unsigned vmax = 1 << vmask, hmax = 1 << hmask;
231 232

    // left block edge
233
    unsigned mask = 1U << cby4;
234 235 236 237 238
    for (y = 0; y < ch4; y++, mask <<= 1) {
        const int sidx = mask >= vmax;
        const unsigned smask = mask >> (sidx << vbits);
        masks[0][cbx4][imin(twl4c, l[y])][sidx] |= smask;
    }
239 240

    // top block edge
241 242 243 244 245
    for (x = 0, mask = 1U << cbx4; x < cw4; x++, mask <<= 1) {
        const int sidx = mask >= hmax;
        const unsigned smask = mask >> (sidx << hbits);
        masks[1][cby4][imin(thl4c, a[x])][sidx] |= smask;
    }
246 247 248

    if (!skip_inter) {
        // inner (tx) left|right edges
249
        const int hstep = t_dim->w;
250
        unsigned t = 1U << cby4;
251
        unsigned inner = (unsigned) ((((uint64_t) t) << ch4) - t);
252 253 254 255 256
        unsigned inner1 = inner & ((1 << vmask) - 1), inner2 = inner >> vmask;
        for (x = hstep; x < cw4; x += hstep) {
            if (inner1) masks[0][cbx4 + x][twl4c][0] |= inner1;
            if (inner2) masks[0][cbx4 + x][twl4c][1] |= inner2;
        }
257 258 259 260 261

        //            top
        // inner (tx) --- edges
        //           bottom
        const int vstep = t_dim->h;
262
        t = 1U << cbx4;
263
        inner = (unsigned) ((((uint64_t) t) << cw4) - t);
264 265 266 267 268
        inner1 = inner & ((1 << hmask) - 1), inner2 = inner >> hmask;
        for (y = vstep; y < ch4; y += vstep) {
            if (inner1) masks[1][cby4 + y][thl4c][0] |= inner1;
            if (inner2) masks[1][cby4 + y][thl4c][1] |= inner2;
        }
269 270
    }

271 272 273 274 275 276 277 278 279 280 281 282 283 284
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, a, off, mul * thl4c)
#define default_memset(dir, diridx, off, var) \
    memset(a, thl4c, var)
    case_set_upto32_with_default(cw4,,, 0);
#undef default_memset
#undef set_ctx
#define set_ctx(type, dir, diridx, off, mul, rep_macro) \
    rep_macro(type, l, off, mul * twl4c)
#define default_memset(dir, diridx, off, var) \
    memset(l, twl4c, var)
    case_set_upto32_with_default(ch4,,, 0);
#undef default_memset
#undef set_ctx
285 286 287
}

void dav1d_create_lf_mask_intra(Av1Filter *const lflvl,
288
                                uint8_t (*const level_cache)[4],
289
                                const ptrdiff_t b4_stride,
290
                                const Dav1dFrameHeader *const hdr,
291 292 293 294 295 296 297 298 299 300 301 302 303
                                const uint8_t (*filter_level)[8][2],
                                const int bx, const int by,
                                const int iw, const int ih,
                                const enum BlockSize bs,
                                const enum RectTxfmSize ytx,
                                const enum RectTxfmSize uvtx,
                                const enum Dav1dPixelLayout layout,
                                uint8_t *const ay, uint8_t *const ly,
                                uint8_t *const auv, uint8_t *const luv)
{
    if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1])
        return;

304
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
305 306 307 308 309
    const int bw4 = imin(iw - bx, b_dim[0]);
    const int bh4 = imin(ih - by, b_dim[1]);
    const int bx4 = bx & 31;
    const int by4 = by & 31;

310 311 312 313 314 315 316 317
    if (bw4 && bh4) {
        uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx;
        for (int y = 0; y < bh4; y++) {
            for (int x = 0; x < bw4; x++) {
                level_cache_ptr[x][0] = filter_level[0][0][0];
                level_cache_ptr[x][1] = filter_level[1][0][0];
            }
            level_cache_ptr += b4_stride;
318 319
        }

320 321
        mask_edges_intra(lflvl->filter_y, by4, bx4, bw4, bh4, ytx, ay, ly);
    }
322 323 324 325 326

    if (!auv) return;

    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
327 328 329 330
    const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
                          (b_dim[0] + ss_hor) >> ss_hor);
    const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
                          (b_dim[1] + ss_ver) >> ss_ver);
331 332 333

    if (!cbw4 || !cbh4) return;

334 335 336
    const int cbx4 = bx4 >> ss_hor;
    const int cby4 = by4 >> ss_ver;

337 338
    uint8_t (*level_cache_ptr)[4] =
        level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
339 340 341 342 343 344 345 346
    for (int y = 0; y < cbh4; y++) {
        for (int x = 0; x < cbw4; x++) {
            level_cache_ptr[x][2] = filter_level[2][0][0];
            level_cache_ptr[x][3] = filter_level[3][0][0];
        }
        level_cache_ptr += b4_stride;
    }

347 348
    mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, 0, uvtx,
                      auv, luv, ss_hor, ss_ver);
349 350 351
}

void dav1d_create_lf_mask_inter(Av1Filter *const lflvl,
352
                                uint8_t (*const level_cache)[4],
353
                                const ptrdiff_t b4_stride,
354
                                const Dav1dFrameHeader *const hdr,
355 356 357 358 359 360 361 362 363 364 365 366 367
                                const uint8_t (*filter_level)[8][2],
                                const int bx, const int by,
                                const int iw, const int ih,
                                const int skip, const enum BlockSize bs,
                                const uint16_t *const tx_masks,
                                const enum RectTxfmSize uvtx,
                                const enum Dav1dPixelLayout layout,
                                uint8_t *const ay, uint8_t *const ly,
                                uint8_t *const auv, uint8_t *const luv)
{
    if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1])
        return;

368
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
369 370 371 372 373
    const int bw4 = imin(iw - bx, b_dim[0]);
    const int bh4 = imin(ih - by, b_dim[1]);
    const int bx4 = bx & 31;
    const int by4 = by & 31;

374 375 376 377 378 379 380 381
    if (bw4 && bh4) {
        uint8_t (*level_cache_ptr)[4] = level_cache + by * b4_stride + bx;
        for (int y = 0; y < bh4; y++) {
            for (int x = 0; x < bw4; x++) {
                level_cache_ptr[x][0] = filter_level[0][0][0];
                level_cache_ptr[x][1] = filter_level[1][0][0];
            }
            level_cache_ptr += b4_stride;
382 383
        }

384 385 386
        mask_edges_inter(lflvl->filter_y, by4, bx4, bw4, bh4, skip,
                         dav1d_max_txfm_size_for_bs[bs][0], tx_masks, ay, ly);
    }
387 388 389 390 391

    if (!auv) return;

    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
392 393 394 395
    const int cbw4 = imin(((iw + ss_hor) >> ss_hor) - (bx >> ss_hor),
                          (b_dim[0] + ss_hor) >> ss_hor);
    const int cbh4 = imin(((ih + ss_ver) >> ss_ver) - (by >> ss_ver),
                          (b_dim[1] + ss_ver) >> ss_ver);
396 397 398

    if (!cbw4 || !cbh4) return;

399 400 401
    const int cbx4 = bx4 >> ss_hor;
    const int cby4 = by4 >> ss_ver;

402 403
    uint8_t (*level_cache_ptr)[4] =
        level_cache + (by >> ss_ver) * b4_stride + (bx >> ss_hor);
404 405 406 407 408 409 410 411
    for (int y = 0; y < cbh4; y++) {
        for (int x = 0; x < cbw4; x++) {
            level_cache_ptr[x][2] = filter_level[2][0][0];
            level_cache_ptr[x][3] = filter_level[3][0][0];
        }
        level_cache_ptr += b4_stride;
    }

412 413
    mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, skip, uvtx,
                      auv, luv, ss_hor, ss_ver);
414 415 416 417
}

/*
 * Fill the loop-filter limit look-up table for a given sharpness.
 *
 * For every level 0..63:
 *   i[level] = limit, where limit is the level itself, reduced by
 *              (sharp + 3) >> 2 bits and capped at 9 - sharp when
 *              sharp > 0, and never below 1;
 *   e[level] = 2 * (level + 2) + limit.
 * sharp[0] and sharp[1] store the shift and the cap used above (the cap is
 * 0xff when sharpness is 0).
 */
void dav1d_calc_eih(Av1FilterLUT *const lim_lut, const int filter_sharpness) {
    // set E/I values from loopfilter level (no H table is written here)
    const int sharp = filter_sharpness;
    for (int level = 0; level < 64; level++) {
        int limit = level;

        if (sharp > 0) {
            limit >>= (sharp + 3) >> 2;
            limit = imin(limit, 9 - sharp);
        }
        limit = imax(limit, 1);

        lim_lut->i[level] = limit;
        lim_lut->e[level] = 2 * (level + 2) + limit;
    }
    lim_lut->sharp[0] = (sharp + 3) >> 2;
    lim_lut->sharp[1] = sharp ? 9 - sharp : 0xff;
}

435 436 437
static inline void calc_lf_value(uint8_t (*const lflvl_values)[2],
                                 const int is_chroma, const int base_lvl,
                                 const int lf_delta, const int seg_delta,
438
                                 const Dav1dLoopfilterModeRefDeltas *const mr_delta)
439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
{
    const int base = iclip(iclip(base_lvl + lf_delta, 0, 63) + seg_delta, 0, 63);

    if (!base_lvl && is_chroma) {
        memset(lflvl_values, 0, 8 * 2);
    } else if (!mr_delta) {
        memset(lflvl_values, base, 8 * 2);
    } else {
        const int sh = base >= 32;
        lflvl_values[0][0] = lflvl_values[0][1] =
            iclip(base + (mr_delta->ref_delta[0] * (1 << sh)), 0, 63);
        for (int r = 1; r < 8; r++) {
            for (int m = 0; m < 2; m++) {
                const int delta =
                    mr_delta->mode_delta[m] + mr_delta->ref_delta[r];
                lflvl_values[r][m] = iclip(base + (delta * (1 << sh)), 0, 63);
            }
        }
    }
}

void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2],
461
                          const Dav1dFrameHeader *const hdr,
462 463 464 465 466 467 468 469 470
                          const int8_t lf_delta[4])
{
    const int n_seg = hdr->segmentation.enabled ? 8 : 1;

    if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1]) {
        memset(lflvl_values, 0, 8 * 4 * 2 * n_seg);
        return;
    }

471
    const Dav1dLoopfilterModeRefDeltas *const mr_deltas =
472 473 474
        hdr->loopfilter.mode_ref_delta_enabled ?
        &hdr->loopfilter.mode_ref_deltas : NULL;
    for (int s = 0; s < n_seg; s++) {
475
        const Dav1dSegmentationData *const segd =
476 477
            hdr->segmentation.enabled ? &hdr->segmentation.seg_data.d[s] : NULL;

478 479 480 481 482 483 484 485 486 487 488
        calc_lf_value(lflvl_values[s][0], 0, hdr->loopfilter.level_y[0],
                      lf_delta[0], segd ? segd->delta_lf_y_v : 0, mr_deltas);
        calc_lf_value(lflvl_values[s][1], 0, hdr->loopfilter.level_y[1],
                      lf_delta[hdr->delta.lf.multi ? 1 : 0],
                      segd ? segd->delta_lf_y_h : 0, mr_deltas);
        calc_lf_value(lflvl_values[s][2], 1, hdr->loopfilter.level_u,
                      lf_delta[hdr->delta.lf.multi ? 2 : 0],
                      segd ? segd->delta_lf_u : 0, mr_deltas);
        calc_lf_value(lflvl_values[s][3], 1, hdr->loopfilter.level_v,
                      lf_delta[hdr->delta.lf.multi ? 3 : 0],
                      segd ? segd->delta_lf_v : 0, mr_deltas);
489 490
    }
}