/* lf_apply_tmpl.c */
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#include <assert.h>
#include <string.h>

#include "common/intops.h"

#include "src/lf_apply.h"

static inline void filter_plane_cols_y(const Dav1dFrameContext *const f,
                                       const int have_left,
                                       const uint8_t (*lvl)[4],
                                       const ptrdiff_t b4_stride,
41
                                       const uint16_t (*const mask)[3][2],
42
                                       pixel *dst, const ptrdiff_t ls,
43
                                       const int w,
44 45 46 47 48
                                       const int starty4, const int endy4)
{
    const Dav1dDSPContext *const dsp = f->dsp;

    // filter edges between columns (e.g. block1 | block2)
49 50
    for (int x = 0; x < w; x++) {
        if (!have_left && !x) continue;
51 52 53 54 55 56
        uint32_t hmask[4];
        if (!starty4) {
            hmask[0] = mask[x][0][0];
            hmask[1] = mask[x][1][0];
            hmask[2] = mask[x][2][0];
            if (endy4 > 16) {
57 58 59
                hmask[0] |= (unsigned) mask[x][0][1] << 16;
                hmask[1] |= (unsigned) mask[x][1][1] << 16;
                hmask[2] |= (unsigned) mask[x][2][1] << 16;
60 61 62 63 64 65 66 67
            }
        } else {
            hmask[0] = mask[x][0][1];
            hmask[1] = mask[x][1][1];
            hmask[2] = mask[x][2][1];
        }
        hmask[3] = 0;
        dsp->lf.loop_filter_sb[0][0](&dst[x * 4], ls, hmask,
68
                                     (const uint8_t(*)[4]) &lvl[x][0], b4_stride,
69
                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
70 71 72 73 74 75 76
    }
}

static inline void filter_plane_rows_y(const Dav1dFrameContext *const f,
                                       const int have_top,
                                       const uint8_t (*lvl)[4],
                                       const ptrdiff_t b4_stride,
77
                                       const uint16_t (*const mask)[3][2],
78
                                       pixel *dst, const ptrdiff_t ls,
79
                                       const int w,
80 81 82 83 84 85 86 87 88 89
                                       const int starty4, const int endy4)
{
    const Dav1dDSPContext *const dsp = f->dsp;

    //                                 block1
    // filter edges between rows (e.g. ------)
    //                                 block2
    for (int y = starty4; y < endy4;
         y++, dst += 4 * PXSTRIDE(ls), lvl += b4_stride)
    {
90
        if (!have_top && !y) continue;
91
        const uint32_t vmask[4] = {
92 93 94
            mask[y][0][0] | ((unsigned) mask[y][0][1] << 16),
            mask[y][1][0] | ((unsigned) mask[y][1][1] << 16),
            mask[y][2][0] | ((unsigned) mask[y][2][1] << 16),
95 96 97
            0,
        };
        dsp->lf.loop_filter_sb[0][1](dst, ls, vmask,
98
                                     (const uint8_t(*)[4]) &lvl[0][1], b4_stride,
99
                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
100 101 102 103 104 105 106
    }
}

/*
 * Apply the chroma loop filter to the edges between columns of 4-pixel units,
 * for both chroma planes with the same edge mask.
 *
 * f         - frame context; supplies the DSP function table (f->dsp) and the
 *             limit lookup table (f->lf.lim_lut).
 * have_left - when zero, the edge at x == 0 is skipped.
 * lvl       - per-unit filter levels, 4 bytes per unit; byte [2] is used for
 *             the u plane and byte [3] for the v plane.
 * b4_stride - stride of the lvl array, forwarded unchanged to the DSP routine.
 * mask      - per-column edge masks, indexed [x][i][half] with i in 0..1.
 * u, v      - first pixel of each chroma plane area; column x starts at
 *             offset x * 4.
 * ls        - line stride shared by both chroma planes.
 * w         - number of columns (in 4-pixel units) to visit.
 * starty4   - first row (in 4-pixel units) covered by this call.
 * endy4     - one past the last row (in 4-pixel units) covered by this call.
 * ss_ver    - vertical subsampling shift; each mask half then spans
 *             (16 >> ss_ver) bits.
 */
static inline void filter_plane_cols_uv(const Dav1dFrameContext *const f,
                                        const int have_left,
                                        const uint8_t (*lvl)[4],
                                        const ptrdiff_t b4_stride,
                                        const uint16_t (*const mask)[2][2],
                                        pixel *const u, pixel *const v,
                                        const ptrdiff_t ls, const int w,
                                        const int starty4, const int endy4,
                                        const int ss_ver)
{
    const Dav1dDSPContext *const dsp = f->dsp;

    // filter edges between columns (e.g. block1 | block2)
    for (int x = 0; x < w; x++) {
        if (!have_left && !x) continue;
        uint32_t hmask[3];
        if (!starty4) {
            // Range starts at row 0: take the first half, and append the
            // second half above it when the range extends past the first
            // (16 >> ss_ver) rows.
            hmask[0] = mask[x][0][0];
            hmask[1] = mask[x][1][0];
            if (endy4 > (16 >> ss_ver)) {
                hmask[0] |= (unsigned) mask[x][0][1] << (16 >> ss_ver);
                hmask[1] |= (unsigned) mask[x][1][1] << (16 >> ss_ver);
            }
        } else {
            // Range does not start at row 0: only the second half is used,
            // placed in the low bits.
            hmask[0] = mask[x][0][1];
            hmask[1] = mask[x][1][1];
        }
        // The third mask slot is never populated for chroma here.
        hmask[2] = 0;
        dsp->lf.loop_filter_sb[1][0](&u[x * 4], ls, hmask,
                                     (const uint8_t(*)[4]) &lvl[x][2], b4_stride,
                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
        dsp->lf.loop_filter_sb[1][0](&v[x * 4], ls, hmask,
                                     (const uint8_t(*)[4]) &lvl[x][3], b4_stride,
                                     &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
    }
}

/*
 * Apply the chroma loop filter to the edges between rows of 4-pixel units,
 * for both chroma planes with the same edge mask.
 *
 * f         - frame context; supplies the DSP function table (f->dsp) and the
 *             limit lookup table (f->lf.lim_lut).
 * have_top  - when zero, the edge at y == 0 is skipped.
 * lvl       - per-unit filter levels for the first row, 4 bytes per unit;
 *             advanced by b4_stride per row. Byte [2] is used for the u plane
 *             and byte [3] for the v plane.
 * b4_stride - stride of the lvl array, also forwarded to the DSP routine.
 * mask      - per-row edge masks, indexed [y][i][half] with i in 0..1.
 * u, v      - first pixel of each chroma plane area.
 * ls        - line stride shared by both chroma planes.
 * w         - width value forwarded to the DSP routine.
 * starty4   - first row (in 4-pixel units) to visit.
 * endy4     - one past the last row (in 4-pixel units) to visit.
 * ss_hor    - horizontal subsampling shift; each mask half then spans
 *             (16 >> ss_hor) bits.
 */
static inline void filter_plane_rows_uv(const Dav1dFrameContext *const f,
                                        const int have_top,
                                        const uint8_t (*lvl)[4],
                                        const ptrdiff_t b4_stride,
                                        const uint16_t (*const mask)[2][2],
                                        pixel *const u, pixel *const v,
                                        const ptrdiff_t ls, const int w,
                                        const int starty4, const int endy4,
                                        const int ss_hor)
{
    const Dav1dDSPContext *const dsp = f->dsp;
    // Pixel offset of the current row, shared by the u and v planes.
    ptrdiff_t off_l = 0;

    //                                 block1
    // filter edges between rows (e.g. ------)
    //                                 block2
    // off_l and lvl are advanced in the loop header, so they also move
    // forward when an iteration is skipped via `continue`.
    for (int y = starty4; y < endy4;
         y++, off_l += 4 * PXSTRIDE(ls), lvl += b4_stride)
    {
        if (!have_top && !y) continue;
        // First half in the low bits, second half placed directly above it;
        // the third slot is always zero.
        const uint32_t vmask[3] = {
            mask[y][0][0] | ((unsigned) mask[y][0][1] << (16 >> ss_hor)),
            mask[y][1][0] | ((unsigned) mask[y][1][1] << (16 >> ss_hor)),
            0,
        };
        dsp->lf.loop_filter_sb[1][1](&u[off_l], ls, vmask,
                                     (const uint8_t(*)[4]) &lvl[0][2], b4_stride,
                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
        dsp->lf.loop_filter_sb[1][1](&v[off_l], ls, vmask,
                                     (const uint8_t(*)[4]) &lvl[0][3], b4_stride,
                                     &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
    }
}

/*
 * Loop-filter one superblock row of the current frame.
 *
 * The function works in three phases:
 *   1. At every tile column boundary crossing this row, rewrite the
 *      column-edge masks so that the filter size class of each edge bit is
 *      capped by the value recorded in f->lf.tx_lpf_right_edge.
 *   2. If start_of_tile_row is non-zero, do the same for the row-edge masks of
 *      the first row, capped by the tx_lpf_y / tx_lpf_uv values of the block
 *      contexts in f->a.
 *   3. Filter luma column edges, then luma row edges, then (unless both
 *      loopfilter.level_u and loopfilter.level_v are zero) chroma column
 *      edges and chroma row edges.
 *
 * f                 - frame context (headers, masks limits, levels, strides).
 * p                 - plane pointers for this row: p[0] luma, p[1] and p[2]
 *                     chroma.
 * lflvl             - per-128-pixel-column filter masks for this row; the
 *                     masks are modified in place by phases 1 and 2.
 * sby               - index of the superblock row; row 0 has no top edge.
 * start_of_tile_row - zero to skip phase 2; otherwise (value - 1) selects the
 *                     group of f->sb128w block contexts in f->a to read.
 */
void bytefn(dav1d_loopfilter_sbrow)(const Dav1dFrameContext *const f,
                                    pixel *const p[3], Av1Filter *const lflvl,
                                    int sby, const int start_of_tile_row)
{
    int x, have_left;
    // Don't filter outside the frame
    const int have_top = sby > 0;
    const int is_sb64 = !f->seq_hdr->sb128;
    // With 64x64 superblocks, odd rows use the second half (rows 16..31) of
    // the 128-pixel-high mask storage.
    const int starty4 = (sby & is_sb64) << 4;
    // Superblock size in 4-pixel units (32 or 16) and its log2 (5 or 4).
    const int sbsz = 32 >> is_sb64;
    const int sbl2 = 5 - is_sb64;
    // f->bh rounded up to a multiple of 32; used as the per-tile-column
    // stride of the tx_lpf_right_edge arrays below.
    const int halign = (f->bh + 31) & ~31;
    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
    // Number of bits per chroma mask half, and the first bit value that
    // belongs to the second half.
    const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
    const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
    // Clamp the row range to the bottom of the frame.
    const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
    const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;

    // fix lpf strength at tile col boundaries
    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
    for (int tile_col = 1;; tile_col++) {
        x = f->frame_hdr->tiling.col_start_sb[tile_col];
        // Stop once the tile start lies at or beyond the frame width.
        if ((x << sbl2) >= f->bw) break;
        // Column (in 4-pixel units) of the boundary inside its 128-pixel
        // column; x then becomes the index of that 128-pixel column.
        const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
        x >>= is_sb64;

        uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
        for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
            // sidx selects the 16-bit half, smask the bit inside that half.
            const int sidx = mask >= 0x10000U;
            const unsigned smask = mask >> (sidx << 4);
            // Current size class of this edge bit, derived from classes 2
            // and 1.
            const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
                                !!(y_hmask[1][sidx] & smask);
            // Clear the bit everywhere, then set it in the capped class.
            y_hmask[2][sidx] &= ~smask;
            y_hmask[1][sidx] &= ~smask;
            y_hmask[0][sidx] &= ~smask;
            y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
        }

        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
            uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
            for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
                 y++, uv_mask <<= 1)
            {
                const int sidx = uv_mask >= vmax;
                const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
                const int idx = !!(uv_hmask[1][sidx] & smask);
                uv_hmask[1][sidx] &= ~smask;
                uv_hmask[0][sidx] &= ~smask;
                uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
            }
        }
        // Advance to the limits recorded for the next tile column boundary.
        lpf_y  += halign;
        lpf_uv += halign >> ss_ver;
    }

    // fix lpf strength at tile row boundaries
    if (start_of_tile_row) {
        const BlockContext *a;
        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
             x < f->sb128w; x++, a++)
        {
            uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
            // Number of 4-pixel columns of this 128-pixel column that lie
            // inside the frame.
            const unsigned w = imin(32, f->w4 - (x << 5));
            for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
                const int sidx = mask >= 0x10000U;
                const unsigned smask = mask >> (sidx << 4);
                const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
                                    !!(y_vmask[1][sidx] & smask);
                y_vmask[2][sidx] &= ~smask;
                y_vmask[1][sidx] &= ~smask;
                y_vmask[0][sidx] &= ~smask;
                y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
            }

            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
                const unsigned cw = (w + ss_hor) >> ss_hor;
                uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
                for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
                    const int sidx = uv_mask >= hmax;
                    const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
                    const int idx = !!(uv_vmask[1][sidx] & smask);
                    uv_vmask[1][sidx] &= ~smask;
                    uv_vmask[0][sidx] &= ~smask;
                    uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
                }
            }
        }
    }

    // Luma, column edges: one call per 128-pixel column (32 level entries).
    // The leftmost column has no left neighbour.
    pixel *ptr;
    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
         x++, have_left = 1, ptr += 128, level_ptr += 32)
    {
        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],
                            imin(32, f->w4 - x * 32), starty4, endy4);
    }

    // Luma, row edges: runs only after all column edges of the row are done.
    level_ptr = f->lf.level + f->b4_stride * sby * sbsz;
    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],
                            imin(32, f->w4 - x * 32), starty4, endy4);
    }

    // Nothing to do for chroma when both chroma filter levels are zero.
    if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
        return;

    // Chroma, column edges: offsets and widths are scaled by the horizontal
    // subsampling, row ranges by the vertical subsampling.
    ptrdiff_t uv_off;
    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
    {
        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
                             lflvl[x].filter_uv[0],
                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
                             starty4 >> ss_ver, uv_endy4, ss_ver);
    }

    // Chroma, row edges.
    level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
    for (uv_off = 0, x = 0; x < f->sb128w;
         x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
    {
        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
                             lflvl[x].filter_uv[1],
                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
                             (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
                             starty4 >> ss_ver, uv_endy4, ss_hor);
    }
}