/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#include <string.h>

#include "common/intops.h"

#include "src/cdef_apply.h"

/*
 * Copy the two pixel rows immediately above row y_off of each plane in src
 * into the line buffers dst[plane][0] (row y_off - 2) and dst[plane][1]
 * (row y_off - 1).
 *
 * y_off and w are given for plane 0. For planes 1 and 2, w is halved unless
 * the layout is I444 and y_off is halved when the layout is I420; both of
 * those planes use src_stride[1]. For I400 only plane 0 is copied.
 */
static void backup2lines(pixel *const dst[3][2],
                         /*const*/ pixel *const src[3],
                         const ptrdiff_t src_stride[2], int y_off, int w,
                         const enum Dav1dPixelLayout layout)
{
    const ptrdiff_t luma_stride = PXSTRIDE(src_stride[0]);
    for (int row = 0; row < 2; row++)
        pixel_copy(dst[0][row], src[0] + (y_off - 2 + row) * luma_stride, w);

    if (layout != DAV1D_PIXEL_LAYOUT_I400) {
        const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
        const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
        const int chroma_w = w >> ss_hor;
        const int chroma_y = y_off >> ss_ver;
        const ptrdiff_t chroma_stride = PXSTRIDE(src_stride[1]);

        // plane-major, row-minor: same copy order as writing the four
        // copies out by hand
        for (int pl = 1; pl <= 2; pl++)
            for (int row = 0; row < 2; row++)
                pixel_copy(dst[pl][row],
                           src[pl] + (chroma_y - 2 + row) * chroma_stride,
                           chroma_w);
    }
}

/*
 * Save a 2-pixel-wide column from every plane of src into dst.
 *
 * For plane 0, the pixels at columns x_off - 2 and x_off - 1 of rows 0..7
 * are copied into dst[0][row]. For planes 1 and 2 (skipped for I400), x_off
 * is halved unless the layout is I444, and only 8 >> ss_ver rows are copied
 * (4 rows for I420, 8 otherwise); both of those planes use src_stride[1].
 *
 * Note that with x_off == 0 the copied pixels lie to the left of src[pl].
 */
static void backup2x8(pixel dst[3][8][2],
                      /*const*/ pixel *const src[3],
                      const ptrdiff_t src_stride[2], int x_off,
                      const enum Dav1dPixelLayout layout)
{
    // the running row offset is a sum of strides, so keep it in ptrdiff_t
    // rather than narrowing it to int
    ptrdiff_t y_off = 0;
    for (int y = 0; y < 8; y++, y_off += PXSTRIDE(src_stride[0]))
        pixel_copy(dst[0][y], &src[0][y_off + x_off - 2], 2);

    if (layout == DAV1D_PIXEL_LAYOUT_I400) return;
    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;

    x_off >>= ss_hor;
    y_off = 0;
    for (int y = 0; y < (8 >> ss_ver); y++, y_off += PXSTRIDE(src_stride[1])) {
        pixel_copy(dst[1][y], &src[1][y_off + x_off - 2], 2);
        pixel_copy(dst[2][y], &src[2][y_off + x_off - 2], 2);
    }
}

/*
 * Write a previously saved 2-pixel-wide column back into every plane of dst.
 *
 * For plane 0, src[0][row] is copied to the two pixels immediately left of
 * dst[0] (columns -2 and -1) on rows 0..7. For planes 1 and 2 (skipped for
 * I400) the same is done for 8 >> ss_ver rows (4 rows for I420, 8
 * otherwise), using dst_stride[1].
 */
static void restore2x8(pixel *const dst[3],
                       const ptrdiff_t dst_stride[2],
                       const pixel src[3][8][2], const enum Dav1dPixelLayout layout)
{
    // the running row offset is a sum of strides, so keep it in ptrdiff_t
    // rather than narrowing it to int
    ptrdiff_t y_off = 0;
    for (int y = 0; y < 8; y++, y_off += PXSTRIDE(dst_stride[0]))
        pixel_copy(&dst[0][y_off - 2], src[0][y], 2);

    if (layout == DAV1D_PIXEL_LAYOUT_I400) return;
    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;

    y_off = 0;
    for (int y = 0; y < (8 >> ss_ver); y++, y_off += PXSTRIDE(dst_stride[1])) {
        pixel_copy(&dst[1][y_off - 2], src[1][y], 2);
        pixel_copy(&dst[2][y_off - 2], src[2][y], 2);
    }
}

/*
 * Scale a filter strength according to the block variance.
 *
 * Returns 0 when var is 0. Otherwise returns
 * (strength * (4 + i) + 8) >> 4, where i is imin(ulog2(var >> 6), 12) if
 * var >> 6 is non-zero and 0 if it is zero.
 */
static int adjust_strength(const int strength, const unsigned var) {
    if (var == 0) return 0;

    const unsigned scaled_var = var >> 6;
    int i = 0;
    if (scaled_var)
        i = imin(ulog2(scaled_var), 12);

    const int weight = 4 + i;
    return (strength * weight + 8) >> 4;
}

/*
 * Run the CDEF filter over the block rows [by_start, by_end) of the current
 * frame, in place.
 *
 * f        - frame context; supplies the DSP function table, the frame
 *            header's cdef parameters, frame dimensions (bw/bh), picture
 *            strides/layout and the lf.cdef_line_ptr line buffers.
 * p        - per-plane pointers to the first pixel of row by_start.
 * lflvl    - per-128-wide-superblock filter info for this row of
 *            superblocks (indexed by sbx >> 1); provides cdef_idx[] and
 *            noskip_mask[][].
 * by_start - first block row to process.
 * by_end   - one past the last block row to process.
 *
 * by/bx advance in steps of 2 per 8x8 luma block (pointers move by 8 pixels
 * per step), i.e. they are counted in units of 4 pixels.
 *
 * Because the filter runs in place, pre-filter pixels that neighbouring
 * blocks still need are preserved as follows:
 *  - the last two pre-filter rows of each 8-row stripe are copied into
 *    f->lf.cdef_line_ptr[!tf] before filtering, and the copy made by the
 *    previous stripe (cdef_line_ptr[tf]) is handed to the filter;
 *    f->lf.top_pre_cdef_toggle is flipped after every stripe;
 *  - lr_bak[0] keeps the pre-filter two rightmost columns of the block being
 *    filtered, and lr_bak[1] temporarily holds the post-filter version of
 *    those columns while the next block is filtered.
 */
void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
                             pixel *const p[3],
                             const Av1Filter *const lflvl,
                             const int by_start, const int by_end)
{
    const Dav1dDSPContext *const dsp = f->dsp;
    enum CdefEdgeFlags edges = HAVE_BOTTOM | (by_start > 0 ? HAVE_TOP : 0);
    pixel *ptrs[3] = { p[0], p[1], p[2] };
    const int sbsz = 16;
    const int sb64w = f->sb128w << 1;
    const int damping = f->frame_hdr.cdef.damping + BITDEPTH - 8;
    const enum Dav1dPixelLayout layout = f->cur.p.p.layout;
    const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
    const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400;
    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;

    // FIXME a design improvement that could be made here is to keep a set of
    // flags for each block position on whether the block was filtered; if not,
    // the backup of pre-filter data is empty, and the restore is therefore
    // unnecessary as well.

    for (int by = by_start; by < by_end; by += 2, edges |= HAVE_TOP) {
        const int tf = f->lf.top_pre_cdef_toggle;
        if (by + 2 >= f->bh) edges &= ~HAVE_BOTTOM;

        if (edges & HAVE_BOTTOM) {
            // backup pre-filter data for next iteration
            backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.p.stride,
                         8, f->bw * 4, layout);
        }

        pixel lr_bak[2 /* idx */][3 /* plane */][8 /* y */][2 /* x */];
        pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
        // every stripe starts at the left frame edge
        edges &= ~HAVE_LEFT;
        edges |= HAVE_RIGHT;
        for (int sbx = 0, last_skip = 1; sbx < sb64w; sbx++, edges |= HAVE_LEFT) {
            const int sb128x = sbx >> 1;
            const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
            const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
            // skip the whole 64-wide unit if it has no cdef index or both
            // of its strengths are zero
            if (cdef_idx == -1 ||
                (!f->frame_hdr.cdef.y_strength[cdef_idx] &&
                 !f->frame_hdr.cdef.uv_strength[cdef_idx]))
            {
                last_skip = 1;
                goto next_sb;
            }

            const int y_lvl = f->frame_hdr.cdef.y_strength[cdef_idx];
            const int uv_lvl = f->frame_hdr.cdef.uv_strength[cdef_idx];
            pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
            for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
                 bx += 2, edges |= HAVE_LEFT)
            {
                if (bx + 2 >= f->bw) edges &= ~HAVE_RIGHT;

                // check if this 8x8 block had any coded coefficients; if not,
                // go to the next block
                const unsigned bx_mask = 3U << (bx & 14);
                const int by_idx = by & 30, bx_idx = (bx & 16) >> 4;
                if (!((lflvl[sb128x].noskip_mask[by_idx + 0][bx_idx] |
                       lflvl[sb128x].noskip_mask[by_idx + 1][bx_idx]) & bx_mask))
                {
                    last_skip = 1;
                    goto next_b;
                }

                if (!last_skip) {
                    // backup post-filter data (will be restored at the end)
                    backup2x8(lr_bak[1], bptrs, f->cur.p.stride, 0, layout);

                    // restore pre-filter data from last iteration
                    restore2x8(bptrs, f->cur.p.stride, lr_bak[0], layout);
                }
                if (edges & HAVE_RIGHT) {
                    // backup pre-filter data for next iteration
                    backup2x8(lr_bak[0], bptrs, f->cur.p.stride, 8, layout);
                }

                // the actual filter
                // each level packs the primary strength in the upper bits
                // and a 2-bit secondary strength (3 is mapped to 4) in the
                // lower bits; both are scaled up by the bitdepth
                const int y_pri_lvl = (y_lvl >> 2) << (BITDEPTH - 8);
                int y_sec_lvl = y_lvl & 3;
                y_sec_lvl += y_sec_lvl == 3;
                y_sec_lvl <<= BITDEPTH - 8;
                const int uv_pri_lvl = (uv_lvl >> 2) << (BITDEPTH - 8);
                int uv_sec_lvl = uv_lvl & 3;
                uv_sec_lvl += uv_sec_lvl == 3;
                uv_sec_lvl <<= BITDEPTH - 8;
                unsigned variance;
                const int dir = dsp->cdef.dir(bptrs[0], f->cur.p.stride[0],
                                              &variance);
                if (y_lvl) {
                    dsp->cdef.fb[0](bptrs[0], f->cur.p.stride[0],
                                    (pixel *const [2]) {
                                        &f->lf.cdef_line_ptr[tf][0][0][bx * 4],
                                        &f->lf.cdef_line_ptr[tf][0][1][bx * 4],
                                    },
                                    adjust_strength(y_pri_lvl, variance),
                                    y_sec_lvl, y_pri_lvl ? dir : 0,
                                    damping, edges);
                }
                if (uv_lvl && has_chroma) {
                    // I422 chroma uses a remapped direction; the other
                    // layouts reuse the luma direction as-is
                    const int uvdir =
                        layout != DAV1D_PIXEL_LAYOUT_I422 ? dir :
                        ((uint8_t[]) { 7, 0, 2, 4, 5, 6, 6, 6 })[dir];
                    for (int pl = 1; pl <= 2; pl++) {
                        dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.p.stride[1],
                                             (pixel *const [2]) {
                                                 &f->lf.cdef_line_ptr[tf][pl][0][bx * 4 >> ss_hor],
                                                 &f->lf.cdef_line_ptr[tf][pl][1][bx * 4 >> ss_hor],
                                             },
                                             uv_pri_lvl, uv_sec_lvl,
                                             uv_pri_lvl ? uvdir : 0,
                                             damping - 1, edges);
                    }
                }

                if (!last_skip) {
                    // restore post-filter data from the beginning of this loop
                    restore2x8(bptrs, f->cur.p.stride, lr_bak[1], layout);
                }
                last_skip = 0;

            next_b:
                bptrs[0] += 8;
                bptrs[1] += 8 >> ss_hor;
                bptrs[2] += 8 >> ss_hor;
            }

        next_sb:
            iptrs[0] += sbsz * 4;
            iptrs[1] += sbsz * 4 >> ss_hor;
            iptrs[2] += sbsz * 4 >> ss_hor;
        }

        // advance to the next 8-row stripe and swap the line buffers
        ptrs[0] += 8 * PXSTRIDE(f->cur.p.stride[0]);
        ptrs[1] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
        ptrs[2] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
        f->lf.top_pre_cdef_toggle ^= 1;
    }
}