/* internal.h */
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __DAV1D_SRC_INTERNAL_H__
#define __DAV1D_SRC_INTERNAL_H__

#include <stdatomic.h>

#include "dav1d/data.h"

// Forward declarations of the frame/tile state types whose full definitions
// appear later in this file. They are declared before the src/ headers are
// included, presumably so those headers can refer to the types by pointer
// (TODO confirm against the included headers).
typedef struct Dav1dFrameContext Dav1dFrameContext;
typedef struct Dav1dTileState Dav1dTileState;
typedef struct Dav1dTileContext Dav1dTileContext;

#include "common/attributes.h"

#include "src/cdef.h"
#include "src/cdf.h"
#include "src/data.h"
#include "src/env.h"
#include "src/intra_edge.h"
#include "src/ipred.h"
#include "src/itx.h"
#include "src/levels.h"
#include "src/lf_mask.h"
#include "src/loopfilter.h"
#include "src/looprestoration.h"
#include "src/mc.h"
#include "src/msac.h"
#include "src/picture.h"
#include "src/recon.h"
#include "src/ref_mvs.h"
#include "src/thread.h"

// Bundle of the per-module DSP contexts. Dav1dContext keeps an array of
// three of these, indexed by bit depth (8, 10, 12 bits/component), and
// Dav1dFrameContext holds a const pointer to one of them.
typedef struct Dav1dDSPContext {
    Dav1dIntraPredDSPContext ipred;    // intra prediction
    Dav1dMCDSPContext mc;              // "MC" - presumably motion compensation (src/mc.h); confirm
    Dav1dInvTxfmDSPContext itx;        // inverse transform
    Dav1dLoopFilterDSPContext lf;      // loop filter
    Dav1dCdefDSPContext cdef;          // CDEF (src/cdef.h)
    Dav1dLoopRestorationDSPContext lr; // loop restoration
} Dav1dDSPContext;

struct Dav1dContext {
    Dav1dFrameContext *fc;
70
    unsigned n_fc;
71 72 73 74 75 76 77

    // cache of OBUs that make up a single frame before we submit them
    // to a frame worker to be decoded
    struct {
        Dav1dData data;
        int start, end;
    } tile[256];
78
    int n_tile_data;
79
    int n_tiles;
80
    Dav1dRef *seq_hdr_ref;
81
    Dav1dSequenceHeader *seq_hdr;
82
    Dav1dRef *frame_hdr_ref;
83
    Dav1dFrameHeader *frame_hdr;
84 85

    // decoded output picture queue
86
    Dav1dData in;
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
    Dav1dPicture out;
    struct {
        Dav1dThreadPicture *out_delayed;
        unsigned next;
    } frame_thread;

    // reference/entropy state
    struct {
        Dav1dThreadPicture p;
        Dav1dRef *segmap;
        Dav1dRef *refmvs;
        unsigned refpoc[7];
    } refs[8];
    CdfThreadContext cdf[8];

    Dav1dDSPContext dsp[3 /* 8, 10, 12 bits/component */];

    // tree to keep track of which edges are available
    struct {
        EdgeNode *root[2 /* BL_128X128 vs. BL_64X64 */];
        EdgeBranch branch_sb128[1 + 4 + 16 + 64];
        EdgeBranch branch_sb64[1 + 4 + 16];
        EdgeTip tip_sb128[256];
        EdgeTip tip_sb64[64];
    } intra_edge;
112 113

    Dav1dPicAllocator allocator;
114
    int apply_grain;
115 116
    int operating_point;
    unsigned operating_point_idc;
117
    int all_layers;
118 119 120
};

struct Dav1dFrameContext {
121
    Dav1dRef *seq_hdr_ref;
122
    Dav1dSequenceHeader *seq_hdr;
123
    Dav1dRef *frame_hdr_ref;
124
    Dav1dFrameHeader *frame_hdr;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
125 126 127
    Dav1dThreadPicture refp[7];
    Dav1dPicture cur; // during block coding / reconstruction
    Dav1dThreadPicture sr_cur; // after super-resolution upscaling
128 129 130 131 132 133 134
    Dav1dRef *mvs_ref;
    refmvs *mvs, *ref_mvs[7];
    Dav1dRef *ref_mvs_ref[7];
    Dav1dRef *cur_segmap_ref, *prev_segmap_ref;
    uint8_t *cur_segmap;
    const uint8_t *prev_segmap;
    unsigned refpoc[7], refrefpoc[7][7];
135
    uint8_t gmv_warp_allowed[7];
136 137 138 139 140 141 142
    CdfThreadContext in_cdf, out_cdf;
    struct {
        Dav1dData data;
        int start, end;
    } tile[256];
    int n_tile_data;

143 144 145 146 147
    // for scalable references
    struct ScalableMotionParams {
        int scale; // if no scaling, this is 0
        int step;
    } svc[7][2 /* x, y */];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
148
    int resize_step[2 /* y, uv */], resize_start[2 /* y, uv */];
149

150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
    const Dav1dContext *c;
    Dav1dTileContext *tc;
    int n_tc;
    Dav1dTileState *ts;
    int n_ts;
    const Dav1dDSPContext *dsp;
    struct {
        recon_b_intra_fn recon_b_intra;
        recon_b_inter_fn recon_b_inter;
        filter_sbrow_fn filter_sbrow;
        backup_ipred_edge_fn backup_ipred_edge;
        read_coef_blocks_fn read_coef_blocks;
    } bd_fn;

    int ipred_edge_sz;
    pixel *ipred_edge[3];
    ptrdiff_t b4_stride;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
167
    int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step, sr_sb128w;
168
    uint16_t dq[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */];
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
    const uint8_t *qm[2 /* is_1d */][N_RECT_TX_SIZES][3 /* plane */];
    BlockContext *a;
    int a_sz /* w*tile_rows */;
    AV1_COMMON *libaom_cm; // FIXME
    uint8_t jnt_weights[7][7];

    struct {
        struct thread_data td;
        int pass, die;
        // indexed using t->by * f->b4_stride + t->bx
        Av1Block *b;
        struct CodedBlockInfo {
            int16_t eob[3 /* plane */];
            uint8_t txtp[3 /* plane */];
        } *cbi;
        int8_t *txtp;
        // indexed using (t->by >> 1) * (f->b4_stride >> 1) + (t->bx >> 1)
        uint16_t (*pal)[3 /* plane */][8 /* idx */];
        // iterated over inside tile state
        uint8_t *pal_idx;
        coef *cf;
        // start offsets per tile
        int *tile_start_off;
    } frame_thread;

    // loopfilter
    struct {
        uint8_t (*level)[4];
        Av1Filter *mask;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
198
        Av1Restoration *lr_mask;
199
        int top_pre_cdef_toggle;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
200
        int mask_sz /* w*h */, lr_mask_sz, line_sz /* w */, lr_line_sz, re_sz /* h */;
201 202 203 204 205 206 207 208 209 210 211
        Av1FilterLUT lim_lut;
        int last_sharpness;
        uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
        uint8_t *tx_lpf_right_edge[2];
        pixel *cdef_line;
        pixel *cdef_line_ptr[2 /* pre, post */][3 /* plane */][2 /* y */];
        pixel *lr_lpf_line;
        pixel *lr_lpf_line_ptr[3 /* plane */];

        // in-loop filter per-frame state keeping
        int tile_row; // for carry-over at tile row edges
Ronald S. Bultje's avatar
Ronald S. Bultje committed
212
        pixel *p[3], *sr_p[3];
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
        Av1Filter *mask_ptr, *prev_mask_ptr;
    } lf;

    // threading (refer to tc[] for per-thread things)
    struct FrameTileThreadData {
        uint64_t available;
        pthread_mutex_t lock;
        pthread_cond_t cond, icond;
        int tasks_left, num_tasks;
        int (*task_idx_to_sby_and_tile_idx)[2];
        int titsati_sz, titsati_init[2];
    } tile_thread;
};

struct Dav1dTileState {
    struct {
        int col_start, col_end, row_start, row_end; // in 4px units
        int col, row; // in tile units
    } tiling;

    CdfContext cdf;
    MsacContext msac;

236
    atomic_int progress; // in sby units, TILE_ERROR after a decoding error
237 238 239 240 241 242 243 244 245
    struct {
        pthread_mutex_t lock;
        pthread_cond_t cond;
    } tile_thread;
    struct {
        uint8_t *pal_idx;
        coef *cf;
    } frame_thread;

246
    uint16_t dqmem[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */];
247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
    const uint16_t (*dq)[3][2];
    int last_qidx;

    int8_t last_delta_lf[4];
    uint8_t lflvlmem[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
    const uint8_t (*lflvl)[4][8][2];

    Av1RestorationUnit *lr_ref[3];
};

struct Dav1dTileContext {
    const Dav1dFrameContext *f;
    Dav1dTileState *ts;
    int bx, by;
    BlockContext l, *a;
    coef *cf;
263
    pixel *emu_edge; // stride=192 for non-SVC, or 320 for SVC
264 265 266 267 268 269
    // FIXME types can be changed to pixel (and dynamically allocated)
    // which would make copy/assign operations slightly faster?
    uint16_t al_pal[2 /* a/l */][32 /* bx/y4 */][3 /* plane */][8 /* palette_idx */];
    uint16_t pal[3 /* plane */][8 /* palette_idx */];
    uint8_t pal_sz_uv[2 /* a/l */][32 /* bx4/by4 */];
    uint8_t txtp_map[32 * 32]; // inter-only
270
    Dav1dWarpedMotionParams warpmv;
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
    union {
        void *mem;
        uint8_t *pal_idx;
        int16_t *ac;
        pixel *interintra, *lap;
        coef *compinter;
    } scratch;
    ALIGN(uint8_t scratch_seg_mask[128 * 128], 32);

    Av1Filter *lf_mask;
    int8_t *cur_sb_cdef_idx_ptr;
    // for chroma sub8x8, we need to know the filter for all 4 subblocks in
    // a 4x4 area, but the top/left one can go out of cache already, so this
    // keeps it accessible
    enum Filter2d tl_4x4_filter;

    struct {
        struct thread_data td;
        struct FrameTileThreadData *fttd;
        int die;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
291
        atomic_int flush;
292 293 294 295
    } tile_thread;
};

#endif /* __DAV1D_SRC_INTERNAL_H__ */