common.h 28.1 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1
/*****************************************************************************
Fiona Glaser's avatar
Fiona Glaser committed
2
 * common.h: misc common functions
Laurent Aimar's avatar
Laurent Aimar committed
3
 *****************************************************************************
Henrik Gramner's avatar
Henrik Gramner committed
4
 * Copyright (C) 2003-2017 x264 project
Laurent Aimar's avatar
Laurent Aimar committed
5
6
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7
 *          Loren Merritt <lorenm@u.washington.edu>
Laurent Aimar's avatar
Laurent Aimar committed
8
9
10
11
12
13
14
15
16
17
18
19
20
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
Fiona Glaser's avatar
Fiona Glaser committed
22
23
24
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
Laurent Aimar's avatar
Laurent Aimar committed
25
26
 *****************************************************************************/

27
28
#ifndef X264_COMMON_H
#define X264_COMMON_H
Laurent Aimar's avatar
Laurent Aimar committed
29

30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#include "base.h"

/* Macros for templating function calls according to bit depth.
 * x264_template(w) hands the fixed prefix `x264`, the current BIT_DEPTH and the
 * name `w` to x264_glue3, which is defined outside this file.
 * NOTE(review): x264_glue3 presumably token-pastes its three arguments into a
 * single bit-depth-specific identifier -- confirm against its definition. */
#define x264_template(w) x264_glue3(x264, BIT_DEPTH, w)

/****************************************************************************
 * API Templates
 *
 * Each public entry point below is redirected to its x264_template() name, so
 * code that includes this header and spells the plain name refers to the
 * templated symbol instead.
 ****************************************************************************/
#define x264_nal_encode x264_template(nal_encode)
#define x264_encoder_reconfig x264_template(encoder_reconfig)
#define x264_encoder_parameters x264_template(encoder_parameters)
#define x264_encoder_headers x264_template(encoder_headers)
#define x264_encoder_encode x264_template(encoder_encode)
#define x264_encoder_close x264_template(encoder_close)
#define x264_encoder_delayed_frames x264_template(encoder_delayed_frames)
#define x264_encoder_maximum_delayed_frames x264_template(encoder_maximum_delayed_frames)
#define x264_encoder_intra_refresh x264_template(encoder_intra_refresh)
#define x264_encoder_invalidate_reference x264_template(encoder_invalidate_reference)

/* x264_encoder_open may already be defined as a macro before this point (that
 * earlier definition renames the external symbol so that linking against an
 * incompatible library fails). It is undefined here first, and then redefined
 * so that it is templated like the other entry points above. */
#undef x264_encoder_open
#define x264_encoder_open x264_template(encoder_open)

Loren Merritt's avatar
Loren Merritt committed
54
55
56
/****************************************************************************
 * Macros
 ****************************************************************************/
Henrik Gramner's avatar
Henrik Gramner committed
57
/* FRAME_SIZE() applied to 256*BIT_DEPTH, plus a constant 16.
 * NOTE(review): presumably the bit cost of a raw PCM macroblock (256 luma
 * samples plus chroma) with 16 bits of overhead -- confirm. */
#define X264_PCM_COST (FRAME_SIZE(256*BIT_DEPTH)+16)
58
/* QP range grows by 6 for every bit of depth above 8. */
#define QP_BD_OFFSET (6*(BIT_DEPTH-8))
Fiona Glaser's avatar
Fiona Glaser committed
59
60
/* QP_MAX_SPEC is 51 shifted by the bit-depth offset; QP_MAX extends it by a
 * further 18 steps. SPEC_QP() below clamps a value back to QP_MAX_SPEC. */
#define QP_MAX_SPEC (51+QP_BD_OFFSET)
#define QP_MAX (QP_MAX_SPEC+18)
61
/* Largest sample value representable in BIT_DEPTH bits. */
#define PIXEL_MAX ((1 << BIT_DEPTH)-1)
62
// arbitrary, but low because SATD scores are 1/4 normal
63
#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
Fiona Glaser's avatar
Fiona Glaser committed
64
/* Limits x to at most QP_MAX_SPEC via X264_MIN (defined outside this file). */
#define SPEC_QP(x) X264_MIN((x), QP_MAX_SPEC)
Loren Merritt's avatar
Loren Merritt committed
65

66
67
#define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
#define FILLER_OVERHEAD (NALU_OVERHEAD+1)
68
/* Requires an x264_t pointer named `h` in scope at the point of use.
 * Evaluates to NALU_OVERHEAD-1 when h->param.b_annexb is set,
 * h->param.i_avcintra_class is zero and h->out.i_nal is not 1;
 * otherwise it evaluates to NALU_OVERHEAD. */
#define SEI_OVERHEAD (NALU_OVERHEAD - (h->param.b_annexb && !h->param.i_avcintra_class && (h->out.i_nal-1)))
69

70
71
72
73
74
75
76
77
78
79
/* Interlacing state accessors.
 * With HAVE_INTERLACED enabled they read fields of an x264_t pointer named `h`,
 * which must be in scope wherever the macros are used. With it disabled they
 * are the constant 0, so conditions written in terms of these macros become
 * compile-time constants. */
#if HAVE_INTERLACED
#   define MB_INTERLACED h->mb.b_interlaced
#   define SLICE_MBAFF h->sh.b_mbaff
#   define PARAM_INTERLACED h->param.b_interlaced
#else
#   define MB_INTERLACED 0
#   define SLICE_MBAFF 0
#   define PARAM_INTERLACED 0
#endif

80
81
82
83
84
85
86
87
88
89
90
/* Chroma subsampling helpers.
 * If CHROMA_FORMAT is already defined when this header is read, the horizontal
 * and vertical shifts are derived from it by comparison against the CHROMA_4xx
 * constants (defined outside this file): horizontal shift is 1 for 4:2:0 and
 * 4:2:2, vertical shift is 1 for 4:2:0 only.
 * Otherwise all three macros read runtime state through an x264_t pointer
 * named `h`, which must be in scope at the point of use. */
#ifdef CHROMA_FORMAT
#    define CHROMA_H_SHIFT (CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422)
#    define CHROMA_V_SHIFT (CHROMA_FORMAT == CHROMA_420)
#else
#    define CHROMA_FORMAT h->sps->i_chroma_format_idc
#    define CHROMA_H_SHIFT h->mb.chroma_h_shift
#    define CHROMA_V_SHIFT h->mb.chroma_v_shift
#endif

/* CHROMA_SIZE(s): s scaled down by both chroma shifts (size of one chroma
 * plane for a luma size of s).
 * FRAME_SIZE(s): s plus two chroma-sized contributions. */
#define CHROMA_SIZE(s) ((s)>>(CHROMA_H_SHIFT+CHROMA_V_SHIFT))
#define FRAME_SIZE(s) ((s)+2*CHROMA_SIZE(s))
Henrik Gramner's avatar
Henrik Gramner committed
91
/* True when the chroma format is 4:4:4. */
#define CHROMA444 (CHROMA_FORMAT == CHROMA_444)
Fiona Glaser's avatar
Fiona Glaser committed
92

93
/* Sample and coefficient types, selected by HIGH_BIT_DEPTH.
 *   pixel    - one sample (16-bit when HIGH_BIT_DEPTH, else 8-bit)
 *   pixel4   - an integer wide enough to hold four `pixel`s
 *   dctcoef  - signed coefficient type (32-bit / 16-bit)
 *   udctcoef - unsigned counterpart of dctcoef
 * PIXEL_SPLAT_X4(x) multiplies x by a constant with a 1 in each pixel-wide
 * lane, i.e. it replicates x into four lanes provided x fits in one pixel.
 * MPIXEL_X4(src) maps to M64/M32, which are defined outside this file.
 * NOTE(review): M64/M32 presumably access memory at src as a 64/32-bit
 * integer -- confirm. */
#if HIGH_BIT_DEPTH
94
95
96
    typedef uint16_t pixel;
    typedef uint64_t pixel4;
    typedef int32_t  dctcoef;
97
    typedef uint32_t udctcoef;
98

99
100
101
102
103
104
#   define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
#   define MPIXEL_X4(src) M64(src)
#else
    typedef uint8_t  pixel;
    typedef uint32_t pixel4;
    typedef int16_t  dctcoef;
105
    typedef uint16_t udctcoef;
106
107
108
109
110
111

#   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
#   define MPIXEL_X4(src) M32(src)
#endif

/* Assigns MPIXEL_X4(src) to MPIXEL_X4(dst). The expansion is a bare assignment
 * expression with no enclosing parentheses, so use it as a statement only. */
#define CPPIXEL_X4(dst,src) MPIXEL_X4(dst) = MPIXEL_X4(src)
112

113
114
115
/****************************************************************************
 * Includes
 ****************************************************************************/
116
117
118
#if HAVE_OPENCL
#include "opencl.h"
#endif
119
#include "cabac.h"
120
#include "bitstream.h"
Laurent Aimar's avatar
Laurent Aimar committed
121
122
123
124
125
126
#include "set.h"
#include "predict.h"
#include "pixel.h"
#include "mc.h"
#include "frame.h"
#include "dct.h"
127
#include "quant.h"
128
#include "threadpool.h"
Laurent Aimar's avatar
Laurent Aimar committed
129

130
/****************************************************************************
131
 * General functions
132
 ****************************************************************************/
133

134
/* log */
135
/* Each function name below is first redirected to its x264_template() name,
 * so the prototype that follows declares the templated symbol. */
#define x264_log x264_template(log)
136
137
/* Variadic logging entry point taking the encoder handle, a level and a
 * format string.
 * NOTE(review): psz_fmt is presumably a printf-style format consumed together
 * with the variadic arguments -- confirm against the definition. */
void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );

138
#define x264_cavlc_init x264_template(cavlc_init)
139
/* NOTE(review): presumably sets up CAVLC state or tables for encoder `h`;
 * the definition is not visible here. */
void x264_cavlc_init( x264_t *h );
140
#define x264_cabac_init x264_template(cabac_init)
Fiona Glaser's avatar
Fiona Glaser committed
141
/* NOTE(review): presumably sets up CABAC state or tables for encoder `h`;
 * the definition is not visible here. */
void x264_cabac_init( x264_t *h );
Loren Merritt's avatar
Loren Merritt committed
142

143
/**
 * Clamp an integer sample value to the valid pixel range [0, PIXEL_MAX].
 *
 * @param x  Sample value, possibly negative or larger than PIXEL_MAX.
 * @return   x unchanged when 0 <= x <= PIXEL_MAX, 0 when x is negative,
 *           PIXEL_MAX when x is above the range.
 *
 * The saturation is written as an explicit sign test instead of the
 * `(-x)>>31 & PIXEL_MAX` trick: that form depends on an implementation-defined
 * arithmetic right shift of a negative value, assumes a 32-bit int, and
 * overflows (undefined behaviour) when x == INT_MIN.
 */
static ALWAYS_INLINE pixel x264_clip_pixel( int x )
{
    /* PIXEL_MAX is an all-ones mask, so any bit outside it means x is either
     * negative or greater than PIXEL_MAX. */
    if( !(x & ~PIXEL_MAX) )
        return x;

    return x < 0 ? 0 : PIXEL_MAX;
}

148
149
150
/****************************************************************************
 *
 ****************************************************************************/
Laurent Aimar's avatar
Laurent Aimar committed
151
152
153
154
155
156
157
/* Per-slice header state. An instance is embedded in x264_t as `sh`, with a
 * second copy as `sh_backup`.
 * NOTE(review): the field names closely follow the H.264 slice_header()
 * syntax elements; the exact mapping and value ranges are presumed from the
 * names and should be confirmed against the code that fills this struct. */
typedef struct
{
    /* Parameter sets referenced by this slice (pointers; ownership is not
     * visible here). */
    x264_sps_t *sps;
    x264_pps_t *pps;

    int i_type;
    int i_first_mb;
Loren Merritt's avatar
Loren Merritt committed
158
    int i_last_mb;
Laurent Aimar's avatar
Laurent Aimar committed
159
160
161
162
163

    int i_pps_id;

    int i_frame_num;

164
    int b_mbaff;
Laurent Aimar's avatar
Laurent Aimar committed
165
166
167
168
169
    int b_field_pic;
    int b_bottom_field;

    int i_idr_pic_id;   /* -1 if nal_type != 5 */

170
    int i_poc;
Laurent Aimar's avatar
Laurent Aimar committed
171
172
173
174
175
176
177
178
179
180
181
    int i_delta_poc_bottom;

    int i_delta_poc[2];
    int i_redundant_pic_cnt;

    int b_direct_spatial_mv_pred;

    int b_num_ref_idx_override;
    int i_num_ref_idx_l0_active;
    int i_num_ref_idx_l1_active;

182
    /* Reference list reordering: one flag per list, plus up to X264_REF_MAX
     * (idc, arg) commands per list. */
    int b_ref_pic_list_reordering[2];
Lamont Alston's avatar
Lamont Alston committed
183
184
    struct
    {
185
186
        int idc;
        int arg;
187
    } ref_pic_list_order[2][X264_REF_MAX];
188

Dylan Yudaken's avatar
Dylan Yudaken committed
189
    /* P-frame weighting */
190
    int b_weighted_pred;
191
    /* NOTE(review): first index presumably a reference index, second index
     * presumably the plane (Y, U, V) -- confirm. */
    x264_weight_t weight[X264_REF_MAX*2][3];
Dylan Yudaken's avatar
Dylan Yudaken committed
192

Lamont Alston's avatar
Lamont Alston committed
193
194
195
196
197
198
    /* Memory management control operations: i_mmco_command_count is
     * presumably the number of valid entries in mmco[] -- confirm. */
    int i_mmco_remove_from_end;
    int i_mmco_command_count;
    struct /* struct for future expansion */
    {
        int i_difference_of_pic_nums;
        int i_poc;
199
    } mmco[X264_REF_MAX];
Lamont Alston's avatar
Lamont Alston committed
200

Laurent Aimar's avatar
Laurent Aimar committed
201
202
    int i_cabac_init_idc;

203
    int i_qp;
Laurent Aimar's avatar
Laurent Aimar committed
204
205
206
207
208
209
210
211
212
213
214
    int i_qp_delta;
    /* NOTE(review): "swidth" looks like a misspelling of "switch"
     * (sp_for_switch_flag); the name is part of the interface and is left
     * unchanged. */
    int b_sp_for_swidth;
    int i_qs_delta;

    /* deblocking filter */
    int i_disable_deblocking_filter_idc;
    int i_alpha_c0_offset;
    int i_beta_offset;

} x264_slice_header_t;

Steven Walters's avatar
Steven Walters committed
215
216
/* State for the lookahead stage: control flags, keyframe bookkeeping, a
 * thread handle and three synchronized frame lists.
 * NOTE(review): how the lists are used is not visible in this header; the
 * per-field notes below are inferred from names and should be confirmed. */
typedef struct x264_lookahead_t
{
217
    /* Declared volatile; presumably polled by the lookahead thread as its
     * shutdown request flag. */
    volatile uint8_t              b_exit_thread;
Steven Walters's avatar
Steven Walters committed
218
219
    uint8_t                       b_thread_active;
    uint8_t                       b_analyse_keyframe;
Fiona Glaser's avatar
Fiona Glaser committed
220
    int                           i_last_keyframe;
Steven Walters's avatar
Steven Walters committed
221
222
    int                           i_slicetype_length;
    x264_frame_t                  *last_nonb;
223
224
225
226
    x264_pthread_t                thread_handle;
    /* presumably: ifbuf = input frames, next = frames queued for analysis,
     * ofbuf = output frames -- TODO confirm */
    x264_sync_frame_list_t        ifbuf;
    x264_sync_frame_list_t        next;
    x264_sync_frame_list_t        ofbuf;
Steven Walters's avatar
Steven Walters committed
227
228
} x264_lookahead_t;

Laurent Aimar's avatar
Laurent Aimar committed
229
230
/* Forward declaration only: the struct body is not defined in this header, so
 * code here can hold x264_ratecontrol_t pointers (see the `rc` member of
 * x264_t) without seeing its layout. */
typedef struct x264_ratecontrol_t   x264_ratecontrol_t;

231
232
233
234
235
236
237
238
239
/* Five 4-entry byte tables grouped together; x264_t.mb.left_index_table points
 * at a constant instance of this type.
 * NOTE(review): presumably each table maps the four left-edge block rows of
 * the current macroblock to an index into the left neighbour's intra mode,
 * non-zero-count, chroma non-zero-count, mv and ref data -- confirm against
 * the code that reads left_index_table. */
typedef struct x264_left_table_t
{
    uint8_t intra[4];
    uint8_t nnz[4];
    uint8_t nnz_chroma[4];
    uint8_t mv[4];
    uint8_t ref[4];
} x264_left_table_t;

240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
/* Current frame stats.
 * Per-frame counters kept as plain int. The cumulative statistics in x264_t's
 * stat struct use the same array dimensions with int64_t elements.
 * NOTE(review): the meaning of each array dimension (19, 17, 6, 4x13, 3) is
 * not established in this header -- confirm against the enums they index. */
typedef struct
{
    /* MV bits (MV+Ref+Block Type) */
    int i_mv_bits;
    /* Texture bits (DCT coefs) */
    int i_tex_bits;
    /* NOTE(review): presumably all remaining bits that are neither MV nor
     * texture -- confirm. */
    int i_misc_bits;
    /* MB type counts */
    int i_mb_count[19];
    int i_mb_count_i;
    int i_mb_count_p;
    int i_mb_count_skip;
    int i_mb_count_8x8dct[2];
    int i_mb_count_ref[2][X264_REF_MAX*2];
    int i_mb_partition[17];
    int i_mb_cbp[6];
    int i_mb_pred_mode[4][13];
    int i_mb_field[3];
    /* Adaptive direct mv pred */
    int i_direct_score[2];
    /* Metrics */
    int64_t i_ssd[3];   /* presumably one entry per plane -- TODO confirm */
    double f_ssim;
    int i_ssim_cnt;
} x264_frame_stat_t;

Laurent Aimar's avatar
Laurent Aimar committed
268
269
270
271
272
struct x264_t
{
    /* encoder parameters */
    x264_param_t    param;

Steven Walters's avatar
Steven Walters committed
273
    x264_t          *thread[X264_THREAD_MAX+1];
Fiona Glaser's avatar
Fiona Glaser committed
274
    x264_t          *lookahead_thread[X264_LOOKAHEAD_THREAD_MAX];
Loren Merritt's avatar
Loren Merritt committed
275
276
    int             b_thread_active;
    int             i_thread_phase; /* which thread to use for the next frame */
277
    int             i_thread_idx;   /* which thread this is */
278
279
    int             i_threadslice_start; /* first row in this thread slice */
    int             i_threadslice_end; /* row after the end of this thread slice */
280
    int             i_threadslice_pass; /* which pass of encoding we are on */
281
    x264_threadpool_t *threadpool;
Fiona Glaser's avatar
Fiona Glaser committed
282
    x264_threadpool_t *lookaheadpool;
283
284
    x264_pthread_mutex_t mutex;
    x264_pthread_cond_t cv;
Loren Merritt's avatar
Loren Merritt committed
285

Laurent Aimar's avatar
Laurent Aimar committed
286
287
288
289
    /* bitstream output */
    struct
    {
        int         i_nal;
Fiona Glaser's avatar
Fiona Glaser committed
290
291
        int         i_nals_allocated;
        x264_nal_t  *nal;
Laurent Aimar's avatar
Laurent Aimar committed
292
293
294
295
296
        int         i_bitstream;    /* size of p_bitstream */
        uint8_t     *p_bitstream;   /* will hold data for all nal */
        bs_t        bs;
    } out;

297
298
299
    uint8_t *nal_buffer;
    int      nal_buffer_size;

300
301
302
    x264_t          *reconfig_h;
    int             reconfig;

Loren Merritt's avatar
Loren Merritt committed
303
304
    /**** thread synchronization starts here ****/

Laurent Aimar's avatar
Laurent Aimar committed
305
306
    /* frame number/poc */
    int             i_frame;
307
    int             i_frame_num;
Laurent Aimar's avatar
Laurent Aimar committed
308

309
310
311
312
    int             i_thread_frames; /* Number of different frames being encoded by threads;
                                      * 1 when sliced-threads is on. */
    int             i_nal_type;
    int             i_nal_ref_idc;
Loren Merritt's avatar
Loren Merritt committed
313

314
    int64_t         i_disp_fields;  /* Number of displayed fields (both coded and implied via pic_struct) */
315
    int             i_disp_fields_last_frame;
316
317
318
    int64_t         i_prev_duration; /* Duration of previous frame */
    int64_t         i_coded_fields; /* Number of coded fields (both coded and implied via pic_struct) */
    int64_t         i_cpb_delay;    /* Equal to number of fields preceding this field
319
                                     * since last buffering_period SEI */
320
321
    int64_t         i_coded_fields_lookahead; /* Use separate counters for lookahead */
    int64_t         i_cpb_delay_lookahead;
322

323
    int64_t         i_cpb_delay_pir_offset;
Kieran Kunhya's avatar
Kieran Kunhya committed
324
    int64_t         i_cpb_delay_pir_offset_next;
Kieran Kunhya's avatar
Kieran Kunhya committed
325

326
    int             b_queued_intra_refresh;
327
    int64_t         i_last_idr_pts;
328

Laurent Aimar's avatar
Laurent Aimar committed
329
330
    int             i_idr_pic_id;

Loren Merritt's avatar
Loren Merritt committed
331
332
    /* quantization matrix for decoding, [cqm][qp%6][coef] */
    int             (*dequant4_mf[4])[16];   /* [4][6][16] */
Fiona Glaser's avatar
Fiona Glaser committed
333
    int             (*dequant8_mf[4])[64];   /* [4][6][64] */
Loren Merritt's avatar
Loren Merritt committed
334
    /* quantization matrix for trellis, [cqm][qp][coef] */
335
336
    int             (*unquant4_mf[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
    int             (*unquant8_mf[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
Loren Merritt's avatar
Loren Merritt committed
337
    /* quantization matrix for deadzone */
338
339
340
341
342
343
    udctcoef        (*quant4_mf[4])[16];     /* [4][QP_MAX_SPEC+1][16] */
    udctcoef        (*quant8_mf[4])[64];     /* [4][QP_MAX_SPEC+1][64] */
    udctcoef        (*quant4_bias[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
    udctcoef        (*quant8_bias[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
    udctcoef        (*quant4_bias0[4])[16];  /* [4][QP_MAX_SPEC+1][16] */
    udctcoef        (*quant8_bias0[4])[64];  /* [4][QP_MAX_SPEC+1][64] */
Fiona Glaser's avatar
Fiona Glaser committed
344
    udctcoef        (*nr_offset_emergency)[4][64];
Loren Merritt's avatar
Loren Merritt committed
345

346
    /* mv/ref/mode cost arrays. */
Fiona Glaser's avatar
Fiona Glaser committed
347
348
    uint16_t *cost_mv[QP_MAX+1];
    uint16_t *cost_mv_fpel[QP_MAX+1][4];
349
350
351
    struct
    {
        uint16_t ref[QP_MAX+1][3][33];
352
        uint16_t i4x4_mode[QP_MAX+1][17];
353
    } *cost_table;
354

355
356
    const uint8_t   *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */

Laurent Aimar's avatar
Laurent Aimar committed
357
358
359
    /* Slice header */
    x264_slice_header_t sh;

360
361
362
363
    /* SPS / PPS */
    x264_sps_t      sps[1];
    x264_pps_t      pps[1];

Fiona Glaser's avatar
Fiona Glaser committed
364
365
366
367
    /* Slice header backup, for SEI_DEC_REF_PIC_MARKING */
    int b_sh_backup;
    x264_slice_header_t sh_backup;

Laurent Aimar's avatar
Laurent Aimar committed
368
369
370
371
372
    /* cabac context */
    x264_cabac_t    cabac;

    struct
    {
373
        /* Frames to be encoded (whose types have been decided) */
Steven Walters's avatar
Steven Walters committed
374
375
376
        x264_frame_t **current;
        /* Unused frames: 0 = fenc, 1 = fdec */
        x264_frame_t **unused[2];
Laurent Aimar's avatar
Laurent Aimar committed
377

Dylan Yudaken's avatar
Dylan Yudaken committed
378
379
380
        /* Unused blank frames (for duplicates) */
        x264_frame_t **blank_unused;

381
        /* frames used for reference + sentinels */
382
        x264_frame_t *reference[X264_REF_MAX+2];
Laurent Aimar's avatar
Laurent Aimar committed
383

Lamont Alston's avatar
Lamont Alston committed
384
385
386
387
388
        int i_last_keyframe;       /* Frame number of the last keyframe */
        int i_last_idr;            /* Frame number of the last IDR (not RP)*/
        int i_poc_last_open_gop;   /* Poc of the I frame of the last open-gop. The value
                                    * is only assigned during the period between that
                                    * I frame and the next P or I frame, else -1 */
389
390

        int i_input;    /* Number of input frames already accepted */
391
392
393
394

        int i_max_dpb;  /* Number of frames allocated in the decoded picture buffer */
        int i_max_ref0;
        int i_max_ref1;
395
        int i_delay;    /* Number of frames buffered for B reordering */
396
397
        int     i_bframe_delay;
        int64_t i_bframe_delay_time;
398
        int64_t i_first_pts;
Yusuke Nakamura's avatar
Yusuke Nakamura committed
399
        int64_t i_prev_reordered_pts[2];
400
401
        int64_t i_largest_pts;
        int64_t i_second_largest_pts;
402
        int b_have_lowres;  /* Whether 1/2 resolution luma planes are being used */
Loren Merritt's avatar
Loren Merritt committed
403
        int b_have_sub8x8_esa;
Laurent Aimar's avatar
Laurent Aimar committed
404
405
406
407
408
409
410
411
412
    } frames;

    /* current frame being encoded */
    x264_frame_t    *fenc;

    /* frame being reconstructed */
    x264_frame_t    *fdec;

    /* references lists */
413
414
    int             i_ref[2];
    x264_frame_t    *fref[2][X264_REF_MAX+3];
415
    x264_frame_t    *fref_nearest[2];
416
    int             b_ref_reorder[2];
Laurent Aimar's avatar
Laurent Aimar committed
417

418
419
    /* hrd */
    int initial_cpb_removal_delay;
Kieran Kunhya's avatar
Kieran Kunhya committed
420
    int initial_cpb_removal_delay_offset;
421
    int64_t i_reordered_pts_delay;
Laurent Aimar's avatar
Laurent Aimar committed
422
423
424
425

    /* Current MB DCT coeffs */
    struct
    {
426
        ALIGNED_64( dctcoef luma16x16_dc[3][16] );
Henrik Gramner's avatar
Henrik Gramner committed
427
        ALIGNED_16( dctcoef chroma_dc[2][8] );
428
        // FIXME share memory?
429
        ALIGNED_64( dctcoef luma8x8[12][64] );
430
        ALIGNED_64( dctcoef luma4x4[16*3][16] );
Laurent Aimar's avatar
Laurent Aimar committed
431
432
433
434
435
    } dct;

    /* MB table and cache for current frame/mb */
    struct
    {
436
437
        int     i_mb_width;
        int     i_mb_height;
438
439
        int     i_mb_count;                 /* number of mbs in a frame */

Henrik Gramner's avatar
Henrik Gramner committed
440
441
442
443
        /* Chroma subsampling */
        int     chroma_h_shift;
        int     chroma_v_shift;

Laurent Aimar's avatar
Laurent Aimar committed
444
445
        /* Strides */
        int     i_mb_stride;
446
447
        int     i_b8_stride;
        int     i_b4_stride;
Simon Horlick's avatar
Simon Horlick committed
448
449
        int     left_b8[2];
        int     left_b4[2];
Laurent Aimar's avatar
Laurent Aimar committed
450
451
452
453
454

        /* Current index */
        int     i_mb_x;
        int     i_mb_y;
        int     i_mb_xy;
455
456
        int     i_b8_xy;
        int     i_b4_xy;
Loren Merritt's avatar
Loren Merritt committed
457

458
        /* Search parameters */
Loren Merritt's avatar
Loren Merritt committed
459
        int     i_me_method;
460
        int     i_subpel_refine;
Loren Merritt's avatar
Loren Merritt committed
461
        int     b_chroma_me;
Loren Merritt's avatar
Loren Merritt committed
462
        int     b_trellis;
463
        int     b_noise_reduction;
464
        int     b_dct_decimate;
465
466
        int     i_psy_rd; /* Psy RD strength--fixed point value*/
        int     i_psy_trellis; /* Psy trellis strength--fixed point value*/
467

468
        int     b_interlaced;
469
        int     b_adaptive_mbaff; /* MBAFF+subme 0 requires non-adaptive MBAFF i.e. all field mbs */
470

471
        /* Allowed qpel MV range to stay within the picture + emulated edge pixels */
472
473
        int     mv_min[2];
        int     mv_max[2];
Simon Horlick's avatar
Simon Horlick committed
474
475
        int     mv_miny_row[3]; /* 0 == top progressive, 1 == bot progressive, 2 == interlaced */
        int     mv_maxy_row[3];
476
477
478
479
        /* Subpel MV range for motion search.
         * same mv_min/max but includes levels' i_mv_range. */
        int     mv_min_spel[2];
        int     mv_max_spel[2];
Simon Horlick's avatar
Simon Horlick committed
480
481
        int     mv_miny_spel_row[3];
        int     mv_maxy_spel_row[3];
482
        /* Fullpel MV range for motion search */
483
        ALIGNED_8( int16_t mv_limit_fpel[2][2] ); /* min_x, min_y, max_x, max_y */
Simon Horlick's avatar
Simon Horlick committed
484
485
        int     mv_miny_fpel_row[3];
        int     mv_maxy_fpel_row[3];
486

Loren Merritt's avatar
Loren Merritt committed
487
        /* neighboring MBs */
Laurent Aimar's avatar
Laurent Aimar committed
488
        unsigned int i_neighbour;
489
490
        unsigned int i_neighbour8[4];       /* neighbours of each 8x8 or 4x4 block that are available */
        unsigned int i_neighbour4[16];      /* at the time the block is coded */
491
        unsigned int i_neighbour_intra;     /* for constrained intra pred */
492
        unsigned int i_neighbour_frame;     /* ignoring slice boundaries */
Loren Merritt's avatar
Loren Merritt committed
493
        int     i_mb_type_top;
494
        int     i_mb_type_left[2];
Loren Merritt's avatar
Loren Merritt committed
495
496
        int     i_mb_type_topleft;
        int     i_mb_type_topright;
497
        int     i_mb_prev_xy;
498
        int     i_mb_left_xy[2];
499
        int     i_mb_top_xy;
500
501
        int     i_mb_topleft_xy;
        int     i_mb_topright_xy;
Simon Horlick's avatar
Simon Horlick committed
502
503
504
        int     i_mb_top_y;
        int     i_mb_topleft_y;
        int     i_mb_topright_y;
Fiona Glaser's avatar
Fiona Glaser committed
505
        const x264_left_table_t *left_index_table;
506
        int     i_mb_top_mbpair_xy;
Simon Horlick's avatar
Simon Horlick committed
507
        int     topleft_partition;
508
        int     b_allow_skip;
509
        int     field_decoding_flag;
Laurent Aimar's avatar
Laurent Aimar committed
510

Loren Merritt's avatar
Loren Merritt committed
511
        /**** thread synchronization ends here ****/
Loren Merritt's avatar
Loren Merritt committed
512
        /* subsequent variables are either thread-local or constant,
Loren Merritt's avatar
Loren Merritt committed
513
514
         * and won't be copied from one thread to another */

Laurent Aimar's avatar
Laurent Aimar committed
515
        /* mb table */
Henrik Gramner's avatar
Henrik Gramner committed
516
        uint8_t *base;                      /* base pointer for all malloced data in this mb */
Laurent Aimar's avatar
Laurent Aimar committed
517
        int8_t  *type;                      /* mb type */
518
        uint8_t *partition;                 /* mb partition */
Laurent Aimar's avatar
Laurent Aimar committed
519
        int8_t  *qp;                        /* mb qp */
Anton Mitrofanov's avatar
Anton Mitrofanov committed
520
        int16_t *cbp;                       /* mb cbp: 0x0?: luma, 0x?0: chroma, 0x100: luma dc, 0x200 and 0x400: chroma dc, 0x1000 PCM (all set for PCM) */
521
522
        int8_t  (*intra4x4_pred_mode)[8];   /* intra4x4 pred mode. for non I4x4 set to I_PRED_4x4_DC(2) */
                                            /* actually has only 7 entries; set to 8 for write-combining optimizations */
Fiona Glaser's avatar
Fiona Glaser committed
523
        uint8_t (*non_zero_count)[16*3];    /* nzc. for I_PCM set to 16 */
Laurent Aimar's avatar
Laurent Aimar committed
524
525
        int8_t  *chroma_pred_mode;          /* chroma_pred_mode. cabac only. for non intra I_PRED_CHROMA_DC(0) */
        int16_t (*mv[2])[2];                /* mb mv. set to 0 for intra mb */
Henrik Gramner's avatar
Henrik Gramner committed
526
        uint8_t (*mvd[2])[8][2];            /* absolute value of mb mv difference with predict, clipped to [0,33]. set to 0 if intra. cabac only */
527
        int8_t   *ref[2];                   /* mb ref. set to -1 if non used (intra or Lx only) */
528
        int16_t (*mvr[2][X264_REF_MAX*2])[2];/* 16x16 mv for each possible ref */
529
        int8_t  *skipbp;                    /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
530
        int8_t  *mb_transform_size;         /* transform_size_8x8_flag of each mb */
531
        uint32_t *slice_table;              /* sh->first_mb of the slice that the indexed mb is part of */
532
        uint8_t *field;
533

Dylan Yudaken's avatar
Dylan Yudaken committed
534
         /* buffer for weighted versions of the reference frames */
535
        pixel *p_weight_buf[X264_REF_MAX];
Dylan Yudaken's avatar
Dylan Yudaken committed
536

Laurent Aimar's avatar
Laurent Aimar committed
537
538
539
        /* current value */
        int     i_type;
        int     i_partition;
540
        ALIGNED_4( uint8_t i_sub_partition[4] );
541
        int     b_transform_8x8;
Laurent Aimar's avatar
Laurent Aimar committed
542
543
544
545
546
547
548

        int     i_cbp_luma;
        int     i_cbp_chroma;

        int     i_intra16x16_pred_mode;
        int     i_chroma_pred_mode;

549
550
551
552
553
        /* skip flags for i4x4 and i8x8
         * 0 = encode as normal.
         * 1 (non-RD only) = the DCT is still in h->dct, restore fdec and skip reconstruction.
         * 2 (RD only) = the DCT has since been overwritten by RD; restore that too. */
        int i_skip_intra;
Fiona Glaser's avatar
Fiona Glaser committed
554
555
        /* skip flag for motion compensation */
        /* if we've already done MC, we don't need to do it again */
556
        int b_skip_mc;
Fiona Glaser's avatar
Fiona Glaser committed
557
558
        /* set to true if we are re-encoding a macroblock. */
        int b_reencode_mb;
Fiona Glaser's avatar
Fiona Glaser committed
559
        int ip_offset; /* Used by PIR to offset the quantizer of intra-refresh blocks. */
Fiona Glaser's avatar
Fiona Glaser committed
560
        int b_deblock_rdo;
561
        int b_overflow; /* If CAVLC had a level code overflow during bitstream writing. */
562

Laurent Aimar's avatar
Laurent Aimar committed
563
564
        struct
        {
565
566
567
            /* space for p_fenc and p_fdec */
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
568
            ALIGNED_64( pixel fenc_buf[48*FENC_STRIDE] );
Henrik Gramner's avatar
Henrik Gramner committed
569
            ALIGNED_64( pixel fdec_buf[54*FDEC_STRIDE] );
570

Loren Merritt's avatar
Loren Merritt committed
571
            /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
572
573
            ALIGNED_32( pixel i4x4_fdec_buf[16*16] );
            ALIGNED_32( pixel i8x8_fdec_buf[16*16] );
574
575
            ALIGNED_64( dctcoef i8x8_dct_buf[3][64] );
            ALIGNED_64( dctcoef i4x4_dct_buf[15][16] );
576
577
            uint32_t i4x4_nnz_buf[4];
            uint32_t i8x8_nnz_buf[4];
578

579
            /* Psy trellis DCT data */
580
581
            ALIGNED_64( dctcoef fenc_dct8[4][64] );
            ALIGNED_64( dctcoef fenc_dct4[16][16] );
582

583
            /* Psy RD SATD/SA8D scores cache */
Henrik Gramner's avatar
Henrik Gramner committed
584
585
586
587
588
            ALIGNED_64( uint32_t fenc_satd_cache[32] );
            ALIGNED_16( uint64_t fenc_hadamard_cache[9] );

            int i4x4_cbp;
            int i8x8_cbp;
589

Laurent Aimar's avatar
Laurent Aimar committed
590
            /* pointer over mb of the frame to be compressed */
591
            pixel *p_fenc[3]; /* y,u,v */
592
            /* pointer to the actual source frame, not a block copy */
Fiona Glaser's avatar
Fiona Glaser committed
593
            pixel *p_fenc_plane[3];
Laurent Aimar's avatar
Laurent Aimar committed
594

Loren Merritt's avatar
Loren Merritt committed
595
            /* pointer over mb of the frame to be reconstructed  */
596
            pixel *p_fdec[3];
Laurent Aimar's avatar
Laurent Aimar committed
597
598

            /* pointer over mb of the references */
599
            int i_fref[2];
Fiona Glaser's avatar
Fiona Glaser committed
600
601
            /* [12]: yN, yH, yV, yHV, (NV12 ? uv : I444 ? (uN, uH, uV, uHV, vN, ...)) */
            pixel *p_fref[2][X264_REF_MAX*2][12];
602
603
            pixel *p_fref_w[X264_REF_MAX*2];  /* weighted fullpel luma */
            uint16_t *p_integral[2][X264_REF_MAX];
Laurent Aimar's avatar
Laurent Aimar committed
604

605
            /* fref stride */
Laurent Aimar's avatar
Laurent Aimar committed
606
607
608
609
610
611
            int     i_stride[3];
        } pic;

        /* cache */
        struct
        {
612
            /* real intra4x4_pred_mode if I_4X4 or I_8X8, I_PRED_4x4_DC if mb available, -1 if not */
Henrik Gramner's avatar
Henrik Gramner committed
613
            ALIGNED_16( int8_t intra4x4_pred_mode[X264_SCAN8_LUMA_SIZE] );
Laurent Aimar's avatar
Laurent Aimar committed
614

Henrik Gramner's avatar
Henrik Gramner committed
615
616
            /* i_non_zero_count if available else 0x80. intentionally misaligned by 8 for asm */
            ALIGNED_8( uint8_t non_zero_count[X264_SCAN8_SIZE] );
Laurent Aimar's avatar
Laurent Aimar committed
617

Loren Merritt's avatar
Loren Merritt committed
618
            /* -1 if unused, -2 if unavailable */
Fiona Glaser's avatar
Fiona Glaser committed
619
            ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
Laurent Aimar's avatar
Laurent Aimar committed
620

Loren Merritt's avatar
Loren Merritt committed
621
            /* 0 if not available */
Fiona Glaser's avatar
Fiona Glaser committed
622
623
            ALIGNED_16( int16_t mv[2][X264_SCAN8_LUMA_SIZE][2] );
            ALIGNED_8( uint8_t mvd[2][X264_SCAN8_LUMA_SIZE][2] );
624
625

            /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
Fiona Glaser's avatar
Fiona Glaser committed
626
            ALIGNED_4( int8_t skip[X264_SCAN8_LUMA_SIZE] );
627

628
629
            ALIGNED_4( int16_t direct_mv[2][4][2] );
            ALIGNED_4( int8_t  direct_ref[2][4] );
630
            int     direct_partition;
631
            ALIGNED_4( int16_t pskip_mv[2] );
632

633
634
            /* number of neighbors (top and left) that used 8x8 dct */
            int     i_neighbour_transform_size;
Simon Horlick's avatar
Simon Horlick committed
635
            int     i_neighbour_skip;
636
637
638
639

            /* neighbor CBPs */
            int     i_cbp_top;
            int     i_cbp_left;
Simon Horlick's avatar
Simon Horlick committed
640
641
642
643

            /* extra data required for mbaff in mv prediction */
            int16_t topright_mv[2][3][2];
            int8_t  topright_ref[2][3];
644
645
646

            /* current mb deblock strength */
            uint8_t (*deblock_strength)[8][4];
Laurent Aimar's avatar
Laurent Aimar committed
647
648
649
        } cache;

        /* */
650
        int     i_qp;       /* current qp */
651
        int     i_chroma_qp;
Laurent Aimar's avatar
Laurent Aimar committed
652
653
        int     i_last_qp;  /* last qp */
        int     i_last_dqp; /* last delta qp */
654
        int     b_variable_qp; /* whether qp is allowed to vary per macroblock */
Loren Merritt's avatar
Loren Merritt committed
655
        int     b_lossless;
Loren Merritt's avatar
Loren Merritt committed
656
657
        int     b_direct_auto_read; /* take stats for --direct auto from the 2pass log */
        int     b_direct_auto_write; /* analyse direct modes, to use and/or save */
Laurent Aimar's avatar
Laurent Aimar committed
658

Fiona Glaser's avatar
Fiona Glaser committed
659
660
661
662
663
        /* lambda values */
        int     i_trellis_lambda2[2][2]; /* [luma,chroma][inter,intra] */
        int     i_psy_rd_lambda;
        int     i_chroma_lambda2_offset;

664
        /* B_direct and weighted prediction */
Simon Horlick's avatar
Simon Horlick committed
665
        int16_t dist_scale_factor_buf[2][2][X264_REF_MAX*2][4];
666
        int16_t (*dist_scale_factor)[4];
Simon Horlick's avatar
Simon Horlick committed
667
        int8_t bipred_weight_buf[2][2][X264_REF_MAX*2][4];
668
        int8_t (*bipred_weight)[4];
669
        /* maps fref1[0]'s ref indices into the current list0 */
670
#define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
671
        int8_t  map_col_to_list0[X264_REF_MAX+2];
Fiona Glaser's avatar
Fiona Glaser committed
672
        int ref_blind_dupe; /* The index of the blind reference frame duplicate. */
673
        int8_t deblock_ref_table[X264_REF_MAX*2+2];
Fiona Glaser's avatar
Fiona Glaser committed
674
#define deblock_ref_table(x) h->mb.deblock_ref_table[(x)+2]
Laurent Aimar's avatar
Laurent Aimar committed
675
676
677
678
679
680
681
682
    } mb;

    /* rate control encoding only */
    x264_ratecontrol_t *rc;

    /* stats */
    struct
    {
Loren Merritt's avatar
Loren Merritt committed
683
        /* Cumulative stats */
684

Laurent Aimar's avatar
Laurent Aimar committed
685
        /* per slice info */
Fiona Glaser's avatar
Fiona Glaser committed
686
687
688
        int     i_frame_count[3];
        int64_t i_frame_size[3];
        double  f_frame_qp[3];
689
        int     i_consecutive_bframes[X264_BFRAME_MAX+1];
690
        /* */
Fiona Glaser's avatar
Fiona Glaser committed
691
692
693
694
695
696
697
        double  f_ssd_global[3];
        double  f_psnr_average[3];
        double  f_psnr_mean_y[3];
        double  f_psnr_mean_u[3];
        double  f_psnr_mean_v[3];
        double  f_ssim_mean_y[3];
        double  f_frame_duration[3];
698
        /* */
Fiona Glaser's avatar
Fiona Glaser committed
699
        int64_t i_mb_count[3][19];
700
        int64_t i_mb_partition[2][17];
701
        int64_t i_mb_count_8x8dct[2];
702
        int64_t i_mb_count_ref[2][2][X264_REF_MAX*2];
703
        int64_t i_mb_cbp[6];
704
        int64_t i_mb_pred_mode[4][13];
Fiona Glaser's avatar
Fiona Glaser committed
705
        int64_t i_mb_field[3];
Loren Merritt's avatar
Loren Merritt committed
706
707
708
        /* */
        int     i_direct_score[2];
        int     i_direct_frames[2];
Dylan Yudaken's avatar
Dylan Yudaken committed
709
        /* num p-frames weighted */
Fiona Glaser's avatar
Fiona Glaser committed
710
        int     i_wpred[2];
711

712
713
        /* Current frame stats */
        x264_frame_stat_t frame;
Laurent Aimar's avatar
Laurent Aimar committed
714
715
    } stat;

Fiona Glaser's avatar
Fiona Glaser committed
716
    /* 0 = luma 4x4, 1 = luma 8x8, 2 = chroma 4x4, 3 = chroma 8x8 */
Fiona Glaser's avatar
Fiona Glaser committed
717
718
719
720
    udctcoef (*nr_offset)[64];
    uint32_t (*nr_residual_sum)[64];
    uint32_t *nr_count;

721
722
    ALIGNED_32( udctcoef nr_offset_denoise[4][64] );
    ALIGNED_32( uint32_t nr_residual_sum_buf[2][4][64] );
Fiona Glaser's avatar
Fiona Glaser committed
723
    uint32_t nr_count_buf[2][4];
Anton Mitrofanov's avatar
Anton Mitrofanov committed
724

Henrik Gramner's avatar
Henrik Gramner committed
725
726
    uint8_t luma2chroma_pixel[7]; /* Subsampled pixel size */

727
    /* Buffers that are allocated per-thread even in sliced threads. */
728
    void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
Fiona Glaser's avatar
Fiona Glaser committed
729
    void *scratch_buffer2; /* if the first one's already in use */
Fiona Glaser's avatar
Fiona Glaser committed
730
    pixel *intra_border_backup[5][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
731
732
733
    /* Deblock strength values are stored for each 4x4 partition. In MBAFF
     * there are four extra values that need to be stored, located in [4][i]. */
    uint8_t (*deblock_strength[2])[2][8][4];
734

Loren Merritt's avatar
Loren Merritt committed
735
    /* CPU-dependent functions */
Laurent Aimar's avatar
Laurent Aimar committed
736
    x264_predict_t      predict_16x16[4+3];
737
    x264_predict8x8_t   predict_8x8[9+3];
Laurent Aimar's avatar
Laurent Aimar committed
738
    x264_predict_t      predict_4x4[9+3];
Henrik Gramner's avatar
Henrik Gramner committed
739
740
741
    x264_predict_t      predict_chroma[4+3];
    x264_predict_t      predict_8x8c[4+3];
    x264_predict_t      predict_8x16c[4+3];
742
    x264_predict_8x8_filter_t predict_8x8_filter;
Laurent Aimar's avatar
Laurent Aimar committed
743
744

    x264_pixel_function_t pixf;
745
    x264_mc_functions_t   mc;
Laurent Aimar's avatar
Laurent Aimar committed
746
    x264_dct_function_t   dctf;
747
    x264_zigzag_function_t zigzagf;
748
749
    x264_zigzag_function_t zigzagf_interlaced;
    x264_zigzag_function_t zigzagf_progressive;
750
    x264_quant_function_t quantf;
Loren Merritt's avatar
Loren Merritt committed
751
    x264_deblock_function_t loopf;
752
    x264_bitstream_function_t bsf;
Laurent Aimar's avatar
Laurent Aimar committed
753

Steven Walters's avatar
Steven Walters committed
754
    x264_lookahead_t *lookahead;
Steve Borho's avatar
Steve Borho committed
755
756
757
758

#if HAVE_OPENCL
    x264_opencl_t opencl;
#endif
Laurent Aimar's avatar
Laurent Aimar committed
759
760
};

Anton Mitrofanov's avatar
Anton Mitrofanov committed
761
762
763
764
765
766
/* Pairs a two-component 16-bit vector with an integer score.
 * NOTE(review): going by the names this is a motion vector together with its
 * SAD (sum of absolute differences) cost -- confirm against the users. */
typedef struct
{
    int sad;        /* integer score for this entry (presumably the SAD cost) */
    int16_t mv[2];  /* vector components; presumably {x, y} -- TODO confirm */
} mvsad_t;

767
768
769
// included at the end because it needs x264_t
#include "macroblock.h"

770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
/* Filter, round and clamp a list of candidate vectors.
 *
 * Each of the i_mvc entries of mvc is divided by 4 with rounding
 * ((v + 2) >> 2) per component. A candidate is dropped when its rounded,
 * packed form is zero or equals pmv; otherwise its rounded components are
 * clamped to [mv_limit[0][c], mv_limit[1][c]] and appended to dst.
 *
 *   dst      - output array; receives at most i_mvc entries
 *   mvc      - input candidates
 *   i_mvc    - number of input candidates
 *   mv_limit - [0] holds the lower bounds, [1] the upper bounds, per component
 *   pmv      - packed vector to exclude; it is compared against the packed
 *              *rounded* candidate
 *
 * Returns the number of entries written to dst. */
static int ALWAYS_INLINE x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
{
    int n_kept = 0;
    for( int idx = 0; idx < i_mvc; idx++ )
    {
        /* Round to nearest while dropping the two fractional bits. */
        int rx = (mvc[idx][0] + 2) >> 2;
        int ry = (mvc[idx][1] + 2) >> 2;
        uint32_t packed = pack16to32_mask( rx, ry );

        /* Keep only candidates that are neither zero nor equal to pmv. */
        if( packed && packed != pmv )
        {
            dst[n_kept][0] = x264_clip3( rx, mv_limit[0][0], mv_limit[1][0] );
            dst[n_kept][1] = x264_clip3( ry, mv_limit[0][1], mv_limit[1][1] );
            n_kept++;
        }
    }
    return n_kept;
}

/* Filter and clamp a list of candidate vectors without rounding them.
 *
 * A candidate is dropped when its raw 32-bit value (both components read at
 * once through M32) is zero or equals pmv; otherwise its components are
 * clamped to the mv_limit bounds scaled by 4 and appended to dst.
 *
 *   dst      - output array; receives at most i_mvc entries
 *   mvc      - input candidates
 *   i_mvc    - number of input candidates
 *   mv_limit - [0] holds the lower bounds, [1] the upper bounds, per
 *              component; scaled by 4 here before use
 *   pmv      - packed vector to exclude
 *
 * Returns the number of entries written to dst. */
static int ALWAYS_INLINE x264_predictor_clip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int16_t mv_limit[2][2], uint32_t pmv )
{
    int cnt = 0;
    /* Scale with a multiplication instead of '<< 2': the lower bounds can be
     * negative, and left-shifting a negative value is undefined behaviour in
     * C. The result is identical for every in-range value. */
    int qpel_limit[4] = {mv_limit[0][0] * 4, mv_limit[0][1] * 4, mv_limit[1][0] * 4, mv_limit[1][1] * 4};
    for( int i = 0; i < i_mvc; i++ )
    {
        uint32_t mv = M32( mvc[i] );
        int mx = mvc[i][0];
        int my = mvc[i][1];
        /* Skip zero vectors and duplicates of pmv. */
        if( !mv || mv == pmv ) continue;
        dst[cnt][0] = x264_clip3( mx, qpel_limit[0], qpel_limit[2] );
        dst[cnt][1] = x264_clip3( my, qpel_limit[1], qpel_limit[3] );
        cnt++;
    }
    return cnt;
}

Steven Walters's avatar
Steven Walters committed
803
#if ARCH_X86 || ARCH_X86_64
804
805
806
#include "x86/util.h"
#endif

807
808
#include "rectangle.h"

Laurent Aimar's avatar
Laurent Aimar committed
809
#endif