/*****************************************************************************
 * common.h: misc common functions
 *****************************************************************************
 * Copyright (C) 2003-2011 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

27 28
#ifndef X264_COMMON_H
#define X264_COMMON_H
29

Loren Merritt's avatar
Loren Merritt committed
30 31 32 33 34 35 36 37 38
/****************************************************************************
 * Macros
 ****************************************************************************/
/* Minimum / maximum of two, three or four values.
 * These are plain macros: every argument can be expanded (and therefore
 * evaluated) more than once, so never pass an expression with side effects. */
#define X264_MIN(a,b) ( (a)<(b) ? (a) : (b) )
#define X264_MAX(a,b) ( (a)>(b) ? (a) : (b) )
#define X264_MIN3(a,b,c) X264_MIN((a),X264_MIN((b),(c)))
#define X264_MAX3(a,b,c) X264_MAX((a),X264_MAX((b),(c)))
#define X264_MIN4(a,b,c,d) X264_MIN((a),X264_MIN3((b),(c),(d)))
#define X264_MAX4(a,b,c,d) X264_MAX((a),X264_MAX3((b),(c),(d)))
39
/* Swap lvalues a and b through a temporary of the given type.
 * a and b are each expanded twice and the temporary is named `t`, so the
 * arguments must be side-effect-free lvalues that are not themselves called t. */
#define XCHG(type,a,b) do{ type t = a; a = b; b = t; } while(0)
40
/* True when the given frame type is X264_TYPE_B.
 * The argument is parenthesized so that expressions with lower precedence
 * than == (e.g. `a & b`, `c ? x : y`) are compared as a whole. */
#define IS_DISPOSABLE(type) ( (type) == X264_TYPE_B )
Loren Merritt's avatar
Loren Merritt committed
41
/* Convert f to 8.8 fixed point: scale by 256 and add .5 before truncating to int.
 * The argument is parenthesized so that compound expressions such as
 * FIX8(a+b) are scaled as a whole. */
#define FIX8(f) ((int)((f)*(1<<8)+.5))
42
/* Round x up to the next multiple of a.
 * The mask arithmetic is only correct when a is a power of two. */
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
Loren Merritt's avatar
Loren Merritt committed
43

44
/* Allocate `size` bytes with x264_malloc() and store the result in `var`.
 * On allocation failure control jumps to a `fail:` label, which the function
 * using this macro must provide. */
#define CHECKED_MALLOC( var, size )\
do {\
    (var) = x264_malloc( size );\
    if( !(var) )\
        goto fail;\
} while( 0 )
/* Same as CHECKED_MALLOC, then zero-fills the new buffer.
 * `size` is expanded twice, so it must not have side effects. */
#define CHECKED_MALLOCZERO( var, size )\
do {\
    CHECKED_MALLOC( var, size );\
    memset( (var), 0, (size) );\
} while( 0 )
55

56
#define X264_BFRAME_MAX 16
57
#define X264_REF_MAX 16
58
#define X264_THREAD_MAX 128
59
#define X264_PCM_COST (384*BIT_DEPTH+16)
60
#define X264_LOOKAHEAD_MAX 250
61 62 63 64 65
/* Limits that scale with BIT_DEPTH (all equal their 8-bit values when BIT_DEPTH is 8). */
/* QP range extension: 6 extra QP steps per bit of depth above 8. */
#define QP_BD_OFFSET (6*(BIT_DEPTH-8))
/* Largest QP for the configured bit depth. */
#define QP_MAX (51+QP_BD_OFFSET)
/* Equals QP_MAX at BIT_DEPTH 10; NOTE(review): presumably the bound used to size
 * tables independently of the configured depth -- confirm against users. */
#define QP_MAX_MAX (51+2*6)
/* Largest lambda; doubles for every bit of depth above 8. */
#define LAMBDA_MAX (91 << (BIT_DEPTH-8))
/* Largest sample value representable in BIT_DEPTH bits. */
#define PIXEL_MAX ((1 << BIT_DEPTH)-1)
66
// arbitrary, but low because SATD scores are 1/4 normal
67
#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
Loren Merritt's avatar
Loren Merritt committed
68 69 70 71

// number of pixels (per thread) in progress at any given time.
// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
#define X264_THREAD_HEIGHT 24
72

73 74 75 76 77 78 79
/* WEIGHTP_FAKE is set when mb_tree & psy are enabled, but normal weightp is disabled
 * (such as in baseline). It checks for fades in lookahead and adjusts qp accordingly
 * to increase quality. Defined as (-1) so that if(i_weighted_pred > 0) is true only when
 * real weights are being used. */

#define X264_WEIGHTP_FAKE (-1)

80 81 82
#define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
#define FILLER_OVERHEAD (NALU_OVERHEAD+1)

Loren Merritt's avatar
Loren Merritt committed
83 84 85
/****************************************************************************
 * Includes
 ****************************************************************************/
86
#include "osdep.h"
87
#include <stdarg.h>
88
#include <stddef.h>
89 90 91
#include <stdlib.h>
#include <string.h>
#include <assert.h>
92
#include <limits.h>
93

94
/* Unions for type-punning.
95 96 97
 * Mn: load or store n bits, aligned, native-endian
 * CPn: copy n bits, aligned, native-endian
 * we don't use memcpy for CPn because memcpy's args aren't assumed to be aligned */
98 99 100
/* Views of the same storage as one n-bit integer or as arrays of smaller units. */
typedef union { uint16_t i; uint8_t  c[2]; } MAY_ALIAS x264_union16_t;
typedef union { uint32_t i; uint16_t b[2]; uint8_t  c[4]; } MAY_ALIAS x264_union32_t;
typedef union { uint64_t i; uint32_t a[2]; uint16_t b[4]; uint8_t c[8]; } MAY_ALIAS x264_union64_t;
/* 128-bit value carried as two 64-bit halves. */
typedef struct { uint64_t i[2]; } x264_uint128_t;
typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; uint8_t d[16]; } MAY_ALIAS x264_union128_t;

/* Mn(p): lvalue for the n-bit integer stored at address p. */
#define M16(src) (((x264_union16_t*)(src))->i)
#define M32(src) (((x264_union32_t*)(src))->i)
#define M64(src) (((x264_union64_t*)(src))->i)
#define M128(src) (((x264_union128_t*)(src))->i)
#define M128_ZERO ((x264_uint128_t){{0,0}})

/* CPn(dst,src): copy n bits from src to dst.
 * The assignment is parenthesized so that the macro expands to a single
 * expression wherever it is used. */
#define CP16(dst,src) (M16(dst) = M16(src))
#define CP32(dst,src) (M32(dst) = M32(src))
#define CP64(dst,src) (M64(dst) = M64(src))
#define CP128(dst,src) (M128(dst) = M128(src))
112

113
/* Sample and coefficient types for the configured bit depth.
 * pixel4 holds four pixels; PIXEL_SPLAT_X4(x) replicates one pixel value into
 * all four lanes; MPIXEL_X4(p) is the lvalue for four pixels stored at p. */
#if HIGH_BIT_DEPTH
    typedef uint16_t pixel;
    typedef uint64_t pixel4;
    typedef int32_t  dctcoef;
    typedef uint32_t udctcoef;

#   define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
#   define MPIXEL_X4(src) M64(src)
#else
    typedef uint8_t  pixel;
    typedef uint32_t pixel4;
    typedef int16_t  dctcoef;
    typedef uint16_t udctcoef;

#   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
#   define MPIXEL_X4(src) M32(src)
#endif

#define BIT_DEPTH X264_BIT_DEPTH

/* Copy four pixels from src to dst; parenthesized so it is a single expression. */
#define CPPIXEL_X4(dst,src) (MPIXEL_X4(dst) = MPIXEL_X4(src))
134

135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
#define X264_SCAN8_SIZE (6*8)
#define X264_SCAN8_LUMA_SIZE (5*8)
#define X264_SCAN8_0 (4+1*8)

/* Position of every block inside an 8-column grid, stored as column + 8*row.
 * Entries 0-15 are luma, 16-19 Cb, 20-23 Cr, 24 luma DC, 25-26 chroma DC. */
#define SCAN8_AT(col,row) ((col)+(row)*8)
static const int x264_scan8[16+2*4+3] =
{
    /* Luma: four 2x2 groups covering columns 4-7, rows 1-4 */
    SCAN8_AT(4,1), SCAN8_AT(5,1), SCAN8_AT(4,2), SCAN8_AT(5,2),
    SCAN8_AT(6,1), SCAN8_AT(7,1), SCAN8_AT(6,2), SCAN8_AT(7,2),
    SCAN8_AT(4,3), SCAN8_AT(5,3), SCAN8_AT(4,4), SCAN8_AT(5,4),
    SCAN8_AT(6,3), SCAN8_AT(7,3), SCAN8_AT(6,4), SCAN8_AT(7,4),

    /* Cb: columns 1-2, rows 1-2 */
    SCAN8_AT(1,1), SCAN8_AT(2,1),
    SCAN8_AT(1,2), SCAN8_AT(2,2),

    /* Cr: columns 1-2, rows 4-5 */
    SCAN8_AT(1,4), SCAN8_AT(2,4),
    SCAN8_AT(1,5), SCAN8_AT(2,5),

    /* Luma DC */
    SCAN8_AT(4,5),

    /* Chroma DC */
    SCAN8_AT(6,5), SCAN8_AT(7,5)
};
#undef SCAN8_AT
/*
   0 1 2 3 4 5 6 7
 0
 1   B B   L L L L
 2   B B   L L L L
 3         L L L L
 4   R R   L L L L
 5   R R   Dy  DuDv
*/

171
#include "x264.h"
172
#include "bitstream.h"
173 174 175 176 177 178 179
#include "set.h"
#include "predict.h"
#include "pixel.h"
#include "mc.h"
#include "frame.h"
#include "dct.h"
#include "cabac.h"
180
#include "quant.h"
181
#include "cpu.h"
182
#include "threadpool.h"
183

184
/****************************************************************************
185
 * General functions
186
 ****************************************************************************/
187
/* x264_malloc: performs (or emulates) an aligned allocation, as memalign would.
 * Buffers allocated with x264_malloc must be released with x264_free. */
189 190 191
void *x264_malloc( int );
void  x264_free( void * );

192 193 194
/* x264_slurp_file: malloc space for the whole file and read it */
char *x264_slurp_file( const char *filename );

195 196 197
/* x264_mdate: return the current date in microseconds */
int64_t x264_mdate( void );

198 199 200 201
/* x264_param2string: return a (malloced) string containing most of
 * the encoding options */
char *x264_param2string( x264_param_t *p, int b_res );

202 203 204
/* log */
void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );

205
void x264_reduce_fraction( uint32_t *n, uint32_t *d );
Fiona Glaser's avatar
Fiona Glaser committed
206
void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
207
void x264_init_vlc_tables( void );
208

209
/* Clamp x to the valid sample range [0, PIXEL_MAX].
 * Returns 0 for negative input and PIXEL_MAX for input above PIXEL_MAX. */
static ALWAYS_INLINE pixel x264_clip_pixel( int x )
{
    /* Fast path: no bit outside PIXEL_MAX is set, so x is already in range. */
    if( !(x & ~PIXEL_MAX) )
        return x;
    /* Explicit comparison instead of a sign-shift trick: negating INT_MIN
     * overflows, and right-shifting a negative int is implementation-defined. */
    return x < 0 ? 0 : PIXEL_MAX;
}

214
/* Clamp v to [i_min, i_max]. The lower bound is tested first, so i_min is
 * returned if the bounds are inverted and v lies below i_min. */
static ALWAYS_INLINE int x264_clip3( int v, int i_min, int i_max )
{
    if( v < i_min )
        return i_min;
    if( v > i_max )
        return i_max;
    return v;
}

219
/* Clamp v to [f_min, f_max]; floating-point counterpart of x264_clip3.
 * A NaN v fails both comparisons and is returned unchanged. */
static ALWAYS_INLINE double x264_clip3f( double v, double f_min, double f_max )
{
    if( v < f_min )
        return f_min;
    if( v > f_max )
        return f_max;
    return v;
}
223

224
/* Return the median (middle value) of a, b and c.
 * Written with comparisons only, so it does not depend on arithmetic right
 * shift of negative values and cannot overflow on a subtraction. */
static ALWAYS_INLINE int x264_median( int a, int b, int c )
{
    int lo = a < b ? a : b;    /* min(a,b) */
    int hi = a < b ? b : a;    /* max(a,b) */
    if( lo < c )
        lo = c;                /* max(min(a,b), c) */
    return hi < lo ? hi : lo;  /* min(max(a,b), max(min(a,b), c)) */
}

234
/* Component-wise median of three two-element vectors.
 * dst, a, b and c must each point to at least two int16_t values;
 * dst[0] and dst[1] receive the medians of the respective components. */
static ALWAYS_INLINE void x264_median_mv( int16_t *dst, int16_t *a, int16_t *b, int16_t *c )
{
    dst[0] = x264_median( a[0], b[0], c[0] );
    dst[1] = x264_median( a[1], b[1], c[1] );
}

240
/* Sum of absolute component differences between each pair of consecutive
 * entries in mvc[0..i_mvc-1]. Returns 0 when i_mvc is 1 or less. */
static ALWAYS_INLINE int x264_predictor_difference( int16_t (*mvc)[2], intptr_t i_mvc )
{
    int sum = 0;
    for( int i = 0; i < i_mvc-1; i++ )
    {
        sum += abs( mvc[i][0] - mvc[i+1][0] )
             + abs( mvc[i][1] - mvc[i+1][1] );
    }
    return sum;
}

251
/* Classify the summed left+top mvd magnitudes for each of the two components.
 * Each component maps to 0 (sum <= 2), 1 (3..32) or 2 (> 32); component 0 is
 * returned in the low byte and component 1 in the high byte. */
static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvdtop )
{
    /* The inputs are uint8_t and therefore never negative: no abs() needed. */
    int amvd0 = mvdleft[0] + mvdtop[0];
    int amvd1 = mvdleft[1] + mvdtop[1];
    amvd0 = (amvd0 > 2) + (amvd0 > 32);
    amvd1 = (amvd1 > 2) + (amvd1 > 32);
    return amvd0 + (amvd1<<8);
}

260
static void ALWAYS_INLINE x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
261 262 263 264 265
{
    for( int i = 0; i < i_mvc; i++ )
    {
        int mx = (mvc[i][0] + 2) >> 2;
        int my = (mvc[i][1] + 2) >> 2;
266 267
        dst[i][0] = x264_clip3( mx, mv_x_min, mv_x_max );
        dst[i][1] = x264_clip3( my, mv_y_min, mv_y_max );
268 269 270
    }
}

271 272 273
extern const uint8_t x264_exp2_lut[64];
extern const float x264_log2_lut[128];
extern const float x264_log2_lz_lut[32];
274

Fiona Glaser's avatar
Fiona Glaser committed
275 276
/* Not a general-purpose function; multiplies input by -1/6 to convert
 * qp to qscale. */
277 278
static ALWAYS_INLINE int x264_exp2fix8( float x )
{
Anton Mitrofanov's avatar
Anton Mitrofanov committed
279 280 281
    int i = x*(-64.f/6.f) + 512.5f;
    if( i < 0 ) return 0;
    if( i > 1023 ) return 0xffff;
282
    return (x264_exp2_lut[i&63]+256) << (i>>6) >> 8;
283 284 285 286 287
}

static ALWAYS_INLINE float x264_log2( uint32_t x )
{
    int lz = x264_clz( x );
288
    return x264_log2_lut[(x<<lz>>24)&0x7f] + x264_log2_lz_lut[lz];
289 290
}

291 292 293
/****************************************************************************
 *
 ****************************************************************************/
294 295 296 297 298 299 300
/* Slice types, with the numeric values used throughout this header. */
enum slice_type_e
{
    SLICE_TYPE_P  = 0,
    SLICE_TYPE_B  = 1,
    SLICE_TYPE_I  = 2,
};

/* One-letter name of each slice type, indexed by slice_type_e.
 * Designated initializers keep the table tied to the enum values. */
static const char slice_type_to_char[] =
{
    [SLICE_TYPE_P] = 'P',
    [SLICE_TYPE_B] = 'B',
    [SLICE_TYPE_I] = 'I',
};
Loren Merritt's avatar
Loren Merritt committed
302

303 304 305 306 307 308 309 310 311
/* SEI payload type codes. */
enum sei_payload_type_e
{
    SEI_BUFFERING_PERIOD       = 0,
    SEI_PIC_TIMING             = 1,
    SEI_PAN_SCAN_RECT          = 2,
    SEI_FILLER                 = 3,
    SEI_USER_DATA_REGISTERED   = 4,
    SEI_USER_DATA_UNREGISTERED = 5,
    SEI_RECOVERY_POINT         = 6,
    SEI_FRAME_PACKING          = 45,
};

315 316 317 318 319 320 321
typedef struct
{
    x264_sps_t *sps;
    x264_pps_t *pps;

    int i_type;
    int i_first_mb;
322
    int i_last_mb;
323 324 325 326 327

    int i_pps_id;

    int i_frame_num;

328
    int b_mbaff;
329 330 331 332 333
    int b_field_pic;
    int b_bottom_field;

    int i_idr_pic_id;   /* -1 if nal_type != 5 */

334
    int i_poc;
335 336 337 338 339 340 341 342 343 344 345
    int i_delta_poc_bottom;

    int i_delta_poc[2];
    int i_redundant_pic_cnt;

    int b_direct_spatial_mv_pred;

    int b_num_ref_idx_override;
    int i_num_ref_idx_l0_active;
    int i_num_ref_idx_l1_active;

346
    int b_ref_pic_list_reordering[2];
347 348
    struct
    {
349 350
        int idc;
        int arg;
351
    } ref_pic_list_order[2][X264_REF_MAX];
352

353
    /* P-frame weighting */
354
    x264_weight_t weight[X264_REF_MAX*2][3];
355

356 357 358 359 360 361
    int i_mmco_remove_from_end;
    int i_mmco_command_count;
    struct /* struct for future expansion */
    {
        int i_difference_of_pic_nums;
        int i_poc;
362
    } mmco[X264_REF_MAX];
363

364 365
    int i_cabac_init_idc;

366
    int i_qp;
367 368 369 370 371 372 373 374 375 376 377
    int i_qp_delta;
    int b_sp_for_swidth;
    int i_qs_delta;

    /* deblocking filter */
    int i_disable_deblocking_filter_idc;
    int i_alpha_c0_offset;
    int i_beta_offset;

} x264_slice_header_t;

Steven Walters's avatar
Steven Walters committed
378 379
typedef struct x264_lookahead_t
{
380
    volatile uint8_t              b_exit_thread;
Steven Walters's avatar
Steven Walters committed
381 382
    uint8_t                       b_thread_active;
    uint8_t                       b_analyse_keyframe;
Fiona Glaser's avatar
Fiona Glaser committed
383
    int                           i_last_keyframe;
Steven Walters's avatar
Steven Walters committed
384 385
    int                           i_slicetype_length;
    x264_frame_t                  *last_nonb;
386 387 388 389
    x264_pthread_t                thread_handle;
    x264_sync_frame_list_t        ifbuf;
    x264_sync_frame_list_t        next;
    x264_sync_frame_list_t        ofbuf;
Steven Walters's avatar
Steven Walters committed
390 391
} x264_lookahead_t;

392 393 394 395 396 397 398
typedef struct x264_ratecontrol_t   x264_ratecontrol_t;

struct x264_t
{
    /* encoder parameters */
    x264_param_t    param;

Steven Walters's avatar
Steven Walters committed
399
    x264_t          *thread[X264_THREAD_MAX+1];
Loren Merritt's avatar
Loren Merritt committed
400 401
    int             b_thread_active;
    int             i_thread_phase; /* which thread to use for the next frame */
402 403
    int             i_threadslice_start; /* first row in this thread slice */
    int             i_threadslice_end; /* row after the end of this thread slice */
404
    x264_threadpool_t *threadpool;
405

406 407 408 409
    /* bitstream output */
    struct
    {
        int         i_nal;
410 411
        int         i_nals_allocated;
        x264_nal_t  *nal;
412 413 414 415 416
        int         i_bitstream;    /* size of p_bitstream */
        uint8_t     *p_bitstream;   /* will hold data for all nal */
        bs_t        bs;
    } out;

417 418 419
    uint8_t *nal_buffer;
    int      nal_buffer_size;

Loren Merritt's avatar
Loren Merritt committed
420 421
    /**** thread synchronization starts here ****/

422 423
    /* frame number/poc */
    int             i_frame;
424
    int             i_frame_num;
425

426 427 428 429
    int             i_thread_frames; /* Number of different frames being encoded by threads;
                                      * 1 when sliced-threads is on. */
    int             i_nal_type;
    int             i_nal_ref_idc;
430

431
    int64_t         i_disp_fields;  /* Number of displayed fields (both coded and implied via pic_struct) */
432
    int             i_disp_fields_last_frame;
433 434 435
    int64_t         i_prev_duration; /* Duration of previous frame */
    int64_t         i_coded_fields; /* Number of coded fields (both coded and implied via pic_struct) */
    int64_t         i_cpb_delay;    /* Equal to number of fields preceding this field
436
                                     * since last buffering_period SEI */
437 438
    int64_t         i_coded_fields_lookahead; /* Use separate counters for lookahead */
    int64_t         i_cpb_delay_lookahead;
439

440
    int64_t         i_cpb_delay_pir_offset;
441

442
    int             b_queued_intra_refresh;
443
    int64_t         i_last_idr_pts;
444

445
    /* We use only one SPS and one PPS */
446
    x264_sps_t      sps_array[1];
447
    x264_sps_t      *sps;
448
    x264_pps_t      pps_array[1];
449 450 451
    x264_pps_t      *pps;
    int             i_idr_pic_id;

452 453 454
    /* quantization matrix for decoding, [cqm][qp%6][coef] */
    int             (*dequant4_mf[4])[16];   /* [4][6][16] */
    int             (*dequant8_mf[2])[64];   /* [2][6][64] */
Loren Merritt's avatar
Loren Merritt committed
455
    /* quantization matrix for trellis, [cqm][qp][coef] */
456 457
    int             (*unquant4_mf[4])[16];   /* [4][52][16] */
    int             (*unquant8_mf[2])[64];   /* [2][52][64] */
Loren Merritt's avatar
Loren Merritt committed
458
    /* quantization matrix for deadzone */
459 460 461 462
    udctcoef        (*quant4_mf[4])[16];     /* [4][52][16] */
    udctcoef        (*quant8_mf[2])[64];     /* [2][52][64] */
    udctcoef        (*quant4_bias[4])[16];   /* [4][52][16] */
    udctcoef        (*quant8_bias[2])[64];   /* [2][52][64] */
Loren Merritt's avatar
Loren Merritt committed
463

464 465 466
    /* mv/ref cost arrays.  Indexed by lambda instead of
     * qp because, due to rounding, some quantizers share
     * lambdas.  This saves memory. */
467 468
    uint16_t *cost_mv[LAMBDA_MAX+1];
    uint16_t *cost_mv_fpel[LAMBDA_MAX+1][4];
469

470 471
    const uint8_t   *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */

472 473 474 475 476 477 478 479
    /* Slice header */
    x264_slice_header_t sh;

    /* cabac context */
    x264_cabac_t    cabac;

    struct
    {
480
        /* Frames to be encoded (whose types have been decided) */
Steven Walters's avatar
Steven Walters committed
481 482 483
        x264_frame_t **current;
        /* Unused frames: 0 = fenc, 1 = fdec */
        x264_frame_t **unused[2];
484

485 486 487
        /* Unused blank frames (for duplicates) */
        x264_frame_t **blank_unused;

488
        /* frames used for reference + sentinels */
489
        x264_frame_t *reference[X264_REF_MAX+2];
490

Lamont Alston's avatar
Lamont Alston committed
491 492 493 494 495
        int i_last_keyframe;       /* Frame number of the last keyframe */
        int i_last_idr;            /* Frame number of the last IDR (not RP)*/
        int i_poc_last_open_gop;   /* Poc of the I frame of the last open-gop. The value
                                    * is only assigned during the period between that
                                    * I frame and the next P or I frame, else -1 */
496 497

        int i_input;    /* Number of input frames already accepted */
498 499 500 501

        int i_max_dpb;  /* Number of frames allocated in the decoded picture buffer */
        int i_max_ref0;
        int i_max_ref1;
502
        int i_delay;    /* Number of frames buffered for B reordering */
503 504
        int     i_bframe_delay;
        int64_t i_bframe_delay_time;
505
        int64_t i_first_pts;
Yusuke Nakamura's avatar
Yusuke Nakamura committed
506
        int64_t i_prev_reordered_pts[2];
507 508
        int64_t i_largest_pts;
        int64_t i_second_largest_pts;
509
        int b_have_lowres;  /* Whether 1/2 resolution luma planes are being used */
Loren Merritt's avatar
Loren Merritt committed
510
        int b_have_sub8x8_esa;
511 512 513 514 515 516 517 518 519
    } frames;

    /* current frame being encoded */
    x264_frame_t    *fenc;

    /* frame being reconstructed */
    x264_frame_t    *fdec;

    /* references lists */
520 521
    int             i_ref[2];
    x264_frame_t    *fref[2][X264_REF_MAX+3];
522
    x264_frame_t    *fref_nearest[2];
523
    int             b_ref_reorder[2];
524

525 526
    /* hrd */
    int initial_cpb_removal_delay;
Kieran Kunhya's avatar
Kieran Kunhya committed
527
    int initial_cpb_removal_delay_offset;
528
    int64_t i_reordered_pts_delay;
529 530 531 532

    /* Current MB DCT coeffs */
    struct
    {
533 534
        ALIGNED_16( dctcoef luma16x16_dc[16] );
        ALIGNED_16( dctcoef chroma_dc[2][4] );
535
        // FIXME share memory?
536 537
        ALIGNED_16( dctcoef luma8x8[4][64] );
        ALIGNED_16( dctcoef luma4x4[16+8][16] );
538 539 540 541 542
    } dct;

    /* MB table and cache for current frame/mb */
    struct
    {
543 544
        int     i_mb_width;
        int     i_mb_height;
545 546
        int     i_mb_count;                 /* number of mbs in a frame */

547 548
        /* Strides */
        int     i_mb_stride;
549 550
        int     i_b8_stride;
        int     i_b4_stride;
551 552 553 554 555

        /* Current index */
        int     i_mb_x;
        int     i_mb_y;
        int     i_mb_xy;
556 557
        int     i_b8_xy;
        int     i_b4_xy;
Loren Merritt's avatar
Loren Merritt committed
558

559
        /* Search parameters */
Loren Merritt's avatar
Loren Merritt committed
560
        int     i_me_method;
561
        int     i_subpel_refine;
562
        int     b_chroma_me;
Loren Merritt's avatar
Loren Merritt committed
563
        int     b_trellis;
564
        int     b_noise_reduction;
565
        int     b_dct_decimate;
566 567
        int     i_psy_rd; /* Psy RD strength--fixed point value*/
        int     i_psy_trellis; /* Psy trellis strength--fixed point value*/
568

569 570
        int     b_interlaced;

571
        /* Allowed qpel MV range to stay within the picture + emulated edge pixels */
572 573
        int     mv_min[2];
        int     mv_max[2];
574 575 576 577
        /* Subpel MV range for motion search.
         * same mv_min/max but includes levels' i_mv_range. */
        int     mv_min_spel[2];
        int     mv_max_spel[2];
578 579 580
        /* Fullpel MV range for motion search */
        int     mv_min_fpel[2];
        int     mv_max_fpel[2];
581

582
        /* neighboring MBs */
583
        unsigned int i_neighbour;
584 585
        unsigned int i_neighbour8[4];       /* neighbours of each 8x8 or 4x4 block that are available */
        unsigned int i_neighbour4[16];      /* at the time the block is coded */
586
        unsigned int i_neighbour_intra;     /* for constrained intra pred */
587
        unsigned int i_neighbour_frame;     /* ignoring slice boundaries */
Loren Merritt's avatar
Loren Merritt committed
588 589 590 591
        int     i_mb_type_top;
        int     i_mb_type_left;
        int     i_mb_type_topleft;
        int     i_mb_type_topright;
592
        int     i_mb_prev_xy;
593
        int     i_mb_left_xy;
594
        int     i_mb_top_xy;
595 596
        int     i_mb_topleft_xy;
        int     i_mb_topright_xy;
597

Loren Merritt's avatar
Loren Merritt committed
598
        /**** thread synchronization ends here ****/
Loren Merritt's avatar
Loren Merritt committed
599
        /* subsequent variables are either thread-local or constant,
         * and won't be copied from one thread to another */

602 603
        /* mb table */
        int8_t  *type;                      /* mb type */
604
        uint8_t *partition;                 /* mb partition */
605 606
        int8_t  *qp;                        /* mb qp */
        int16_t *cbp;                       /* mb cbp: 0x0?: luma, 0x?0: chroma, 0x100: luma dc, 0x0200 and 0x0400: chroma dc  (all set for PCM)*/
607 608
        int8_t  (*intra4x4_pred_mode)[8];   /* intra4x4 pred mode. for non I4x4 set to I_PRED_4x4_DC(2) */
                                            /* actually has only 7 entries; set to 8 for write-combining optimizations */
609 610 611
        uint8_t (*non_zero_count)[16+4+4];  /* nzc. for I_PCM set to 16 */
        int8_t  *chroma_pred_mode;          /* chroma_pred_mode. cabac only. for non intra I_PRED_CHROMA_DC(0) */
        int16_t (*mv[2])[2];                /* mb mv. set to 0 for intra mb */
612
        uint8_t (*mvd[2])[8][2];            /* absolute value of mb mv difference with predict, clipped to [0,33]. set to 0 if intra. cabac only */
613
        int8_t   *ref[2];                   /* mb ref. set to -1 if non used (intra or Lx only) */
614
        int16_t (*mvr[2][X264_REF_MAX*2])[2];/* 16x16 mv for each possible ref */
615
        int8_t  *skipbp;                    /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
616
        int8_t  *mb_transform_size;         /* transform_size_8x8_flag of each mb */
617 618
        uint16_t *slice_table;              /* sh->first_mb of the slice that the indexed mb is part of
                                             * NOTE: this will fail on resolutions above 2^16 MBs... */
619

620
         /* buffer for weighted versions of the reference frames */
621
        pixel *p_weight_buf[X264_REF_MAX];
622

623 624 625
        /* current value */
        int     i_type;
        int     i_partition;
626
        ALIGNED_4( uint8_t i_sub_partition[4] );
627
        int     b_transform_8x8;
628 629 630 631 632 633 634

        int     i_cbp_luma;
        int     i_cbp_chroma;

        int     i_intra16x16_pred_mode;
        int     i_chroma_pred_mode;

635 636 637 638 639
        /* skip flags for i4x4 and i8x8
         * 0 = encode as normal.
         * 1 (non-RD only) = the DCT is still in h->dct, restore fdec and skip reconstruction.
         * 2 (RD only) = the DCT has since been overwritten by RD; restore that too. */
        int i_skip_intra;
Fiona Glaser's avatar
Fiona Glaser committed
640 641
        /* skip flag for motion compensation */
        /* if we've already done MC, we don't need to do it again */
642
        int b_skip_mc;
643 644
        /* set to true if we are re-encoding a macroblock. */
        int b_reencode_mb;
Fiona Glaser's avatar
Fiona Glaser committed
645
        int ip_offset; /* Used by PIR to offset the quantizer of intra-refresh blocks. */
Fiona Glaser's avatar
Fiona Glaser committed
646
        int b_deblock_rdo;
647

648 649
        struct
        {
650 651 652
            /* space for p_fenc and p_fdec */
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
653 654
            ALIGNED_16( pixel fenc_buf[24*FENC_STRIDE] );
            ALIGNED_16( pixel fdec_buf[27*FDEC_STRIDE] );
655

Loren Merritt's avatar
Loren Merritt committed
656
            /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
657 658
            ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
            ALIGNED_16( pixel i8x8_fdec_buf[16*16] );
659 660
            ALIGNED_16( dctcoef i8x8_dct_buf[3][64] );
            ALIGNED_16( dctcoef i4x4_dct_buf[15][16] );
661 662 663 664
            uint32_t i4x4_nnz_buf[4];
            uint32_t i8x8_nnz_buf[4];
            int i4x4_cbp;
            int i8x8_cbp;
665

666
            /* Psy trellis DCT data */
667 668
            ALIGNED_16( dctcoef fenc_dct8[4][64] );
            ALIGNED_16( dctcoef fenc_dct4[16][16] );
669

670 671 672
            /* Psy RD SATD/SA8D scores cache */
            ALIGNED_16( uint64_t fenc_hadamard_cache[9] );
            ALIGNED_16( uint32_t fenc_satd_cache[32] );
673

674
            /* pointer over mb of the frame to be compressed */
675
            pixel *p_fenc[3]; /* y,u,v */
676
            /* pointer to the actual source frame, not a block copy */
677
            pixel *p_fenc_plane[2]; /* y,uv */
678

Loren Merritt's avatar
Loren Merritt committed
679
            /* pointer over mb of the frame to be reconstructed  */
680
            pixel *p_fdec[3];
681 682

            /* pointer over mb of the references */
683
            int i_fref[2];
684 685 686
            pixel *p_fref[2][X264_REF_MAX*2][4+1]; /* last: yN, yH, yV, yHV, uv */
            pixel *p_fref_w[X264_REF_MAX*2];  /* weighted fullpel luma */
            uint16_t *p_integral[2][X264_REF_MAX];
687

688
            /* fref stride */
689 690 691 692 693 694
            int     i_stride[3];
        } pic;

        /* cache */
        struct
        {
695
            /* real intra4x4_pred_mode if I_4X4 or I_8X8, I_PRED_4x4_DC if mb available, -1 if not */
696
            ALIGNED_8( int8_t intra4x4_pred_mode[X264_SCAN8_LUMA_SIZE] );
697

Loren Merritt's avatar
Loren Merritt committed
698
            /* i_non_zero_count if available else 0x80 */
Henrik Gramner's avatar
Henrik Gramner committed
699
            ALIGNED_16( uint8_t non_zero_count[X264_SCAN8_SIZE] );
700

Loren Merritt's avatar
Loren Merritt committed
701
            /* -1 if unused, -2 if unavailable */
702
            ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
703

Loren Merritt's avatar
Loren Merritt committed
704
            /* 0 if not available */
705 706
            ALIGNED_16( int16_t mv[2][X264_SCAN8_LUMA_SIZE][2] );
            ALIGNED_8( uint8_t mvd[2][X264_SCAN8_LUMA_SIZE][2] );
707 708

            /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
709
            ALIGNED_4( int8_t skip[X264_SCAN8_LUMA_SIZE] );
710

711 712
            ALIGNED_4( int16_t direct_mv[2][4][2] );
            ALIGNED_4( int8_t  direct_ref[2][4] );
713
            int     direct_partition;
714
            ALIGNED_4( int16_t pskip_mv[2] );
715

716 717
            /* number of neighbors (top and left) that used 8x8 dct */
            int     i_neighbour_transform_size;
718
            int     i_neighbour_interlaced;
719 720 721 722

            /* neighbor CBPs */
            int     i_cbp_top;
            int     i_cbp_left;
723 724 725
        } cache;

        /* */
726
        int     i_qp;       /* current qp */
727
        int     i_chroma_qp;
728 729
        int     i_last_qp;  /* last qp */
        int     i_last_dqp; /* last delta qp */
730
        int     b_variable_qp; /* whether qp is allowed to vary per macroblock */
731
        int     b_lossless;
Loren Merritt's avatar
Loren Merritt committed
732 733
        int     b_direct_auto_read; /* take stats for --direct auto from the 2pass log */
        int     b_direct_auto_write; /* analyse direct modes, to use and/or save */
734

735 736 737 738 739
        /* lambda values */
        int     i_trellis_lambda2[2][2]; /* [luma,chroma][inter,intra] */
        int     i_psy_rd_lambda;
        int     i_chroma_lambda2_offset;

740
        /* B_direct and weighted prediction */
741
        int16_t dist_scale_factor_buf[2][X264_REF_MAX*2][4];
742
        int16_t (*dist_scale_factor)[4];
743
        int8_t bipred_weight_buf[2][X264_REF_MAX*2][4];
744
        int8_t (*bipred_weight)[4];
745
        /* maps fref1[0]'s ref indices into the current list0 */
746
#define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
747
        int8_t  map_col_to_list0[X264_REF_MAX+2];
748
        int ref_blind_dupe; /* The index of the blind reference frame duplicate. */
749
        int8_t deblock_ref_table[X264_REF_MAX*2+2];
Fiona Glaser's avatar
Fiona Glaser committed
750
#define deblock_ref_table(x) h->mb.deblock_ref_table[(x)+2]
751 752 753 754 755 756 757 758
    } mb;

    /* rate control encoding only */
    x264_ratecontrol_t *rc;

    /* stats */
    struct
    {
759 760 761
        /* Current frame stats */
        struct
        {
762 763 764 765
            /* MV bits (MV+Ref+Block Type) */
            int i_mv_bits;
            /* Texture bits (DCT coefs) */
            int i_tex_bits;
766 767 768
            /* ? */
            int i_misc_bits;
            /* MB type counts */
769
            int i_mb_count[19];
770
            int i_mb_count_i;
Loren Merritt's avatar
Loren Merritt committed
771 772
            int i_mb_count_p;
            int i_mb_count_skip;
773
            int i_mb_count_8x8dct[2];
774
            int i_mb_count_ref[2][X264_REF_MAX*2];
775
            int i_mb_partition[17];
776
            int i_mb_cbp[6];
777
            int i_mb_pred_mode[4][13];
Loren Merritt's avatar
Loren Merritt committed
778 779
            /* Adaptive direct mv pred */
            int i_direct_score[2];
780 781 782
            /* Metrics */
            int64_t i_ssd[3];
            double f_ssim;
783 784
        } frame;

Loren Merritt's avatar
Loren Merritt committed
785
        /* Cumulative stats */
786

787
        /* per slice info */
788 789 790
        int     i_frame_count[3];
        int64_t i_frame_size[3];
        double  f_frame_qp[3];
791
        int     i_consecutive_bframes[X264_BFRAME_MAX+1];
792
        /* */
793 794 795 796 797 798 799
        double  f_ssd_global[3];
        double  f_psnr_average[3];
        double  f_psnr_mean_y[3];
        double  f_psnr_mean_u[3];
        double  f_psnr_mean_v[3];
        double  f_ssim_mean_y[3];
        double  f_frame_duration[3];
800
        /* */
801
        int64_t i_mb_count[3][19];
802
        int64_t i_mb_partition[2][17];
803
        int64_t i_mb_count_8x8dct[2];
804
        int64_t i_mb_count_ref[2][2][X264_REF_MAX*2];
805
        int64_t i_mb_cbp[6];
806
        int64_t i_mb_pred_mode[4][13];
Loren Merritt's avatar
Loren Merritt committed
807 808 809
        /* */
        int     i_direct_score[2];
        int     i_direct_frames[2];
810
        /* num p-frames weighted */
811
        int     i_wpred[2];
812

813 814
    } stat;

815
    ALIGNED_16( uint32_t nr_residual_sum[2][64] );
816
    ALIGNED_16( udctcoef nr_offset[2][64] );
817 818
    uint32_t        nr_count[2];

819
    /* Buffers that are allocated per-thread even in sliced threads. */
820
    void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
821
    pixel *intra_border_backup[2][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
Fiona Glaser's avatar
Fiona Glaser committed
822
    uint8_t (*deblock_strength[2])[2][4][4];
823

Loren Merritt's avatar
Loren Merritt committed
824
    /* CPU-dependent functions */
825
    x264_predict_t      predict_16x16[4+3];
826 827
    x264_predict_t      predict_8x8c[4+3];
    x264_predict8x8_t   predict_8x8[9+3];
828
    x264_predict_t      predict_4x4[9+3];
829
    x264_predict_8x8_filter_t predict_8x8_filter;
830 831

    x264_pixel_function_t pixf;
832
    x264_mc_functions_t   mc;
833
    x264_dct_function_t   dctf;
834
    x264_zigzag_function_t zigzagf;
835
    x264_quant_function_t quantf;
Loren Merritt's avatar
Loren Merritt committed
836
    x264_deblock_function_t loopf;
837
    x264_bitstream_function_t bsf;
838

Steven Walters's avatar
Steven Walters committed
839
#if HAVE_VISUALIZE
Loren Merritt's avatar
Loren Merritt committed
840 841
    struct visualize_t *visualize;
#endif
Steven Walters's avatar
Steven Walters committed
842
    x264_lookahead_t *lookahead;
843 844
};

845 846
// included at the end because it needs x264_t
#include "macroblock.h"
847
#include "rectangle.h"
848

Steven Walters's avatar
Steven Walters committed
849
#if HAVE_MMX
850 851 852
#include "x86/util.h"
#endif

853 854
#endif