common.h 28.4 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1
/*****************************************************************************
 * common.h: misc common functions
 *****************************************************************************
 * Copyright (C) 2003-2010 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

27
28
#ifndef X264_COMMON_H
#define X264_COMMON_H
Laurent Aimar's avatar
Laurent Aimar committed
29

Loren Merritt's avatar
Loren Merritt committed
30
31
32
33
34
35
36
37
38
/****************************************************************************
 * Macros
 ****************************************************************************/
/* Minimum / maximum of two, three or four values.
 * NOTE: these are plain macros, so an argument may be evaluated more than
 * once; do not pass expressions with side effects (e.g. i++). */
#define X264_MIN(a,b) ( (a)<(b) ? (a) : (b) )
#define X264_MAX(a,b) ( (a)>(b) ? (a) : (b) )
#define X264_MIN3(a,b,c) X264_MIN((a),X264_MIN((b),(c)))
#define X264_MAX3(a,b,c) X264_MAX((a),X264_MAX((b),(c)))
#define X264_MIN4(a,b,c,d) X264_MIN((a),X264_MIN3((b),(c),(d)))
#define X264_MAX4(a,b,c,d) X264_MAX((a),X264_MAX3((b),(c),(d)))
39
/* Swap two lvalues a and b of the given type.
 * The temporary has a name unlikely to clash with caller variables, so that
 * XCHG( int, t, u ) works, and the operands are parenthesized.
 * a and b are each evaluated twice: avoid side effects in them. */
#define XCHG(type,a,b) do{ type xchg_tmp_ = (a); (a) = (b); (b) = xchg_tmp_; } while(0)
Lamont Alston's avatar
Lamont Alston committed
40
/* True when the given frame type equals X264_TYPE_B.
 * The argument is parenthesized so that expressions such as `a & b` or
 * `c ? x : y` are compared as a whole. */
#define IS_DISPOSABLE(type) ( (type) == X264_TYPE_B )
Loren Merritt's avatar
Loren Merritt committed
41
/* Convert f to an integer with 8 fractional bits: scale by 256, add .5 and
 * truncate. The argument is parenthesized so that FIX8(a+b) scales the
 * whole sum rather than only b. */
#define FIX8(f) ((int)((f)*(1<<8)+.5))
42
/* Round x up to the next multiple of a. The mask arithmetic is only
 * correct when a is a power of two. */
#define ALIGN(x,a) (((x)+((a)-1))&~((a)-1))
Loren Merritt's avatar
Loren Merritt committed
43

44
/* Allocate `size` bytes with x264_malloc into `var`; on failure jump to the
 * enclosing function's `fail` label (which the caller must provide). */
#define CHECKED_MALLOC( var, size )\
do {\
    (var) = x264_malloc( size );\
    if( !(var) )\
        goto fail;\
} while( 0 )
/* As CHECKED_MALLOC, then zero-fill the buffer.
 * `size` is evaluated exactly once, so the allocation and the memset always
 * agree on the length even if the expression has side effects. */
#define CHECKED_MALLOCZERO( var, size )\
do {\
    size_t checked_malloc_size_ = (size);\
    CHECKED_MALLOC( var, checked_malloc_size_ );\
    memset( var, 0, checked_malloc_size_ );\
} while( 0 )
55

56
/* Fixed limits */
#define X264_BFRAME_MAX 16
#define X264_REF_MAX 16
#define X264_THREAD_MAX 128
#define X264_PCM_COST (384*BIT_DEPTH+16)
#define X264_LOOKAHEAD_MAX 250

/* Values that scale with BIT_DEPTH */
#define QP_BD_OFFSET (6*(BIT_DEPTH-8))  /* 0 at a bit depth of 8, +6 per additional bit */
#define QP_MAX (51+QP_BD_OFFSET)
#define QP_MAX_MAX (51+2*6)             /* the value QP_MAX takes when BIT_DEPTH is 10 */
#define LAMBDA_MAX (91 << (BIT_DEPTH-8))
#define PIXEL_MAX ((1 << BIT_DEPTH)-1)  /* all-ones mask of BIT_DEPTH bits */

// arbitrary, but low because SATD scores are 1/4 normal
#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
Loren Merritt's avatar
Loren Merritt committed
68
69
70
71

// number of pixels (per thread) in progress at any given time.
// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
#define X264_THREAD_HEIGHT 24
72

Dylan Yudaken's avatar
Dylan Yudaken committed
73
74
75
76
77
78
79
/* WEIGHTP_FAKE is set when mb_tree & psy are enabled, but normal weightp is disabled
 * (such as in baseline). It checks for fades in lookahead and adjusts qp accordingly
 * to increase quality. Defined as (-1) so that if(i_weighted_pred > 0) is true only when
 * real weights are being used. */

#define X264_WEIGHTP_FAKE (-1)

80
81
82
#define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
#define FILLER_OVERHEAD (NALU_OVERHEAD+1)

Loren Merritt's avatar
Loren Merritt committed
83
84
85
/****************************************************************************
 * Includes
 ****************************************************************************/
86
#include "osdep.h"
87
#include <stdarg.h>
88
#include <stddef.h>
89
90
91
#include <stdlib.h>
#include <string.h>
#include <assert.h>
Fiona Glaser's avatar
Fiona Glaser committed
92
#include <limits.h>
Fiona Glaser's avatar
Fiona Glaser committed
93

94
/* Unions for type-punning.
 * Mn: load or store n bits, aligned, native-endian
 * CPn: copy n bits, aligned, native-endian
 * We don't use memcpy for CPn because memcpy's args aren't assumed to be aligned. */
98
99
100
/* Overlay types: member `i` is the full-width value, the remaining members
 * expose the same storage as arrays of narrower integers. */
typedef union
{
    uint16_t i;
    uint8_t  c[2];
} MAY_ALIAS x264_union16_t;

typedef union
{
    uint32_t i;
    uint16_t b[2];
    uint8_t  c[4];
} MAY_ALIAS x264_union32_t;

typedef union
{
    uint64_t i;
    uint32_t a[2];
    uint16_t b[4];
    uint8_t  c[8];
} MAY_ALIAS x264_union64_t;

/* 128-bit value carried as two 64-bit halves. */
typedef struct
{
    uint64_t i[2];
} x264_uint128_t;

typedef union
{
    x264_uint128_t i;
    uint64_t a[2];
    uint32_t b[4];
    uint16_t c[8];
    uint8_t  d[16];
} MAY_ALIAS x264_union128_t;

/* Mn(p): the n-bit lvalue stored at address p. */
#define M16(src) (((x264_union16_t*)(src))->i)
#define M32(src) (((x264_union32_t*)(src))->i)
#define M64(src) (((x264_union64_t*)(src))->i)
#define M128(src) (((x264_union128_t*)(src))->i)
/* All-zero 128-bit constant, assignable through M128(). */
#define M128_ZERO ((x264_uint128_t){{0,0}})
Fiona Glaser's avatar
Fiona Glaser committed
108
109
110
/* CPn(dst,src): copy n bits from address src to address dst through the Mn
 * accessors. The expansion is parenthesized so the macro behaves as a single
 * expression wherever it is used. */
#define CP16(dst,src) (M16(dst) = M16(src))
#define CP32(dst,src) (M32(dst) = M32(src))
#define CP64(dst,src) (M64(dst) = M64(src))
#define CP128(dst,src) (M128(dst) = M128(src))
Fiona Glaser's avatar
Fiona Glaser committed
112

113
/* Sample and coefficient types, selected by HIGH_BIT_DEPTH.
 *   pixel     one sample
 *   pixel4    integer wide enough to hold four packed pixels
 *   dctcoef   signed coefficient type
 *   udctcoef  unsigned coefficient type
 * PIXEL_SPLAT_X4(x) replicates x into each of the four pixel lanes of a
 * pixel4; MPIXEL_X4(p) accesses four pixels at p as one pixel4-sized value. */
#if HIGH_BIT_DEPTH
    typedef uint16_t pixel;
    typedef uint64_t pixel4;
    typedef int32_t  dctcoef;
    typedef uint32_t udctcoef;

#   define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
#   define MPIXEL_X4(src) M64(src)
#else
    typedef uint8_t  pixel;
    typedef uint32_t pixel4;
    typedef int16_t  dctcoef;
    typedef uint16_t udctcoef;

#   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
#   define MPIXEL_X4(src) M32(src)
#endif

131
132
#define BIT_DEPTH X264_BIT_DEPTH

133
/* Copy four pixels from src to dst in one access. The expansion is
 * parenthesized so the macro behaves as a single expression. */
#define CPPIXEL_X4(dst,src) (MPIXEL_X4(dst) = MPIXEL_X4(src))
134

135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/* x264_scan8 maps a block index to a position in an 8-column grid; every
 * entry is written as column + row*8 (see the layout diagram below the table).
 * X264_SCAN8_SIZE covers the 6 rows of that grid, X264_SCAN8_LUMA_SIZE its
 * first 5 rows, and X264_SCAN8_0 is the position of the first luma block. */
#define X264_SCAN8_SIZE (6*8)
#define X264_SCAN8_LUMA_SIZE (5*8)
#define X264_SCAN8_0 (4+1*8)

/* Entry order: 16 luma blocks, 4 Cb, 4 Cr, luma DC, 2 chroma DC. */
static const int x264_scan8[16+2*4+3] =
{
    /* Luma */
    4+1*8, 5+1*8, 4+2*8, 5+2*8,
    6+1*8, 7+1*8, 6+2*8, 7+2*8,
    4+3*8, 5+3*8, 4+4*8, 5+4*8,
    6+3*8, 7+3*8, 6+4*8, 7+4*8,

    /* Cb */
    1+1*8, 2+1*8,
    1+2*8, 2+2*8,

    /* Cr */
    1+4*8, 2+4*8,
    1+5*8, 2+5*8,

    /* Luma DC */
    4+5*8,

    /* Chroma DC */
    6+5*8, 7+5*8
};
/*
   0 1 2 3 4 5 6 7
 0
 1   B B   L L L L
 2   B B   L L L L
 3         L L L L
 4   R R   L L L L
 5   R R   Dy  DuDv
*/

171
#include "x264.h"
172
#include "bitstream.h"
Laurent Aimar's avatar
Laurent Aimar committed
173
174
175
176
177
178
179
#include "set.h"
#include "predict.h"
#include "pixel.h"
#include "mc.h"
#include "frame.h"
#include "dct.h"
#include "cabac.h"
180
#include "quant.h"
181
#include "cpu.h"
182
#include "threadpool.h"
Laurent Aimar's avatar
Laurent Aimar committed
183

184
/****************************************************************************
185
 * General functions
186
 ****************************************************************************/
Laurent Aimar's avatar
Laurent Aimar committed
187
/* x264_malloc: allocates memory via memalign or an emulation of it.
 * Buffers allocated with x264_malloc must be released with x264_free. */
Laurent Aimar's avatar
Laurent Aimar committed
189
190
191
void *x264_malloc( int );
void  x264_free( void * );

192
193
194
/* x264_slurp_file: malloc space for the whole file and read it */
char *x264_slurp_file( const char *filename );

Laurent Aimar's avatar
Laurent Aimar committed
195
196
197
/* x264_mdate: return the current date in microseconds */
int64_t x264_mdate( void );

198
199
200
201
/* x264_param2string: return a (malloced) string containing most of
 * the encoding options */
char *x264_param2string( x264_param_t *p, int b_res );

202
203
204
/* log */
void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );

205
void x264_reduce_fraction( uint32_t *n, uint32_t *d );
Fiona Glaser's avatar
Fiona Glaser committed
206
void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
207
void x264_init_vlc_tables( void );
Loren Merritt's avatar
Loren Merritt committed
208

209
/* Clamp x to the pixel range [0, PIXEL_MAX].
 * In-range values have no bits set outside the PIXEL_MAX mask and are
 * returned unchanged; negative values become 0 and larger values PIXEL_MAX.
 * The out-of-range case uses a plain sign test instead of (-x)>>31, which
 * depends on an arithmetic right shift of a negative int and overflows
 * when x is INT_MIN. */
static ALWAYS_INLINE pixel x264_clip_pixel( int x )
{
    if( x & ~PIXEL_MAX )
        return x < 0 ? 0 : PIXEL_MAX;
    return x;
}

214
/* Clamp v to [i_min, i_max]. The lower bound is tested first, so i_min is
 * returned whenever v < i_min, even if i_min > i_max. */
static ALWAYS_INLINE int x264_clip3( int v, int i_min, int i_max )
{
    if( v < i_min )
        return i_min;
    if( v > i_max )
        return i_max;
    return v;
}

219
/* Floating-point counterpart of x264_clip3: clamp v to [f_min, f_max],
 * testing the lower bound first. */
static ALWAYS_INLINE double x264_clip3f( double v, double f_min, double f_max )
{
    if( v < f_min )
        return f_min;
    if( v > f_max )
        return f_max;
    return v;
}
223

224
/* Branch-free median of three integers.
 * Each step uses d & (d>>31), which is d when d is negative and 0 otherwise.
 * NOTE(review): this relies on >> of a negative int being an arithmetic
 * shift and on the differences not overflowing. */
static ALWAYS_INLINE int x264_median( int a, int b, int c )
{
    int diff_ab = a - b;
    int neg_ab  = diff_ab & (diff_ab >> 31);
    a -= neg_ab;                            /* a = max of the original a, b */
    b += neg_ab;                            /* b = min of the original a, b */

    int diff_bc = b - c;
    b -= diff_bc & (diff_bc >> 31);         /* b = max( b, c ) */

    int diff_hi = a - b;
    b += diff_hi & (diff_hi >> 31);         /* b = min( a, b ) */
    return b;
}

234
/* Component-wise median of three two-element vectors a, b and c, written
 * to dst (element 0 first, then element 1). */
static ALWAYS_INLINE void x264_median_mv( int16_t *dst, int16_t *a, int16_t *b, int16_t *c )
{
    for( int comp = 0; comp < 2; comp++ )
        dst[comp] = x264_median( a[comp], b[comp], c[comp] );
}

240
/* Sum, over every pair of consecutive entries in mvc[0..i_mvc-1], of the
 * absolute differences of both components. Returns 0 when there are fewer
 * than two entries. */
static ALWAYS_INLINE int x264_predictor_difference( int16_t (*mvc)[2], intptr_t i_mvc )
{
    int total = 0;
    for( int k = 1; k < i_mvc; k++ )
    {
        int dx = mvc[k-1][0] - mvc[k][0];
        int dy = mvc[k-1][1] - mvc[k][1];
        total += abs( dx ) + abs( dy );
    }
    return total;
}

251
/* Classify the summed left+top values for each of the two components.
 * Each sum maps to 0, 1 or 2 depending on whether it exceeds 2 and 32; the
 * class for component 0 is returned in the low byte and the class for
 * component 1 in the next byte. The inputs are unsigned bytes, so no
 * absolute value is needed before adding them. */
static ALWAYS_INLINE uint16_t x264_cabac_mvd_sum( uint8_t *mvdleft, uint8_t *mvdtop )
{
    int sum0 = mvdleft[0] + mvdtop[0];
    int sum1 = mvdleft[1] + mvdtop[1];
    int class0 = (sum0 > 2) + (sum0 > 32);
    int class1 = (sum1 > 2) + (sum1 > 32);
    return class0 + (class1<<8);
}

260
/* For each of the i_mvc input vectors: add 2 and shift right by 2 (a
 * rounded division by 4), then clamp component 0 to [mv_x_min, mv_x_max]
 * and component 1 to [mv_y_min, mv_y_max]. Results are stored in dst. */
static void ALWAYS_INLINE x264_predictor_roundclip( int16_t (*dst)[2], int16_t (*mvc)[2], int i_mvc, int mv_x_min, int mv_x_max, int mv_y_min, int mv_y_max )
{
    for( int k = 0; k < i_mvc; k++ )
    {
        int16_t *in  = mvc[k];
        int16_t *out = dst[k];
        /* read both components before writing either */
        int rounded_x = (in[0] + 2) >> 2;
        int rounded_y = (in[1] + 2) >> 2;
        out[0] = x264_clip3( rounded_x, mv_x_min, mv_x_max );
        out[1] = x264_clip3( rounded_y, mv_y_min, mv_y_max );
    }
}

271
272
273
extern const uint8_t x264_exp2_lut[64];
extern const float x264_log2_lut[128];
extern const float x264_log2_lz_lut[32];
Fiona Glaser's avatar
Fiona Glaser committed
274

Fiona Glaser's avatar
Fiona Glaser committed
275
276
/* Not a general-purpose function; multiplies input by -1/6 to convert
 * qp to qscale. */
Fiona Glaser's avatar
Fiona Glaser committed
277
278
static ALWAYS_INLINE int x264_exp2fix8( float x )
{
Anton Mitrofanov's avatar
Anton Mitrofanov committed
279
280
281
    int i = x*(-64.f/6.f) + 512.5f;
    if( i < 0 ) return 0;
    if( i > 1023 ) return 0xffff;
282
    return (x264_exp2_lut[i&63]+256) << (i>>6) >> 8;
Fiona Glaser's avatar
Fiona Glaser committed
283
284
285
286
287
}

static ALWAYS_INLINE float x264_log2( uint32_t x )
{
    int lz = x264_clz( x );
288
    return x264_log2_lut[(x<<lz>>24)&0x7f] + x264_log2_lz_lut[lz];
Fiona Glaser's avatar
Fiona Glaser committed
289
290
}

291
292
293
/****************************************************************************
 *
 ****************************************************************************/
Laurent Aimar's avatar
Laurent Aimar committed
294
295
296
297
298
299
300
/* Slice types. Each enumerator's value is also the index of its letter in
 * slice_type_to_char[]. */
enum slice_type_e
{
    SLICE_TYPE_P  = 0,
    SLICE_TYPE_B  = 1,
    SLICE_TYPE_I  = 2,
};

301
/* Printable letter for each slice_type_e value (P = 0, B = 1, I = 2). */
static const char slice_type_to_char[] = { 'P', 'B', 'I' };
Loren Merritt's avatar
Loren Merritt committed
302

303
304
305
306
307
308
309
310
311
312
313
/* SEI payload type codes.
 * NOTE(review): presumably these are the payloadType values defined by the
 * H.264 specification — confirm before adding or renumbering entries. */
enum sei_payload_type_e
{
    SEI_BUFFERING_PERIOD       = 0,
    SEI_PIC_TIMING             = 1,
    SEI_PAN_SCAN_RECT          = 2,
    SEI_FILLER                 = 3,
    SEI_USER_DATA_REGISTERED   = 4,
    SEI_USER_DATA_UNREGISTERED = 5,
    SEI_RECOVERY_POINT         = 6,
};

Laurent Aimar's avatar
Laurent Aimar committed
314
315
316
317
318
319
320
typedef struct
{
    x264_sps_t *sps;
    x264_pps_t *pps;

    int i_type;
    int i_first_mb;
Loren Merritt's avatar
Loren Merritt committed
321
    int i_last_mb;
Laurent Aimar's avatar
Laurent Aimar committed
322
323
324
325
326

    int i_pps_id;

    int i_frame_num;

327
    int b_mbaff;
Laurent Aimar's avatar
Laurent Aimar committed
328
329
330
331
332
    int b_field_pic;
    int b_bottom_field;

    int i_idr_pic_id;   /* -1 if nal_type != 5 */

333
    int i_poc;
Laurent Aimar's avatar
Laurent Aimar committed
334
335
336
337
338
339
340
341
342
343
344
    int i_delta_poc_bottom;

    int i_delta_poc[2];
    int i_redundant_pic_cnt;

    int b_direct_spatial_mv_pred;

    int b_num_ref_idx_override;
    int i_num_ref_idx_l0_active;
    int i_num_ref_idx_l1_active;

345
346
    int b_ref_pic_list_reordering_l0;
    int b_ref_pic_list_reordering_l1;
Lamont Alston's avatar
Lamont Alston committed
347
348
    struct
    {
349
350
        int idc;
        int arg;
351
    } ref_pic_list_order[2][X264_REF_MAX];
352

Dylan Yudaken's avatar
Dylan Yudaken committed
353
    /* P-frame weighting */
354
    x264_weight_t weight[X264_REF_MAX*2][3];
Dylan Yudaken's avatar
Dylan Yudaken committed
355

Lamont Alston's avatar
Lamont Alston committed
356
357
358
359
360
361
    int i_mmco_remove_from_end;
    int i_mmco_command_count;
    struct /* struct for future expansion */
    {
        int i_difference_of_pic_nums;
        int i_poc;
362
    } mmco[X264_REF_MAX];
Lamont Alston's avatar
Lamont Alston committed
363

Laurent Aimar's avatar
Laurent Aimar committed
364
365
    int i_cabac_init_idc;

366
    int i_qp;
Laurent Aimar's avatar
Laurent Aimar committed
367
368
369
370
371
372
373
374
375
376
377
    int i_qp_delta;
    int b_sp_for_swidth;
    int i_qs_delta;

    /* deblocking filter */
    int i_disable_deblocking_filter_idc;
    int i_alpha_c0_offset;
    int i_beta_offset;

} x264_slice_header_t;

Steven Walters's avatar
Steven Walters committed
378
379
typedef struct x264_lookahead_t
{
380
    volatile uint8_t              b_exit_thread;
Steven Walters's avatar
Steven Walters committed
381
382
    uint8_t                       b_thread_active;
    uint8_t                       b_analyse_keyframe;
Fiona Glaser's avatar
Fiona Glaser committed
383
    int                           i_last_keyframe;
Steven Walters's avatar
Steven Walters committed
384
385
    int                           i_slicetype_length;
    x264_frame_t                  *last_nonb;
386
387
388
389
    x264_pthread_t                thread_handle;
    x264_sync_frame_list_t        ifbuf;
    x264_sync_frame_list_t        next;
    x264_sync_frame_list_t        ofbuf;
Steven Walters's avatar
Steven Walters committed
390
391
} x264_lookahead_t;

Laurent Aimar's avatar
Laurent Aimar committed
392
393
394
395
396
397
398
typedef struct x264_ratecontrol_t   x264_ratecontrol_t;

struct x264_t
{
    /* encoder parameters */
    x264_param_t    param;

Steven Walters's avatar
Steven Walters committed
399
    x264_t          *thread[X264_THREAD_MAX+1];
Loren Merritt's avatar
Loren Merritt committed
400
401
    int             b_thread_active;
    int             i_thread_phase; /* which thread to use for the next frame */
402
403
    int             i_threadslice_start; /* first row in this thread slice */
    int             i_threadslice_end; /* row after the end of this thread slice */
404
    x264_threadpool_t *threadpool;
Loren Merritt's avatar
Loren Merritt committed
405

Laurent Aimar's avatar
Laurent Aimar committed
406
407
408
409
    /* bitstream output */
    struct
    {
        int         i_nal;
Fiona Glaser's avatar
Fiona Glaser committed
410
411
        int         i_nals_allocated;
        x264_nal_t  *nal;
Laurent Aimar's avatar
Laurent Aimar committed
412
413
414
415
416
        int         i_bitstream;    /* size of p_bitstream */
        uint8_t     *p_bitstream;   /* will hold data for all nal */
        bs_t        bs;
    } out;

417
418
419
    uint8_t *nal_buffer;
    int      nal_buffer_size;

Loren Merritt's avatar
Loren Merritt committed
420
421
    /**** thread synchronization starts here ****/

Laurent Aimar's avatar
Laurent Aimar committed
422
423
    /* frame number/poc */
    int             i_frame;
424
    int             i_frame_num;
Laurent Aimar's avatar
Laurent Aimar committed
425

426
427
428
429
    int             i_thread_frames; /* Number of different frames being encoded by threads;
                                      * 1 when sliced-threads is on. */
    int             i_nal_type;
    int             i_nal_ref_idc;
Loren Merritt's avatar
Loren Merritt committed
430

431
432
433
434
435
436
437
438
439
    int             i_disp_fields;  /* Number of displayed fields (both coded and implied via pic_struct) */
    int             i_disp_fields_last_frame;
    int             i_prev_duration; /* Duration of previous frame */
    int             i_coded_fields; /* Number of coded fields (both coded and implied via pic_struct) */
    int             i_cpb_delay;    /* Equal to number of fields preceding this field
                                     * since last buffering_period SEI */
    int             i_coded_fields_lookahead; /* Use separate counters for lookahead */
    int             i_cpb_delay_lookahead;

Kieran Kunhya's avatar
Kieran Kunhya committed
440
441
    int             i_cpb_delay_pir_offset;

442
    int             b_queued_intra_refresh;
443
    int64_t         i_last_idr_pts;
444

Laurent Aimar's avatar
Laurent Aimar committed
445
    /* We use only one SPS and one PPS */
446
    x264_sps_t      sps_array[1];
Laurent Aimar's avatar
Laurent Aimar committed
447
    x264_sps_t      *sps;
448
    x264_pps_t      pps_array[1];
Laurent Aimar's avatar
Laurent Aimar committed
449
450
451
    x264_pps_t      *pps;
    int             i_idr_pic_id;

Loren Merritt's avatar
Loren Merritt committed
452
453
454
    /* quantization matrix for decoding, [cqm][qp%6][coef] */
    int             (*dequant4_mf[4])[16];   /* [4][6][16] */
    int             (*dequant8_mf[2])[64];   /* [2][6][64] */
Loren Merritt's avatar
Loren Merritt committed
455
    /* quantization matrix for trellis, [cqm][qp][coef] */
456
457
    int             (*unquant4_mf[4])[16];   /* [4][52][16] */
    int             (*unquant8_mf[2])[64];   /* [2][52][64] */
Loren Merritt's avatar
Loren Merritt committed
458
    /* quantization matrix for deadzone */
459
460
461
462
    udctcoef        (*quant4_mf[4])[16];     /* [4][52][16] */
    udctcoef        (*quant8_mf[2])[64];     /* [2][52][64] */
    udctcoef        (*quant4_bias[4])[16];   /* [4][52][16] */
    udctcoef        (*quant8_bias[2])[64];   /* [2][52][64] */
Loren Merritt's avatar
Loren Merritt committed
463

464
465
466
    /* mv/ref cost arrays.  Indexed by lambda instead of
     * qp because, due to rounding, some quantizers share
     * lambdas.  This saves memory. */
467
468
    uint16_t *cost_mv[LAMBDA_MAX+1];
    uint16_t *cost_mv_fpel[LAMBDA_MAX+1][4];
469

470
471
    const uint8_t   *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */

Laurent Aimar's avatar
Laurent Aimar committed
472
473
474
475
476
477
478
479
    /* Slice header */
    x264_slice_header_t sh;

    /* cabac context */
    x264_cabac_t    cabac;

    struct
    {
480
        /* Frames to be encoded (whose types have been decided) */
Steven Walters's avatar
Steven Walters committed
481
482
483
        x264_frame_t **current;
        /* Unused frames: 0 = fenc, 1 = fdec */
        x264_frame_t **unused[2];
Laurent Aimar's avatar
Laurent Aimar committed
484

Dylan Yudaken's avatar
Dylan Yudaken committed
485
486
487
        /* Unused blank frames (for duplicates) */
        x264_frame_t **blank_unused;

488
        /* frames used for reference + sentinels */
489
        x264_frame_t *reference[X264_REF_MAX+2];
Laurent Aimar's avatar
Laurent Aimar committed
490

Lamont Alston's avatar
Lamont Alston committed
491
492
493
494
495
        int i_last_keyframe;       /* Frame number of the last keyframe */
        int i_last_idr;            /* Frame number of the last IDR (not RP)*/
        int i_poc_last_open_gop;   /* Poc of the I frame of the last open-gop. The value
                                    * is only assigned during the period between that
                                    * I frame and the next P or I frame, else -1 */
496
497

        int i_input;    /* Number of input frames already accepted */
498
499
500
501

        int i_max_dpb;  /* Number of frames allocated in the decoded picture buffer */
        int i_max_ref0;
        int i_max_ref1;
502
        int i_delay;    /* Number of frames buffered for B reordering */
503
504
        int     i_bframe_delay;
        int64_t i_bframe_delay_time;
505
        int64_t i_first_pts;
Yusuke Nakamura's avatar
Yusuke Nakamura committed
506
        int64_t i_prev_reordered_pts[2];
507
508
        int64_t i_largest_pts;
        int64_t i_second_largest_pts;
509
        int b_have_lowres;  /* Whether 1/2 resolution luma planes are being used */
Loren Merritt's avatar
Loren Merritt committed
510
        int b_have_sub8x8_esa;
Laurent Aimar's avatar
Laurent Aimar committed
511
512
513
514
515
516
517
518
519
520
    } frames;

    /* current frame being encoded */
    x264_frame_t    *fenc;

    /* frame being reconstructed */
    x264_frame_t    *fdec;

    /* references lists */
    int             i_ref0;
521
    x264_frame_t    *fref0[X264_REF_MAX+3];     /* ref list 0 */
Laurent Aimar's avatar
Laurent Aimar committed
522
    int             i_ref1;
523
    x264_frame_t    *fref1[X264_REF_MAX+3];     /* ref list 1 */
524
    int             b_ref_reorder[2];
Laurent Aimar's avatar
Laurent Aimar committed
525

526
527
    /* hrd */
    int initial_cpb_removal_delay;
Kieran Kunhya's avatar
Kieran Kunhya committed
528
    int initial_cpb_removal_delay_offset;
529
    int64_t i_reordered_pts_delay;
Laurent Aimar's avatar
Laurent Aimar committed
530
531
532
533

    /* Current MB DCT coeffs */
    struct
    {
534
535
        ALIGNED_16( dctcoef luma16x16_dc[16] );
        ALIGNED_16( dctcoef chroma_dc[2][4] );
536
        // FIXME share memory?
537
538
        ALIGNED_16( dctcoef luma8x8[4][64] );
        ALIGNED_16( dctcoef luma4x4[16+8][16] );
Laurent Aimar's avatar
Laurent Aimar committed
539
540
541
542
543
    } dct;

    /* MB table and cache for current frame/mb */
    struct
    {
544
545
        int     i_mb_width;
        int     i_mb_height;
546
547
        int     i_mb_count;                 /* number of mbs in a frame */

Laurent Aimar's avatar
Laurent Aimar committed
548
549
        /* Strides */
        int     i_mb_stride;
550
551
        int     i_b8_stride;
        int     i_b4_stride;
Laurent Aimar's avatar
Laurent Aimar committed
552
553
554
555
556

        /* Current index */
        int     i_mb_x;
        int     i_mb_y;
        int     i_mb_xy;
557
558
        int     i_b8_xy;
        int     i_b4_xy;
Loren Merritt's avatar
Loren Merritt committed
559

560
        /* Search parameters */
Loren Merritt's avatar
Loren Merritt committed
561
        int     i_me_method;
562
        int     i_subpel_refine;
Loren Merritt's avatar
Loren Merritt committed
563
        int     b_chroma_me;
Loren Merritt's avatar
Loren Merritt committed
564
        int     b_trellis;
565
        int     b_noise_reduction;
566
        int     b_dct_decimate;
567
568
        int     i_psy_rd; /* Psy RD strength--fixed point value*/
        int     i_psy_trellis; /* Psy trellis strength--fixed point value*/
569

570
571
        int     b_interlaced;

572
        /* Allowed qpel MV range to stay within the picture + emulated edge pixels */
573
574
        int     mv_min[2];
        int     mv_max[2];
575
576
577
578
        /* Subpel MV range for motion search.
         * same mv_min/max but includes levels' i_mv_range. */
        int     mv_min_spel[2];
        int     mv_max_spel[2];
579
580
581
        /* Fullpel MV range for motion search */
        int     mv_min_fpel[2];
        int     mv_max_fpel[2];
582

Loren Merritt's avatar
Loren Merritt committed
583
        /* neighboring MBs */
Laurent Aimar's avatar
Laurent Aimar committed
584
        unsigned int i_neighbour;
585
586
        unsigned int i_neighbour8[4];       /* neighbours of each 8x8 or 4x4 block that are available */
        unsigned int i_neighbour4[16];      /* at the time the block is coded */
587
        unsigned int i_neighbour_intra;     /* for constrained intra pred */
588
        unsigned int i_neighbour_frame;     /* ignoring slice boundaries */
Loren Merritt's avatar
Loren Merritt committed
589
590
591
592
        int     i_mb_type_top;
        int     i_mb_type_left;
        int     i_mb_type_topleft;
        int     i_mb_type_topright;
593
        int     i_mb_prev_xy;
594
        int     i_mb_left_xy;
595
        int     i_mb_top_xy;
596
597
        int     i_mb_topleft_xy;
        int     i_mb_topright_xy;
Laurent Aimar's avatar
Laurent Aimar committed
598

Loren Merritt's avatar
Loren Merritt committed
599
        /**** thread synchronization ends here ****/
Loren Merritt's avatar
Loren Merritt committed
600
        /* subsequent variables are either thread-local or constant,
Loren Merritt's avatar
Loren Merritt committed
601
602
         * and won't be copied from one thread to another */

Laurent Aimar's avatar
Laurent Aimar committed
603
604
        /* mb table */
        int8_t  *type;                      /* mb type */
605
        uint8_t *partition;                 /* mb partition */
Laurent Aimar's avatar
Laurent Aimar committed
606
607
        int8_t  *qp;                        /* mb qp */
        int16_t *cbp;                       /* mb cbp: 0x0?: luma, 0x?0: chroma, 0x100: luma dc, 0x0200 and 0x0400: chroma dc  (all set for PCM)*/
608
609
        int8_t  (*intra4x4_pred_mode)[8];   /* intra4x4 pred mode. for non I4x4 set to I_PRED_4x4_DC(2) */
                                            /* actually has only 7 entries; set to 8 for write-combining optimizations */
Laurent Aimar's avatar
Laurent Aimar committed
610
611
612
        uint8_t (*non_zero_count)[16+4+4];  /* nzc. for I_PCM set to 16 */
        int8_t  *chroma_pred_mode;          /* chroma_pred_mode. cabac only. for non intra I_PRED_CHROMA_DC(0) */
        int16_t (*mv[2])[2];                /* mb mv. set to 0 for intra mb */
Henrik Gramner's avatar
Henrik Gramner committed
613
        uint8_t (*mvd[2])[8][2];            /* absolute value of mb mv difference with predict, clipped to [0,33]. set to 0 if intra. cabac only */
614
        int8_t   *ref[2];                   /* mb ref. set to -1 if non used (intra or Lx only) */
615
        int16_t (*mvr[2][X264_REF_MAX*2])[2];/* 16x16 mv for each possible ref */
616
        int8_t  *skipbp;                    /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
617
        int8_t  *mb_transform_size;         /* transform_size_8x8_flag of each mb */
618
619
        uint16_t *slice_table;              /* sh->first_mb of the slice that the indexed mb is part of
                                             * NOTE: this will fail on resolutions above 2^16 MBs... */
620

Dylan Yudaken's avatar
Dylan Yudaken committed
621
         /* buffer for weighted versions of the reference frames */
622
        pixel *p_weight_buf[X264_REF_MAX];
Dylan Yudaken's avatar
Dylan Yudaken committed
623

Laurent Aimar's avatar
Laurent Aimar committed
624
625
626
        /* current value */
        int     i_type;
        int     i_partition;
627
        ALIGNED_4( uint8_t i_sub_partition[4] );
628
        int     b_transform_8x8;
Laurent Aimar's avatar
Laurent Aimar committed
629
630
631
632
633
634
635

        int     i_cbp_luma;
        int     i_cbp_chroma;

        int     i_intra16x16_pred_mode;
        int     i_chroma_pred_mode;

636
637
638
639
640
        /* skip flags for i4x4 and i8x8
         * 0 = encode as normal.
         * 1 (non-RD only) = the DCT is still in h->dct, restore fdec and skip reconstruction.
         * 2 (RD only) = the DCT has since been overwritten by RD; restore that too. */
        int i_skip_intra;
Fiona Glaser's avatar
Fiona Glaser committed
641
642
        /* skip flag for motion compensation */
        /* if we've already done MC, we don't need to do it again */
643
        int b_skip_mc;
Fiona Glaser's avatar
Fiona Glaser committed
644
645
        /* set to true if we are re-encoding a macroblock. */
        int b_reencode_mb;
Fiona Glaser's avatar
Fiona Glaser committed
646
        int ip_offset; /* Used by PIR to offset the quantizer of intra-refresh blocks. */
Fiona Glaser's avatar
Fiona Glaser committed
647
        int b_deblock_rdo;
648

Laurent Aimar's avatar
Laurent Aimar committed
649
650
        struct
        {
651
652
653
            /* space for p_fenc and p_fdec */
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
654
655
            ALIGNED_16( pixel fenc_buf[24*FENC_STRIDE] );
            ALIGNED_16( pixel fdec_buf[27*FDEC_STRIDE] );
656

Loren Merritt's avatar
Loren Merritt committed
657
            /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
658
659
            ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
            ALIGNED_16( pixel i8x8_fdec_buf[16*16] );
660
661
            ALIGNED_16( dctcoef i8x8_dct_buf[3][64] );
            ALIGNED_16( dctcoef i4x4_dct_buf[15][16] );
662
663
664
665
            uint32_t i4x4_nnz_buf[4];
            uint32_t i8x8_nnz_buf[4];
            int i4x4_cbp;
            int i8x8_cbp;
666

667
            /* Psy trellis DCT data */
668
669
            ALIGNED_16( dctcoef fenc_dct8[4][64] );
            ALIGNED_16( dctcoef fenc_dct4[16][16] );
670

671
672
673
            /* Psy RD SATD/SA8D scores cache */
            ALIGNED_16( uint64_t fenc_hadamard_cache[9] );
            ALIGNED_16( uint32_t fenc_satd_cache[32] );
674

Laurent Aimar's avatar
Laurent Aimar committed
675
            /* pointers to the mb of the frame to be compressed */
676
            pixel *p_fenc[3]; /* y,u,v */
677
            /* pointer to the actual source frame, not a block copy */
678
            pixel *p_fenc_plane[2]; /* y,uv */
Laurent Aimar's avatar
Laurent Aimar committed
679

Loren Merritt's avatar
Loren Merritt committed
680
            /* pointers to the mb of the frame to be reconstructed */
681
            pixel *p_fdec[3];
Laurent Aimar's avatar
Laurent Aimar committed
682
683

            /* pointers to the mb of the reference frames */
684
            int i_fref[2];
685
686
687
            pixel *p_fref[2][X264_REF_MAX*2][4+1]; /* last: yN, yH, yV, yHV, uv */
            pixel *p_fref_w[X264_REF_MAX*2];  /* weighted fullpel luma */
            uint16_t *p_integral[2][X264_REF_MAX];
Laurent Aimar's avatar
Laurent Aimar committed
688

689
            /* fref stride */
Laurent Aimar's avatar
Laurent Aimar committed
690
691
692
693
694
695
            int     i_stride[3];
        } pic;

        /* cache */
        struct
        {
696
            /* real intra4x4_pred_mode if I_4X4 or I_8X8, I_PRED_4x4_DC if mb available, -1 if not */
Fiona Glaser's avatar
Fiona Glaser committed
697
            ALIGNED_8( int8_t intra4x4_pred_mode[X264_SCAN8_LUMA_SIZE] );
Laurent Aimar's avatar
Laurent Aimar committed
698

Loren Merritt's avatar
Loren Merritt committed
699
            /* i_non_zero_count if available else 0x80 */
Henrik Gramner's avatar
Henrik Gramner committed
700
            ALIGNED_16( uint8_t non_zero_count[X264_SCAN8_SIZE] );
Laurent Aimar's avatar
Laurent Aimar committed
701

Loren Merritt's avatar
Loren Merritt committed
702
            /* -1 if unused, -2 if unavailable */
Fiona Glaser's avatar
Fiona Glaser committed
703
            ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
Laurent Aimar's avatar
Laurent Aimar committed
704

Loren Merritt's avatar
Loren Merritt committed
705
            /* 0 if not available */
Fiona Glaser's avatar
Fiona Glaser committed
706
707
            ALIGNED_16( int16_t mv[2][X264_SCAN8_LUMA_SIZE][2] );
            ALIGNED_8( uint8_t mvd[2][X264_SCAN8_LUMA_SIZE][2] );
708
709

            /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
Fiona Glaser's avatar
Fiona Glaser committed
710
            ALIGNED_4( int8_t skip[X264_SCAN8_LUMA_SIZE] );
711

712
713
            ALIGNED_4( int16_t direct_mv[2][4][2] );
            ALIGNED_4( int8_t  direct_ref[2][4] );
714
            int     direct_partition;
715
            ALIGNED_4( int16_t pskip_mv[2] );
716

717
718
            /* number of neighbors (top and left) that used 8x8 dct */
            int     i_neighbour_transform_size;
719
            int     i_neighbour_interlaced;
720
721
722
723

            /* neighbor CBPs */
            int     i_cbp_top;
            int     i_cbp_left;
Laurent Aimar's avatar
Laurent Aimar committed
724
725
726
        } cache;

        /* */
727
        int     i_qp;       /* current qp */
728
        int     i_chroma_qp;
Laurent Aimar's avatar
Laurent Aimar committed
729
730
        int     i_last_qp;  /* last qp */
        int     i_last_dqp; /* last delta qp */
731
        int     b_variable_qp; /* whether qp is allowed to vary per macroblock */
Loren Merritt's avatar
Loren Merritt committed
732
        int     b_lossless;
Loren Merritt's avatar
Loren Merritt committed
733
734
        int     b_direct_auto_read; /* take stats for --direct auto from the 2pass log */
        int     b_direct_auto_write; /* analyse direct modes, to use and/or save */
Laurent Aimar's avatar
Laurent Aimar committed
735

Fiona Glaser's avatar
Fiona Glaser committed
736
737
738
739
740
        /* lambda values */
        int     i_trellis_lambda2[2][2]; /* [luma,chroma][inter,intra] */
        int     i_psy_rd_lambda;
        int     i_chroma_lambda2_offset;

741
        /* B_direct and weighted prediction */
742
        int16_t dist_scale_factor_buf[2][X264_REF_MAX*2][4];
743
        int16_t (*dist_scale_factor)[4];
744
        int8_t bipred_weight_buf[2][X264_REF_MAX*2][4];
745
        int8_t (*bipred_weight)[4];
746
        /* maps fref1[0]'s ref indices into the current list0 */
747
#define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
748
        int8_t  map_col_to_list0[X264_REF_MAX+2];
Fiona Glaser's avatar
Fiona Glaser committed
749
        int ref_blind_dupe; /* The index of the blind reference frame duplicate. */
750
        int8_t deblock_ref_table[X264_REF_MAX*2+2];
Fiona Glaser's avatar
Fiona Glaser committed
751
#define deblock_ref_table(x) h->mb.deblock_ref_table[(x)+2]
Laurent Aimar's avatar
Laurent Aimar committed
752
753
754
755
756
757
758
759
    } mb;

    /* rate control encoding only */
    x264_ratecontrol_t *rc;

    /* stats */
    struct
    {
760
761
762
        /* Current frame stats */
        struct
        {
763
764
765
766
            /* MV bits (MV+Ref+Block Type) */
            int i_mv_bits;
            /* Texture bits (DCT coefs) */
            int i_tex_bits;
767
768
769
            /* Miscellaneous bits (presumably whatever is not counted as MV or texture bits -- TODO confirm) */
            int i_misc_bits;
            /* MB type counts */
770
            int i_mb_count[19];
771
            int i_mb_count_i;
Loren Merritt's avatar
Loren Merritt committed
772
773
            int i_mb_count_p;
            int i_mb_count_skip;
774
            int i_mb_count_8x8dct[2];
775
            int i_mb_count_ref[2][X264_REF_MAX*2];
776
            int i_mb_partition[17];
777
            int i_mb_cbp[6];
778
            int i_mb_pred_mode[4][13];
Loren Merritt's avatar
Loren Merritt committed
779
780
            /* Adaptive direct mv pred */
            int i_direct_score[2];
781
782
783
            /* Metrics */
            int64_t i_ssd[3];
            double f_ssim;
784
785
        } frame;

Loren Merritt's avatar
Loren Merritt committed
786
        /* Cumulative stats */
787

Laurent Aimar's avatar
Laurent Aimar committed
788
        /* per slice info */
Fiona Glaser's avatar
Fiona Glaser committed
789
790
791
        int     i_frame_count[3];
        int64_t i_frame_size[3];
        double  f_frame_qp[3];
792
        int     i_consecutive_bframes[X264_BFRAME_MAX+1];
793
        /* */
Fiona Glaser's avatar
Fiona Glaser committed
794
795
796
797
798
799
800
        double  f_ssd_global[3];
        double  f_psnr_average[3];
        double  f_psnr_mean_y[3];
        double  f_psnr_mean_u[3];
        double  f_psnr_mean_v[3];
        double  f_ssim_mean_y[3];
        double  f_frame_duration[3];
801
        /* */
Fiona Glaser's avatar
Fiona Glaser committed
802
        int64_t i_mb_count[3][19];
803
        int64_t i_mb_partition[2][17];
804
        int64_t i_mb_count_8x8dct[2];
805
        int64_t i_mb_count_ref[2][2][X264_REF_MAX*2];
806
        int64_t i_mb_cbp[6];
807
        int64_t i_mb_pred_mode[4][13];
Loren Merritt's avatar
Loren Merritt committed
808
809
810
        /* */
        int     i_direct_score[2];
        int     i_direct_frames[2];
Dylan Yudaken's avatar
Dylan Yudaken committed
811
        /* num p-frames weighted */
Fiona Glaser's avatar
Fiona Glaser committed
812
        int     i_wpred[2];
813

Laurent Aimar's avatar
Laurent Aimar committed
814
815
    } stat;

Anton Mitrofanov's avatar
Anton Mitrofanov committed
816
    ALIGNED_16( uint32_t nr_residual_sum[2][64] );
817
    ALIGNED_16( udctcoef nr_offset[2][64] );
Anton Mitrofanov's avatar
Anton Mitrofanov committed
818
819
    uint32_t        nr_count[2];

820
    /* Buffers that are allocated per-thread even in sliced threads. */
821
    void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
822
    pixel *intra_border_backup[2][2]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
Fiona Glaser's avatar
Fiona Glaser committed
823
    uint8_t (*deblock_strength[2])[2][4][4];
824

Loren Merritt's avatar
Loren Merritt committed
825
    /* CPU-dependent functions */
Laurent Aimar's avatar
Laurent Aimar committed
826
    x264_predict_t      predict_16x16[4+3];
827
828
    x264_predict_t      predict_8x8c[4+3];
    x264_predict8x8_t   predict_8x8[9+3];
Laurent Aimar's avatar
Laurent Aimar committed
829
    x264_predict_t      predict_4x4[9+3];
830
    x264_predict_8x8_filter_t predict_8x8_filter;
Laurent Aimar's avatar
Laurent Aimar committed
831
832

    x264_pixel_function_t pixf;
833
    x264_mc_functions_t   mc;
Laurent Aimar's avatar
Laurent Aimar committed
834
    x264_dct_function_t   dctf;
835
    x264_zigzag_function_t zigzagf;
836
    x264_quant_function_t quantf;
Loren Merritt's avatar
Loren Merritt committed
837
    x264_deblock_function_t loopf;
838
    x264_bitstream_function_t bsf;
Laurent Aimar's avatar
Laurent Aimar committed
839

Steven Walters's avatar
Steven Walters committed
840
#if HAVE_VISUALIZE
Loren Merritt's avatar
Loren Merritt committed
841
842
    struct visualize_t *visualize;
#endif
Steven Walters's avatar
Steven Walters committed
843
    x264_lookahead_t *lookahead;
Laurent Aimar's avatar
Laurent Aimar committed
844
845
};

846
847
// included at the end because it needs x264_t
#include "macroblock.h"
Fiona Glaser's avatar
Fiona Glaser committed
848
#include "rectangle.h"
849

Steven Walters's avatar
Steven Walters committed
850
#if HAVE_MMX
851
852
853
#include "x86/util.h"
#endif

Laurent Aimar's avatar
Laurent Aimar committed
854
855
#endif