/*****************************************************************************
 * analyse.c: macroblock analysis
 *****************************************************************************
 * Copyright (C) 2003-2011 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

28
#define _ISOC99_SOURCE
Laurent Aimar's avatar
Laurent Aimar committed
29

30
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
31
32
#include "macroblock.h"
#include "me.h"
33
#include "ratecontrol.h"
34
35
#include "analyse.h"
#include "rdo.c"
Laurent Aimar's avatar
Laurent Aimar committed
36
37
38
39

typedef struct
{
    /* 16x16 */
40
    int       i_rd16x16;
Laurent Aimar's avatar
Laurent Aimar committed
41
    x264_me_t me16x16;
42
    x264_me_t bi16x16;      /* for b16x16 BI mode, since MVs can differ from l0/l1 */
Laurent Aimar's avatar
Laurent Aimar committed
43
44
45

    /* 8x8 */
    int       i_cost8x8;
Håkan Hjort's avatar
Håkan Hjort committed
46
    /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
47
    ALIGNED_4( int16_t mvc[32][5][2] );
Laurent Aimar's avatar
Laurent Aimar committed
48
49
50
51
52
53
54
55
56
57
58
59
    x264_me_t me8x8[4];

    /* Sub 4x4 */
    int       i_cost4x4[4]; /* cost per 8x8 partition */
    x264_me_t me4x4[4][4];

    /* Sub 8x4 */
    int       i_cost8x4[4]; /* cost per 8x8 partition */
    x264_me_t me8x4[4][2];

    /* Sub 4x8 */
    int       i_cost4x8[4]; /* cost per 8x8 partition */
Loren Merritt's avatar
Loren Merritt committed
60
    x264_me_t me4x8[4][2];
Laurent Aimar's avatar
Laurent Aimar committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

    /* 16x8 */
    int       i_cost16x8;
    x264_me_t me16x8[2];

    /* 8x16 */
    int       i_cost8x16;
    x264_me_t me8x16[2];

} x264_mb_analysis_list_t;

typedef struct
{
    /* conduct the analysis using this lamda and QP */
    int i_lambda;
76
    int i_lambda2;
Laurent Aimar's avatar
Laurent Aimar committed
77
    int i_qp;
78
    uint16_t *p_cost_mv;
79
    uint16_t *p_cost_ref[2];
80
    int i_mbrd;
Laurent Aimar's avatar
Laurent Aimar committed
81
82
83


    /* I: Intra part */
84
85
    /* Take some shortcuts in intra search if intra is deemed unlikely */
    int b_fast_intra;
Fiona Glaser's avatar
Fiona Glaser committed
86
    int b_force_intra; /* For Periodic Intra Refresh.  Only supported in P-frames. */
87
    int b_avoid_topright; /* For Periodic Intra Refresh: don't predict from top-right pixels. */
88
    int b_try_skip;
89

90
    /* Luma part */
91
92
    int i_satd_i16x16;
    int i_satd_i16x16_dir[7];
Laurent Aimar's avatar
Laurent Aimar committed
93
94
    int i_predict16x16;

95
    int i_satd_i8x8;
96
    int i_cbp_i8x8_luma;
97
    ALIGNED_16( uint16_t i_satd_i8x8_dir[4][16] );
98
    int i_predict8x8[4];
99

100
101
    int i_satd_i4x4;
    int i_predict4x4[16];
Laurent Aimar's avatar
Laurent Aimar committed
102

103
104
    int i_satd_pcm;

Laurent Aimar's avatar
Laurent Aimar committed
105
    /* Chroma part */
Henrik Gramner's avatar
Henrik Gramner committed
106
107
    int i_satd_chroma;
    int i_satd_chroma_dir[7];
108
    int i_predict8x8chroma;
Laurent Aimar's avatar
Laurent Aimar committed
109
110
111
112
113
114

    /* II: Inter part P/B frame */
    x264_mb_analysis_list_t l0;
    x264_mb_analysis_list_t l1;

    int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
115
116
117
    int i_cost16x16direct;
    int i_cost8x8bi;
    int i_cost8x8direct[4];
118
119
120
    int i_satd8x8[3][4]; /* [L0,L1,BI][8x8 0..3] SATD only */
    int i_cost_est16x8[2]; /* Per-partition estimated cost */
    int i_cost_est8x16[2];
121
122
    int i_cost16x8bi;
    int i_cost8x16bi;
123
124
125
126
127
    int i_rd16x16bi;
    int i_rd16x16direct;
    int i_rd16x8bi;
    int i_rd8x16bi;
    int i_rd8x8bi;
128
129
130
131
132

    int i_mb_partition16x8[2]; /* mb_partition_e */
    int i_mb_partition8x16[2];
    int i_mb_type16x8; /* mb_class_e */
    int i_mb_type8x16;
133
134

    int b_direct_available;
135
    int b_early_terminate;
Laurent Aimar's avatar
Laurent Aimar committed
136
137
138

} x264_mb_analysis_t;

139
/* lambda = pow(2,qp/6-2) */
140
141
const uint16_t x264_lambda_tab[QP_MAX_MAX+1] =
{
142
143
144
145
146
147
148
149
   1,   1,   1,   1,   1,   1,   1,   1, /*  0- 7 */
   1,   1,   1,   1,   1,   1,   1,   1, /*  8-15 */
   2,   2,   2,   2,   3,   3,   3,   4, /* 16-23 */
   4,   4,   5,   6,   6,   7,   8,   9, /* 24-31 */
  10,  11,  13,  14,  16,  18,  20,  23, /* 32-39 */
  25,  29,  32,  36,  40,  45,  51,  57, /* 40-47 */
  64,  72,  81,  91, 102, 114, 128, 144, /* 48-55 */
 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
Fiona Glaser's avatar
Fiona Glaser committed
150
151
152
 406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */
1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */
2048,2299,                               /* 80-81 */
Laurent Aimar's avatar
Laurent Aimar committed
153
154
};

Fiona Glaser's avatar
Fiona Glaser committed
155
/* lambda2 = pow(lambda,2) * .9 * 256 */
Fiona Glaser's avatar
Fiona Glaser committed
156
/* Capped to avoid overflow */
157
158
const int x264_lambda2_tab[QP_MAX_MAX+1] =
{
Fiona Glaser's avatar
Fiona Glaser committed
159
160
161
162
163
164
165
166
167
168
169
       14,       18,       22,       28,       36,       45,      57,      72, /*  0- 7 */
       91,      115,      145,      182,      230,      290,     365,     460, /*  8-15 */
      580,      731,      921,     1161,     1462,     1843,    2322,    2925, /* 16-23 */
     3686,     4644,     5851,     7372,     9289,    11703,   14745,   18578, /* 24-31 */
    23407,    29491,    37156,    46814,    58982,    74313,   93628,  117964, /* 32-39 */
   148626,   187257,   235929,   297252,   374514,   471859,  594505,  749029, /* 40-47 */
   943718,  1189010,  1498059,  1887436,  2378021,  2996119, 3774873, 4756042, /* 48-55 */
  5992238,  7549747,  9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */
 38048341, 47937906, 60397977, 76096683, 95875813,120795955,                   /* 64-69 */
134217727,134217727,134217727,134217727,134217727,134217727,                   /* 70-75 */
134217727,134217727,134217727,134217727,134217727,134217727,                   /* 76-81 */
170
171
};

172
173
/* 64-entry 8-bit lookup table; the values follow round(256*(pow(2,i/64.)-1)),
 * i.e. the fractional part of 2^x in 1/64 steps. */
const uint8_t x264_exp2_lut[64] =
{
      0,   3,   6,   8,  11,  14,  17,  20,  23,  26,  29,  32,  36,  39,  42,  45,
     48,  52,  55,  58,  62,  65,  69,  72,  76,  80,  83,  87,  91,  94,  98, 102,
    106, 110, 114, 118, 122, 126, 130, 135, 139, 143, 147, 152, 156, 161, 165, 170,
    175, 179, 184, 189, 194, 198, 203, 208, 214, 219, 224, 229, 234, 240, 245, 250
};

180
181
/* 128-entry lookup table; the values follow log2(1 + i/128.), i.e. the
 * fractional part of a base-2 logarithm in 1/128 steps. */
const float x264_log2_lut[128] =
{
    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
};

/* Avoid an int/float conversion. */
/* Entry i holds 31-i as a float, so a count in 0..31 can be mapped straight
 * to a float value by table lookup. */
const float x264_log2_lz_lut[32] =
{
    31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
};

Fiona Glaser's avatar
Fiona Glaser committed
206
207
// should the intra and inter lambdas be different?
// I'm just matching the behaviour of deadzone quant.
208
209
static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] =
{
Fiona Glaser's avatar
Fiona Glaser committed
210
    // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
Fiona Glaser's avatar
Fiona Glaser committed
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
    {
               46,       58,       73,       92,      117,      147,
              185,      233,      294,      370,      466,      587,
              740,      932,     1174,     1480,     1864,     2349,
             2959,     3728,     4697,     5918,     7457,     9395,
            11837,    14914,    18790,    23674,    29828,    37581,
            47349,    59656,    75163,    94699,   119313,   150326,
           189399,   238627,   300652,   378798,   477255,   601304,
           757596,   954511,  1202608,  1515192,  1909022,  2405217,
          3030384,  3818045,  4810435,  6060769,  7636091,  9620872,
         12121539, 15272182, 19241743, 24243077, 30544363, 38483486,
         48486154, 61088726, 76966972, 96972308,
        122177453,134217727,134217727,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
    },
Fiona Glaser's avatar
Fiona Glaser committed
226
    // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
Fiona Glaser's avatar
Fiona Glaser committed
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
    {
               27,       34,       43,       54,       68,       86,
              108,      136,      172,      216,      273,      343,
              433,      545,      687,      865,     1090,     1374,
             1731,     2180,     2747,     3461,     4361,     5494,
             6922,     8721,    10988,    13844,    17442,    21976,
            27688,    34885,    43953,    55377,    69771,    87906,
           110755,   139543,   175813,   221511,   279087,   351627,
           443023,   558174,   703255,   886046,  1116348,  1406511,
          1772093,  2232697,  2813022,  3544186,  4465396,  5626046,
          7088374,  8930791, 11252092, 14176748, 17861583, 22504184,
         28353495, 35723165, 45008368, 56706990,
         71446330, 90016736,113413980,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
    }
Fiona Glaser's avatar
Fiona Glaser committed
243
244
};

Fiona Glaser's avatar
Fiona Glaser committed
245
/* Largest valid index into x264_chroma_lambda2_offset_tab. */
#define MAX_CHROMA_LAMBDA_OFFSET 36
/* Chroma lambda2 scale factors in 8.8 fixed point: entry 12 is 256 (1.0) and
 * the value doubles every three entries; the last entry is clamped to 65535
 * so it fits in uint16_t. */
static const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] =
{
       16,    20,    25,    32,    40,    50,
       64,    80,   101,   128,   161,   203,
      256,   322,   406,   512,   645,   812,
     1024,  1290,  1625,  2048,  2580,  3250,
     4096,  5160,  6501,  8192, 10321, 13003,
    16384, 20642, 26007, 32768, 41285, 52015,
    65535
};

257
/* TODO: calculate CABAC costs */
258
259
static const uint8_t i_mb_b_cost_table[X264_MBTYPE_MAX] =
{
260
    9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
261
};
262
263
/* Fixed cost table for B-slice 16x8/8x16 macroblocks (17 entries; the first
 * eight are zero). NOTE(review): presumably indexed by a combined
 * partition-type code computed by the caller -- confirm at the use site. */
static const uint8_t i_mb_b16x8_cost_table[17] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
};
266
267
/* Fixed cost per sub-macroblock type in B-slices (13 entries). */
static const uint8_t i_sub_mb_b_cost_table[13] =
{
    7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
};
270
271
/* Fixed cost per sub-macroblock type in P-slices (4 entries). */
static const uint8_t i_sub_mb_p_cost_table[4] =
{
    5, 3, 3, 1
};
274

275
276
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );

Fiona Glaser's avatar
Fiona Glaser committed
277
static uint16_t x264_cost_ref[QP_MAX+1][3][33];
278
static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
279
static uint16_t x264_cost_i4x4_mode[(QP_MAX+2)*32];
280

281
282
283
284
285
286
287
288
289
290
291
292
/* Allocate and fill the QP-independent table that x264_analyse_init_costs()
 * scales by lambda to build the per-QP MV cost tables.
 * Entry 0 is 0.718; entry n (1..2*4*2048) is 2*log2(n+1) + 1.718.
 * h is not used. Returns the x264_malloc'd table, or NULL if allocation
 * fails. */
float *x264_analyse_prepare_costs( x264_t *h )
{
    const int last = 2*4*2048;
    float *table = x264_malloc( (last+1)*sizeof(float) );

    if( table )
    {
        table[0] = 0.718f;
        for( int n = 1; n <= last; n++ )
            table[n] = log2f(n+1)*2 + 1.718f;
    }
    return table;
}

/* Build the cost tables for one QP, if they have not been built yet.
 *
 *  h    - encoder context; h->cost_mv[qp] and h->cost_mv_fpel[qp][] are filled in
 *  logs - table produced by x264_analyse_prepare_costs()
 *  qp   - QP to build the tables for (index into x264_lambda_tab)
 *
 * Returns 0 on success or when h->cost_mv[qp] already exists, and -1 via the
 * `fail` label when a CHECKED_MALLOC allocation fails. */
int x264_analyse_init_costs( x264_t *h, float *logs, int qp )
{
    int lambda = x264_lambda_tab[qp];
    if( h->cost_mv[qp] )
        return 0;
    /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
    CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
    /* Point at the middle of the buffer so it can be indexed by signed mv delta. */
    h->cost_mv[qp] += 2*4*2048;
    for( int i = 0; i <= 2*4*2048; i++ )
    {
        h->cost_mv[qp][-i] =
        h->cost_mv[qp][i]  = X264_MIN( lambda * logs[i] + .5f, (1<<16)-1 );
    }
    /* x264_cost_ref is shared file-scope storage, hence the lock. */
    x264_pthread_mutex_lock( &cost_ref_mutex );
    for( int i = 0; i < 3; i++ )
        for( int j = 0; j < 33; j++ )
            x264_cost_ref[qp][i][j] = X264_MIN( i ? lambda * bs_size_te( i, j ) : 0, (1<<16)-1 );
    x264_pthread_mutex_unlock( &cost_ref_mutex );
    if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
    {
        /* One full-pel table per quarter-pel phase j, each a stride-4 view of cost_mv. */
        for( int j = 0; j < 4; j++ )
        {
            CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*2048 + 1) * sizeof(uint16_t) );
            h->cost_mv_fpel[qp][j] += 2*2048;
            for( int i = -2*2048; i < 2*2048; i++ )
                h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
        }
    }
    /* Row for this QP inside the 64-byte-aligned view of x264_cost_i4x4_mode. */
    uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + qp*32;
    for( int i = 0; i < 17; i++ )
        cost_i4x4_mode[i] = 3*lambda*(i!=8);
    return 0;
fail:
    return -1;
}

328
329
/* Release every per-QP table allocated by x264_analyse_init_costs().
 * The stored pointers refer to the middle of each buffer, so the same offset
 * that init added is subtracted before freeing. */
void x264_analyse_free_costs( x264_t *h )
{
    for( int i = 0; i < QP_MAX+1; i++ )
    {
        if( h->cost_mv[i] )
            x264_free( h->cost_mv[i] - 2*4*2048 );
        /* Check each phase table on its own: if init failed part-way through
         * the four allocations, the later entries are NULL and must not be
         * offset and freed just because entry 0 exists. */
        for( int j = 0; j < 4; j++ )
            if( h->cost_mv_fpel[i][j] )
                x264_free( h->cost_mv_fpel[i][j] - 2*2048 );
    }
}

340
341
/* Extend the weighted copies of the first list-0 reference that has a luma
 * weight function, down to row `end` (plus padding).
 *
 * Only rows not yet covered (tracked in h->fenc->i_lines_weighted) are
 * processed. The source plane of that first weighted reference is used for
 * every list-0 entry k >= j that has a weight function; the outer loop stops
 * after handling that first reference. */
void x264_analyse_weight_frame( x264_t *h, int end )
{
    for( int j = 0; j < h->i_ref[0]; j++ )
    {
        if( h->sh.weight[j][0].weightfn )
        {
            x264_frame_t *frame = h->fref[0][j];
            int width = frame->i_width[0] + 2*PADH;
            int i_padv = PADV << PARAM_INTERLACED;
            int offset, height;
            /* Top-left corner of the padded plane. */
            pixel *src = frame->filtered[0][0] - frame->i_stride[0]*i_padv - PADH;
            height = X264_MIN( 16 + end + i_padv, h->fref[0][j]->i_lines[0] + i_padv*2 ) - h->fenc->i_lines_weighted;
            offset = h->fenc->i_lines_weighted*frame->i_stride[0];
            h->fenc->i_lines_weighted += height;
            if( height )
                for( int k = j; k < h->i_ref[0]; k++ )
                    if( h->sh.weight[k][0].weightfn )
                    {
                        pixel *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*i_padv - PADH;
                        x264_weight_scale_plane( h, dst + offset, frame->i_stride[0],
                                                 src + offset, frame->i_stride[0],
                                                 width, height, &h->sh.weight[k][0] );
                    }
            break;
        }
    }
}

368
369
370
/* Point the analysis context at the precomputed cost tables for a->i_qp:
 * the MV cost table, and for each list the reference-index cost row selected
 * by the number of active references (clamped to 1, 2 or 3+). */
static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
{
    a->p_cost_mv = h->cost_mv[a->i_qp];
    a->p_cost_ref[0] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
    a->p_cost_ref[1] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
}

Fiona Glaser's avatar
Fiona Glaser committed
376
/* Set every QP-dependent field of the analysis context and of h->mb.
 *
 * qp may exceed QP_MAX_SPEC: the lambda tables are then still indexed with
 * the out-of-spec value, the emergency noise-reduction buffers are selected,
 * and the QP actually stored in a->i_qp / h->mb.i_qp is clamped to
 * QP_MAX_SPEC. */
static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int qp )
{
    int effective_chroma_qp = h->chroma_qp_table[SPEC_QP(qp)] + X264_MAX( qp - QP_MAX_SPEC, 0 );
    a->i_lambda = x264_lambda_tab[qp];
    a->i_lambda2 = x264_lambda2_tab[qp];

    h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
    if( h->param.analyse.i_trellis )
    {
        /* [0][*] uses the luma QP, [1][*] the effective chroma QP;
         * [*][0] is the inter table, [*][1] the intra table. */
        h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][qp];
        h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][qp];
        h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][effective_chroma_qp];
        h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][effective_chroma_qp];
    }
    h->mb.i_psy_rd_lambda = a->i_lambda;
    /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
    int chroma_offset_idx = X264_MIN( qp-effective_chroma_qp+12, MAX_CHROMA_LAMBDA_OFFSET );
    h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;

    if( qp > QP_MAX_SPEC )
    {
        h->nr_offset = h->nr_offset_emergency[qp-QP_MAX_SPEC-1];
        h->nr_residual_sum = h->nr_residual_sum_buf[1];
        h->nr_count = h->nr_count_buf[1];
        h->mb.b_noise_reduction = 1;
        qp = QP_MAX_SPEC; /* Out-of-spec QPs are just used for calculating lambda values. */
    }
    else
    {
        h->nr_offset = h->nr_offset_denoise;
        h->nr_residual_sum = h->nr_residual_sum_buf[0];
        h->nr_count = h->nr_count_buf[0];
        h->mb.b_noise_reduction = 0;
    }

    a->i_qp = h->mb.i_qp = qp;
    h->mb.i_chroma_qp = h->chroma_qp_table[qp];
}

Fiona Glaser's avatar
Fiona Glaser committed
415
/* Reset the analysis context for the current macroblock.
 *
 * Derives the RD level from the subpel-refine setting, applies the QP via
 * x264_mb_analyse_init_qp(), resets the intra costs to COST_MAX and, for
 * non-I slices, computes the allowed motion-vector ranges, resets the inter
 * costs and decides whether the fast-intra shortcut or forced intra
 * (periodic intra refresh) applies. With more than one frame thread it also
 * waits on the reference frames at the start of each macroblock row. */
static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
{
    int subme = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);

    /* mbrd == 1 -> RD mode decision */
    /* mbrd == 2 -> RD refinement */
    /* mbrd == 3 -> QPRD */
    a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10);
    h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1;
    a->b_early_terminate = h->param.analyse.i_subpel_refine < 11;

    x264_mb_analyse_init_qp( h, a, qp );

    h->mb.b_transform_8x8 = 0;

    /* I: Intra part */
    a->i_satd_i16x16 =
    a->i_satd_i8x8   =
    a->i_satd_i4x4   =
    a->i_satd_chroma = COST_MAX;

    /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it.
     * PCM cost can overflow with high lambda2, so cap it at COST_MAX. */
    uint64_t pcm_cost = ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8;
    a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd && pcm_cost < COST_MAX ? pcm_cost : COST_MAX;

    a->b_fast_intra = 0;
    a->b_avoid_topright = 0;
    h->mb.i_skip_intra =
        h->mb.b_lossless ? 0 :
        a->i_mbrd ? 2 :
        !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;

    /* II: Inter part P/B frame */
    if( h->sh.i_type != SLICE_TYPE_I )
    {
        int i_fmv_range = 4 * h->param.analyse.i_mv_range;
        // limit motion search to a slightly smaller range than the theoretical limit,
        // since the search may go a few iterations past its given range
        int i_fpel_border = 6; // umh: 1 for diamond, 2 for octagon, 2 for hpel

        /* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range-1 )
        h->mb.mv_min[0] = 4*( -16*h->mb.i_mb_x - 24 );
        h->mb.mv_max[0] = 4*( 16*( h->mb.i_mb_width - h->mb.i_mb_x - 1 ) + 24 );
        h->mb.mv_min_spel[0] = CLIP_FMV( h->mb.mv_min[0] );
        h->mb.mv_max_spel[0] = CLIP_FMV( h->mb.mv_max[0] );
        if( h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P )
        {
            int max_x = (h->fref[0][0]->i_pir_end_col * 16 - 3)*4; /* 3 pixels of hpel border */
            int max_mv = max_x - 4*16*h->mb.i_mb_x;
            /* If we're left of the refresh bar, don't reference right of it. */
            if( max_mv > 0 && h->mb.i_mb_x < h->fdec->i_pir_start_col )
                h->mb.mv_max_spel[0] = X264_MIN( h->mb.mv_max_spel[0], max_mv );
        }
        h->mb.mv_min_fpel[0] = (h->mb.mv_min_spel[0]>>2) + i_fpel_border;
        h->mb.mv_max_fpel[0] = (h->mb.mv_max_spel[0]>>2) - i_fpel_border;
        /* The vertical limits only change per row, so they are recomputed at
         * the first macroblock of a row (of a row pair when interlaced). */
        if( h->mb.i_mb_x == 0 && !(h->mb.i_mb_y & PARAM_INTERLACED) )
        {
            int mb_y = h->mb.i_mb_y >> SLICE_MBAFF;
            int thread_mvy_range = i_fmv_range;

            if( h->i_thread_frames > 1 )
            {
                int pix_y = (h->mb.i_mb_y | PARAM_INTERLACED) * 16;
                int thresh = pix_y + h->param.analyse.i_mv_range_thread;
                for( int i = (h->sh.i_type == SLICE_TYPE_B); i >= 0; i-- )
                    for( int j = 0; j < h->i_ref[i]; j++ )
                    {
                        x264_frame_cond_wait( h->fref[i][j]->orig, thresh );
                        thread_mvy_range = X264_MIN( thread_mvy_range, h->fref[i][j]->orig->i_lines_completed - pix_y );
                    }

                if( h->param.b_deterministic )
                    thread_mvy_range = h->param.analyse.i_mv_range_thread;
                if( PARAM_INTERLACED )
                    thread_mvy_range >>= 1;

                x264_analyse_weight_frame( h, pix_y + thread_mvy_range );
            }

            if( PARAM_INTERLACED )
            {
                /* 0 == top progressive, 1 == bot progressive, 2 == interlaced */
                for( int i = 0; i < 3; i++ )
                {
                    int j = i == 2;
                    mb_y = (h->mb.i_mb_y >> j) + (i == 1);
                    h->mb.mv_miny_row[i] = 4*( -16*mb_y - 24 );
                    h->mb.mv_maxy_row[i] = 4*( 16*( (h->mb.i_mb_height>>j) - mb_y - 1 ) + 24 );
                    h->mb.mv_miny_spel_row[i] = x264_clip3( h->mb.mv_miny_row[i], -i_fmv_range, i_fmv_range );
                    h->mb.mv_maxy_spel_row[i] = CLIP_FMV( h->mb.mv_maxy_row[i] );
                    h->mb.mv_maxy_spel_row[i] = X264_MIN( h->mb.mv_maxy_spel_row[i], thread_mvy_range*4 );
                    h->mb.mv_miny_fpel_row[i] = (h->mb.mv_miny_spel_row[i]>>2) + i_fpel_border;
                    h->mb.mv_maxy_fpel_row[i] = (h->mb.mv_maxy_spel_row[i]>>2) - i_fpel_border;
                }
            }
            else
            {
                h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
                h->mb.mv_max[1] = 4*( 16*( h->mb.i_mb_height - mb_y - 1 ) + 24 );
                h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
                h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
                h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
                h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
                h->mb.mv_max_fpel[1] = (h->mb.mv_max_spel[1]>>2) - i_fpel_border;
            }
        }
        if( PARAM_INTERLACED )
        {
            /* Pick the per-row limits matching this macroblock's field/frame mode. */
            int i = MB_INTERLACED ? 2 : h->mb.i_mb_y&1;
            h->mb.mv_min[1] = h->mb.mv_miny_row[i];
            h->mb.mv_max[1] = h->mb.mv_maxy_row[i];
            h->mb.mv_min_spel[1] = h->mb.mv_miny_spel_row[i];
            h->mb.mv_max_spel[1] = h->mb.mv_maxy_spel_row[i];
            h->mb.mv_min_fpel[1] = h->mb.mv_miny_fpel_row[i];
            h->mb.mv_max_fpel[1] = h->mb.mv_maxy_fpel_row[i];
        }
#undef CLIP_FMV

        a->l0.me16x16.cost =
        a->l0.i_rd16x16    =
        a->l0.i_cost8x8    =
        a->l0.i_cost16x8   =
        a->l0.i_cost8x16   = COST_MAX;
        if( h->sh.i_type == SLICE_TYPE_B )
        {
            a->l1.me16x16.cost =
            a->l1.i_rd16x16    =
            a->l1.i_cost8x8    =
            a->i_cost8x8direct[0] =
            a->i_cost8x8direct[1] =
            a->i_cost8x8direct[2] =
            a->i_cost8x8direct[3] =
            a->l1.i_cost16x8   =
            a->l1.i_cost8x16   =
            a->i_rd16x16bi     =
            a->i_rd16x16direct =
            a->i_rd8x8bi       =
            a->i_rd16x8bi      =
            a->i_rd8x16bi      =
            a->i_cost16x16bi   =
            a->i_cost16x16direct =
            a->i_cost8x8bi     =
            a->i_cost16x8bi    =
            a->i_cost8x16bi    = COST_MAX;
        }
        else if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
            for( int i = 0; i < 4; i++ )
            {
                a->l0.i_cost4x4[i] =
                a->l0.i_cost8x4[i] =
                a->l0.i_cost4x8[i] = COST_MAX;
            }

        /* Fast intra decision */
        if( a->b_early_terminate && h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
        {
            /* Always run in fast-intra mode for subme < 3 */
            if( h->mb.i_subpel_refine > 2 &&
              ( IS_INTRA( h->mb.i_mb_type_left[0] ) ||
                IS_INTRA( h->mb.i_mb_type_top ) ||
                IS_INTRA( h->mb.i_mb_type_topleft ) ||
                IS_INTRA( h->mb.i_mb_type_topright ) ||
                (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref[0][0]->mb_type[h->mb.i_mb_xy] )) ||
                (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) ) )
            { /* intra is likely */ }
            else
            {
                a->b_fast_intra = 1;
            }
        }
        h->mb.b_skip_mc = 0;
        /* Inside the refresh bar of a P-frame: intra is mandatory, so the
         * fast-intra shortcut is switched off again. */
        if( h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P &&
            h->mb.i_mb_x >= h->fdec->i_pir_start_col && h->mb.i_mb_x <= h->fdec->i_pir_end_col )
        {
            a->b_force_intra = 1;
            a->b_fast_intra = 0;
            a->b_avoid_topright = h->mb.i_mb_x == h->fdec->i_pir_end_col;
        }
        else
            a->b_force_intra = 0;
    }
}

600
601
602
603
604
605
606
607
608
609
610
/* Intra 16x16 prediction modes that may be tried for each combination of
 * available neighbours. Every row is a list terminated by -1.
 * Row order: no neighbors, left, top, top/left, top/left/topleft. */
static const int8_t i16x16_mode_available[5][5] =
{
    [0] = { I_PRED_16x16_DC_128,  -1,             -1,              -1,             -1 },
    [1] = { I_PRED_16x16_DC_LEFT, I_PRED_16x16_H, -1,              -1,             -1 },
    [2] = { I_PRED_16x16_DC_TOP,  I_PRED_16x16_V, -1,              -1,             -1 },
    [3] = { I_PRED_16x16_V,       I_PRED_16x16_H, I_PRED_16x16_DC, -1,             -1 },
    [4] = { I_PRED_16x16_V,       I_PRED_16x16_H, I_PRED_16x16_DC, I_PRED_16x16_P, -1 },
};
Laurent Aimar's avatar
Laurent Aimar committed
611

Henrik Gramner's avatar
Henrik Gramner committed
612
static const int8_t chroma_mode_available[5][5] =
613
614
615
616
617
618
619
{
    {I_PRED_CHROMA_DC_128, -1, -1, -1, -1},
    {I_PRED_CHROMA_DC_LEFT, I_PRED_CHROMA_H, -1, -1, -1},
    {I_PRED_CHROMA_DC_TOP, I_PRED_CHROMA_V, -1, -1, -1},
    {I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, -1, -1},
    {I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1},
};
Laurent Aimar's avatar
Laurent Aimar committed
620

621
static const int8_t i4x4_mode_available[2][5][10] =
Laurent Aimar's avatar
Laurent Aimar committed
622
{
623
624
625
626
627
628
629
630
631
632
633
634
635
636
    {
        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1},
    },
    {
        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1},
    }
637
};
638

639
static ALWAYS_INLINE const int8_t *predict_16x16_mode_available( int i_neighbour )
640
641
{
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
642
643
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
    return i16x16_mode_available[idx];
Laurent Aimar's avatar
Laurent Aimar committed
644
645
}

Henrik Gramner's avatar
Henrik Gramner committed
646
static ALWAYS_INLINE const int8_t *predict_chroma_mode_available( int i_neighbour )
Laurent Aimar's avatar
Laurent Aimar committed
647
{
648
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
649
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
Henrik Gramner's avatar
Henrik Gramner committed
650
    return chroma_mode_available[idx];
Laurent Aimar's avatar
Laurent Aimar committed
651
652
}

653
static ALWAYS_INLINE const int8_t *predict_8x8_mode_available( int force_intra, int i_neighbour, int i )
Laurent Aimar's avatar
Laurent Aimar committed
654
{
655
    int avoid_topright = force_intra && (i&1);
656
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
657
658
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
    return i4x4_mode_available[avoid_topright][idx];
659
660
661
662
663
664
}

/* Return the -1 terminated list of intra 4x4 modes allowed for 4x4 block i.
 * When force_intra is nonzero and bits 0 and 2 of i are both set, the list
 * that leaves out the modes to be avoided with top-right (second set of
 * i4x4_mode_available) is used instead of the regular one.
 * NOTE(review): (i&5) == 5 presumably identifies the rightmost column of 4x4
 * blocks in the block scan order -- confirm against the index layout. */
static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, int i_neighbour, int i )
{
    const int all_three = MB_TOP|MB_LEFT|MB_TOPLEFT;
    /* 0 or 1: which of the two mode sets to read from. */
    const int set = force_intra && ((i&5) == 5);
    int row = i_neighbour & (MB_TOP|MB_LEFT);

    /* Row 4 is reserved for "top, left and topleft all present". */
    if( (i_neighbour & all_three) == all_three )
        row = 4;

    return i4x4_mode_available[set][row];
}

669
670
671
/* Fill the per-macroblock DCT caches of the source (fenc) luma plane used by
 * psy-trellis.
 *
 * h           - encoder context; reads h->mb.pic.p_fenc[0] and writes
 *               h->mb.pic.fenc_dct8 and/or h->mb.pic.fenc_dct4.
 * do_both_dct - nonzero: fill both the 8x8 and the 4x4 transform caches;
 *               zero: fill only the one matching h->mb.b_transform_8x8.
 *
 * For trellis=2 this has to be done for both DCT sizes; for trellis=1 it is
 * only needed for the chosen mode. */
static inline void x264_psy_trellis_init( x264_t *h, int do_both_dct )
{
    /* All-zero block passed as the second pixel input of the sub16x16
     * transforms (presumably so the result is the DCT of fenc itself --
     * confirm against the dctf implementation). */
    ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0};

    if( do_both_dct || h->mb.b_transform_8x8 )
        h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
    if( do_both_dct || !h->mb.b_transform_8x8 )
        h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], zero );
}

680
681
/* Per-macroblock reset of the fenc caches used by the psy optimisations.
 *
 * With trellis == 2 and psy-trellis enabled, the fenc DCT caches are filled
 * first.  Then, if psy RD is enabled, the fenc hadamard cache is cleared and,
 * when b_satd is nonzero, the fenc satd cache as well. */
static inline void x264_mb_init_fenc_cache( x264_t *h, int b_satd )
{
    const int want_psy_trellis = h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis;

    if( want_psy_trellis )
        x264_psy_trellis_init( h, h->param.analyse.b_transform_8x8 );

    if( h->mb.i_psy_rd )
    {
        /* Per the original note: this writes beyond the end of the array,
         * which is harmless because fenc_satd_cache is located right after. */
        h->mc.memzero_aligned( h->mb.pic.fenc_hadamard_cache, sizeof(h->mb.pic.fenc_hadamard_cache) );

        if( b_satd )
            h->mc.memzero_aligned( h->mb.pic.fenc_satd_cache, sizeof(h->mb.pic.fenc_satd_cache) );
    }
}

693
694
/* Chroma intra analysis for the current macroblock.
 *
 * Always stores a cost in a->i_satd_chroma.  Outside of 4:4:4 it also fills
 * a->i_satd_chroma_dir[] for every tried mode, picks the cheapest mode into
 * a->i_predict8x8chroma and copies it to h->mb.i_chroma_pred_mode.
 * Does nothing if a->i_satd_chroma already holds a value below COST_MAX. */
static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
{
    if( a->i_satd_chroma < COST_MAX )
        return;

    if( CHROMA444 )
    {
        if( h->mb.b_chroma_me )
        {
            /* Cheap approximation of chroma costs to avoid a full i4x4/i8x8
             * analysis: predict planes 1 and 2 with the 16x16 mode stored in
             * a->i_predict16x16 and measure the result. */
            if( h->mb.b_lossless )
            {
                for( int plane = 1; plane <= 2; plane++ )
                    x264_predict_lossless_16x16( h, plane, a->i_predict16x16 );
            }
            else
            {
                for( int plane = 1; plane <= 2; plane++ )
                    h->predict_16x16[a->i_predict16x16]( h->mb.pic.p_fdec[plane] );
            }

            int cost = 0;
            for( int plane = 1; plane <= 2; plane++ )
                cost += h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fdec[plane], FDEC_STRIDE,
                                                    h->mb.pic.p_fenc[plane], FENC_STRIDE );
            a->i_satd_chroma = cost;
        }
        else
            a->i_satd_chroma = 0;

        return;
    }

    const int8_t *mode_list = predict_chroma_mode_available( h->mb.i_neighbour_intra );
    const int chroma_size = h->luma2chroma_pixel[PIXEL_16x16];

    /* Prediction selection for chroma */
    if( !h->mb.b_lossless && mode_list[3] >= 0 )
    {
        /* All four modes are in the list: the x3 helper fills three of the
         * costs per plane, the remaining one (I_PRED_CHROMA_P) is predicted
         * and measured explicitly.  plane_satd[0] is plane 1, [1] is plane 2. */
        int plane_satd[2][4];

        for( int c = 0; c < 2; c++ )
            h->pixf.intra_mbcmp_x3_chroma( h->mb.pic.p_fenc[1+c], h->mb.pic.p_fdec[1+c], plane_satd[c] );
        for( int c = 0; c < 2; c++ )
            h->predict_chroma[I_PRED_CHROMA_P]( h->mb.pic.p_fdec[1+c] );
        for( int c = 0; c < 2; c++ )
            plane_satd[c][I_PRED_CHROMA_P] = h->pixf.mbcmp[chroma_size]( h->mb.pic.p_fdec[1+c], FDEC_STRIDE,
                                                                         h->mb.pic.p_fenc[1+c], FENC_STRIDE );

        for( const int8_t *m = mode_list; *m >= 0; m++ )
        {
            const int i_mode = *m;
            /* distortion of both planes plus lambda-weighted mode bits */
            const int i_satd = plane_satd[0][i_mode] + plane_satd[1][i_mode]
                             + a->i_lambda * bs_size_ue( i_mode );

            a->i_satd_chroma_dir[i_mode] = i_satd;
            COPY2_IF_LT( a->i_satd_chroma, i_satd, a->i_predict8x8chroma, i_mode );
        }
    }
    else
    {
        for( const int8_t *m = mode_list; *m >= 0; m++ )
        {
            const int i_mode = *m;

            /* build the prediction for both chroma planes */
            if( h->mb.b_lossless )
                x264_predict_lossless_chroma( h, i_mode );
            else
            {
                for( int plane = 1; plane <= 2; plane++ )
                    h->predict_chroma[i_mode]( h->mb.pic.p_fdec[plane] );
            }

            /* distortion of both planes plus lambda-weighted mode bits */
            int i_satd = a->i_lambda * bs_size_ue( x264_mb_chroma_pred_mode_fix[i_mode] );
            for( int plane = 1; plane <= 2; plane++ )
                i_satd += h->pixf.mbcmp[chroma_size]( h->mb.pic.p_fdec[plane], FDEC_STRIDE,
                                                      h->mb.pic.p_fenc[plane], FENC_STRIDE );

            a->i_satd_chroma_dir[i_mode] = i_satd;
            COPY2_IF_LT( a->i_satd_chroma, i_satd, a->i_predict8x8chroma, i_mode );
        }
    }

    h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
}

Fiona Glaser's avatar
Fiona Glaser committed
774
/* FIXME: should we do any sort of merged chroma analysis with 4:4:4? */
775
static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
Laurent Aimar's avatar
Laurent Aimar committed
776
777
{
    const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
778
779
    pixel *p_src = h->mb.pic.p_fenc[0];
    pixel *p_dst = h->mb.pic.p_fdec[0];
780
781
782
783
784
785
    static const int8_t intra_analysis_shortcut[2][2][2][5] =
    {
        {{{I_PRED_4x4_HU, -1, -1, -1, -1},
          {I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1}},
         {{I_PRED_4x4_DDR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1},
          {I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_VL, -1}}},
Anton Mitrofanov's avatar
Anton Mitrofanov committed
786
        {{{I_PRED_4x4_HU, -1, -1, -1, -1},
787
788
789
790
          {-1, -1, -1, -1, -1}},
         {{I_PRED_4x4_DDR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1},
          {I_PRED_4x4_DDR, I_PRED_4x4_VR, -1, -1, -1}}},
    };
Laurent Aimar's avatar
Laurent Aimar committed
791

792
    int idx;
793
    int lambda = a->i_lambda;
794

Laurent Aimar's avatar
Laurent Aimar committed
795
796
797
    /*---------------- Try all mode and calculate their score ---------------*/

    /* 16x16 prediction selection */
798
    const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
799

800
801
802
803
    /* Not heavily tuned */
    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;

804
    if( !h->mb.b_lossless && predict_mode[3] >= 0 )
805
    {
806
        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
807
808
809
810
811
812
813
814
815
        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );

        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
        if( a->i_satd_i16x16 <= i16x16_thresh )
816
        {
817
818
819
820
            h->predict_16x16[I_PRED_16x16_P]( p_dst );
            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
821
822
823
        }
    }
    else
Laurent Aimar's avatar
Laurent Aimar committed
824
    {
825
        for( ; *predict_mode >= 0; predict_mode++ )
826
827
        {
            int i_satd;
828
            int i_mode = *predict_mode;
829
830

            if( h->mb.b_lossless )
Fiona Glaser's avatar
Fiona Glaser committed
831
                x264_predict_lossless_16x16( h, 0, i_mode );
832
833
            else
                h->predict_16x16[i_mode]( p_dst );
834
835

            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
836
                     lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
837
838
839
            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
            a->i_satd_i16x16_dir[i_mode] = i_satd;
        }
Laurent Aimar's avatar
Laurent Aimar committed
840
841
    }

842
843
    if( h->sh.i_type == SLICE_TYPE_B )
        /* cavlc mb type prefix */
844
        a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
845

846
    if( a->i_satd_i16x16 > i16x16_thresh )
847
        return;
848

849
    uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + a->i_qp*32 + 8;
850
851
    /* 8x8 prediction selection */
    if( flags & X264_ANALYSE_I8x8 )
Laurent Aimar's avatar
Laurent Aimar committed
852
    {
853
        ALIGNED_ARRAY_32( pixel, edge,[36] );
854
        x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];
855
        int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
856
857

        // FIXME some bias like in i4x4?
858
        int i_cost = lambda * 4; /* base predmode costs */
859
        h->mb.i_cbp_luma = 0;
Laurent Aimar's avatar
Laurent Aimar committed
860

861
        if( h->sh.i_type == SLICE_TYPE_B )
862
            i_cost += lambda * i_mb_b_cost_table[I_8x8];
863

864
865
866
867
        for( idx = 0;; idx++ )
        {
            int x = idx&1;
            int y = idx>>1;
868
869
            pixel *p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
            pixel *p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
870
871
            int i_best = COST_MAX;
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
872

873
            predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
874
            h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
875

876
            if( h->pixf.intra_mbcmp_x9_8x8 && predict_mode[8] >= 0 )
877
            {
878
879
880
881
882
883
884
885
886
887
888
889
                /* No shortcuts here. The SSSE3 implementation of intra_mbcmp_x9 is fast enough. */
                i_best = h->pixf.intra_mbcmp_x9_8x8( p_src_by, p_dst_by, edge, cost_i4x4_mode-i_pred_mode, a->i_satd_i8x8_dir[idx] );
                i_cost += i_best & 0xffff;
                i_best >>= 16;
                a->i_predict8x8[idx] = i_best;
                if( idx == 3 || i_cost > i_satd_thresh )
                    break;
                x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, i_best );
            }
            else
            {
                if( !h->mb.b_lossless && predict_mode[5] >= 0 )
890
                {
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
                    int satd[9];
                    h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd );
                    int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];
                    satd[i_pred_mode] -= 3 * lambda;
                    for( int i = 2; i >= 0; i-- )
                    {
                        int cost = satd[i];
                        a->i_satd_i8x8_dir[idx][i] = cost + 4 * lambda;
                        COPY2_IF_LT( i_best, cost, a->i_predict8x8[idx], i );
                    }

                    /* Take analysis shortcuts: don't analyse modes that are too
                     * far away direction-wise from the favored mode. */
                    if( a->i_mbrd < 1 + a->b_fast_intra )
                        predict_mode = intra_analysis_shortcut[a->b_avoid_topright][predict_mode[8] >= 0][favor_vertical];
                    else
                        predict_mode += 3;
908
                }
909

910
911
912
913
                for( ; *predict_mode >= 0 && (i_best >= 0 || a->i_mbrd >= 2); predict_mode++ )
                {
                    int i_satd;
                    int i_mode = *predict_mode;
914

915
916
917
918
                    if( h->mb.b_lossless )
                        x264_predict_lossless_8x8( h, p_dst_by, 0, idx, i_mode, edge );
                    else
                        h->predict_8x8[i_mode]( p_dst_by, edge );
Laurent Aimar's avatar
Laurent Aimar committed
919

920
921
922
                    i_satd = sa8d( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE );
                    if( i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) )
                        i_satd -= 3 * lambda;
Laurent Aimar's avatar
Laurent Aimar committed
923

924
925
926
927
                    COPY2_IF_LT( i_best, i_satd, a->i_predict8x8[idx], i_mode );
                    a->i_satd_i8x8_dir[idx][i_mode] = i_satd + 4 * lambda;
                }
                i_cost += i_best + 3*lambda;
Laurent Aimar's avatar
Laurent Aimar committed
928

929
930
931
932
933
934
935
                if( idx == 3 || i_cost > i_satd_thresh )
                    break;
                if( h->mb.b_lossless )
                    x264_predict_lossless_8x8( h, p_dst_by, 0, idx, a->i_predict8x8[idx], edge );
                else
                    h->predict_8x8[a->i_predict8x8[idx]]( p_dst_by,</