/*****************************************************************************
 * analyse.c: macroblock analysis
 *****************************************************************************
 * Copyright (C) 2003-2011 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

/* Feature-test macro; must be defined before any header is included.
 * This file calls log2f(), a C99 math function. */
#define _ISOC99_SOURCE

#include "common/common.h"
#include "macroblock.h"
#include "me.h"
#include "ratecontrol.h"
#include "analyse.h"
/* rdo.c is textually included, so it is compiled as part of this translation unit. */
#include "rdo.c"
typedef struct
{
    /* 16x16 */
40
    int       i_rd16x16;
Laurent Aimar's avatar
Laurent Aimar committed
41
    x264_me_t me16x16;
42
    x264_me_t bi16x16;      /* for b16x16 BI mode, since MVs can differ from l0/l1 */
Laurent Aimar's avatar
Laurent Aimar committed
43
44
45

    /* 8x8 */
    int       i_cost8x8;
Håkan Hjort's avatar
Håkan Hjort committed
46
    /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
47
    ALIGNED_4( int16_t mvc[32][5][2] );
Laurent Aimar's avatar
Laurent Aimar committed
48
49
50
51
52
53
54
55
56
57
58
59
    x264_me_t me8x8[4];

    /* Sub 4x4 */
    int       i_cost4x4[4]; /* cost per 8x8 partition */
    x264_me_t me4x4[4][4];

    /* Sub 8x4 */
    int       i_cost8x4[4]; /* cost per 8x8 partition */
    x264_me_t me8x4[4][2];

    /* Sub 4x8 */
    int       i_cost4x8[4]; /* cost per 8x8 partition */
Loren Merritt's avatar
Loren Merritt committed
60
    x264_me_t me4x8[4][2];
Laurent Aimar's avatar
Laurent Aimar committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

    /* 16x8 */
    int       i_cost16x8;
    x264_me_t me16x8[2];

    /* 8x16 */
    int       i_cost8x16;
    x264_me_t me8x16[2];

} x264_mb_analysis_list_t;

typedef struct
{
    /* conduct the analysis using this lamda and QP */
    int i_lambda;
76
    int i_lambda2;
Laurent Aimar's avatar
Laurent Aimar committed
77
    int i_qp;
78
    uint16_t *p_cost_mv;
79
    uint16_t *p_cost_ref[2];
80
    int i_mbrd;
Laurent Aimar's avatar
Laurent Aimar committed
81
82
83


    /* I: Intra part */
84
85
    /* Take some shortcuts in intra search if intra is deemed unlikely */
    int b_fast_intra;
Fiona Glaser's avatar
Fiona Glaser committed
86
    int b_force_intra; /* For Periodic Intra Refresh.  Only supported in P-frames. */
87
    int b_avoid_topright; /* For Periodic Intra Refresh: don't predict from top-right pixels. */
88
    int b_try_skip;
89

90
    /* Luma part */
91
92
    int i_satd_i16x16;
    int i_satd_i16x16_dir[7];
Laurent Aimar's avatar
Laurent Aimar committed
93
94
    int i_predict16x16;

95
    int i_satd_i8x8;
96
    int i_cbp_i8x8_luma;
97
98
    int i_satd_i8x8_dir[12][4];
    int i_predict8x8[4];
99

100
101
    int i_satd_i4x4;
    int i_predict4x4[16];
Laurent Aimar's avatar
Laurent Aimar committed
102

103
104
    int i_satd_pcm;

Laurent Aimar's avatar
Laurent Aimar committed
105
    /* Chroma part */
106
    int i_satd_i8x8chroma;
107
    int i_satd_i8x8chroma_dir[7];
108
    int i_predict8x8chroma;
Laurent Aimar's avatar
Laurent Aimar committed
109
110
111
112
113
114

    /* II: Inter part P/B frame */
    x264_mb_analysis_list_t l0;
    x264_mb_analysis_list_t l1;

    int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
115
116
117
    int i_cost16x16direct;
    int i_cost8x8bi;
    int i_cost8x8direct[4];
118
119
120
    int i_satd8x8[3][4]; /* [L0,L1,BI][8x8 0..3] SATD only */
    int i_cost_est16x8[2]; /* Per-partition estimated cost */
    int i_cost_est8x16[2];
121
122
    int i_cost16x8bi;
    int i_cost8x16bi;
123
124
125
126
127
    int i_rd16x16bi;
    int i_rd16x16direct;
    int i_rd16x8bi;
    int i_rd8x16bi;
    int i_rd8x8bi;
128
129
130
131
132

    int i_mb_partition16x8[2]; /* mb_partition_e */
    int i_mb_partition8x16[2];
    int i_mb_type16x8; /* mb_class_e */
    int i_mb_type8x16;
133
134

    int b_direct_available;
Laurent Aimar's avatar
Laurent Aimar committed
135
136
137

} x264_mb_analysis_t;

/* lambda = pow(2,qp/6-2) */
139
140
const uint16_t x264_lambda_tab[QP_MAX_MAX+1] =
{
141
142
143
144
145
146
147
148
   1,   1,   1,   1,   1,   1,   1,   1, /*  0- 7 */
   1,   1,   1,   1,   1,   1,   1,   1, /*  8-15 */
   2,   2,   2,   2,   3,   3,   3,   4, /* 16-23 */
   4,   4,   5,   6,   6,   7,   8,   9, /* 24-31 */
  10,  11,  13,  14,  16,  18,  20,  23, /* 32-39 */
  25,  29,  32,  36,  40,  45,  51,  57, /* 40-47 */
  64,  72,  81,  91, 102, 114, 128, 144, /* 48-55 */
 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
Fiona Glaser's avatar
Fiona Glaser committed
149
150
151
 406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */
1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */
2048,2299,                               /* 80-81 */
Laurent Aimar's avatar
Laurent Aimar committed
152
153
};

/* lambda2 = pow(lambda,2) * .9 * 256 */
Fiona Glaser's avatar
Fiona Glaser committed
155
/* Capped to avoid overflow */
156
157
const int x264_lambda2_tab[QP_MAX_MAX+1] =
{
Fiona Glaser's avatar
Fiona Glaser committed
158
159
160
161
162
163
164
165
166
167
168
       14,       18,       22,       28,       36,       45,      57,      72, /*  0- 7 */
       91,      115,      145,      182,      230,      290,     365,     460, /*  8-15 */
      580,      731,      921,     1161,     1462,     1843,    2322,    2925, /* 16-23 */
     3686,     4644,     5851,     7372,     9289,    11703,   14745,   18578, /* 24-31 */
    23407,    29491,    37156,    46814,    58982,    74313,   93628,  117964, /* 32-39 */
   148626,   187257,   235929,   297252,   374514,   471859,  594505,  749029, /* 40-47 */
   943718,  1189010,  1498059,  1887436,  2378021,  2996119, 3774873, 4756042, /* 48-55 */
  5992238,  7549747,  9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */
 38048341, 47937906, 60397977, 76096683, 95875813,120795955,                   /* 64-69 */
134217727,134217727,134217727,134217727,134217727,134217727,                   /* 70-75 */
134217727,134217727,134217727,134217727,134217727,134217727,                   /* 76-81 */
169
170
};

/* 64-entry fractional exp2 lookup table.
 * NOTE(review): the values look like round(256*(2^(i/64)-1)) -- confirm against the users of this table. */
const uint8_t x264_exp2_lut[64] =
{
      0,   3,   6,   8,  11,  14,  17,  20,  23,  26,  29,  32,  36,  39,  42,  45,
     48,  52,  55,  58,  62,  65,  69,  72,  76,  80,  83,  87,  91,  94,  98, 102,
    106, 110, 114, 118, 122, 126, 130, 135, 139, 143, 147, 152, 156, 161, 165, 170,
    175, 179, 184, 189, 194, 198, 203, 208, 214, 219, 224, 229, 234, 240, 245, 250
};

/* 128-entry fractional log2 lookup table.
 * NOTE(review): the values look like log2(1 + i/128) -- confirm against the users of this table. */
const float x264_log2_lut[128] =
{
    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
};

/* Avoid an int/float conversion. */
/* Entry i holds (31 - i) as a float. */
const float x264_log2_lz_lut[32] =
{
    31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
};

// should the intra and inter lambdas be different?
// I'm just matching the behaviour of deadzone quant.
207
208
static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] =
{
Fiona Glaser's avatar
Fiona Glaser committed
209
    // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
Fiona Glaser's avatar
Fiona Glaser committed
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
    {
               46,       58,       73,       92,      117,      147,
              185,      233,      294,      370,      466,      587,
              740,      932,     1174,     1480,     1864,     2349,
             2959,     3728,     4697,     5918,     7457,     9395,
            11837,    14914,    18790,    23674,    29828,    37581,
            47349,    59656,    75163,    94699,   119313,   150326,
           189399,   238627,   300652,   378798,   477255,   601304,
           757596,   954511,  1202608,  1515192,  1909022,  2405217,
          3030384,  3818045,  4810435,  6060769,  7636091,  9620872,
         12121539, 15272182, 19241743, 24243077, 30544363, 38483486,
         48486154, 61088726, 76966972, 96972308,
        122177453,134217727,134217727,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
    },
Fiona Glaser's avatar
Fiona Glaser committed
225
    // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
Fiona Glaser's avatar
Fiona Glaser committed
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
    {
               27,       34,       43,       54,       68,       86,
              108,      136,      172,      216,      273,      343,
              433,      545,      687,      865,     1090,     1374,
             1731,     2180,     2747,     3461,     4361,     5494,
             6922,     8721,    10988,    13844,    17442,    21976,
            27688,    34885,    43953,    55377,    69771,    87906,
           110755,   139543,   175813,   221511,   279087,   351627,
           443023,   558174,   703255,   886046,  1116348,  1406511,
          1772093,  2232697,  2813022,  3544186,  4465396,  5626046,
          7088374,  8930791, 11252092, 14176748, 17861583, 22504184,
         28353495, 35723165, 45008368, 56706990,
         71446330, 90016736,113413980,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
    }
Fiona Glaser's avatar
Fiona Glaser committed
242
243
};

/* Largest valid index into x264_chroma_lambda2_offset_tab. */
#define MAX_CHROMA_LAMBDA_OFFSET 36
/* Chroma lambda2 scale factors; x264_mb_analyse_init_qp indexes this with
 * (qp - effective_chroma_qp + 12) and uses 256 when psy is disabled, so
 * entry 12 (256) is the neutral value.  The last entry is limited to what
 * fits in a uint16_t. */
static const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] =
{
       16,    20,    25,    32,    40,    50,
       64,    80,   101,   128,   161,   203,
      256,   322,   406,   512,   645,   812,
     1024,  1290,  1625,  2048,  2580,  3250,
     4096,  5160,  6501,  8192, 10321, 13003,
    16384, 20642, 26007, 32768, 41285, 52015,
    65535
};

/* TODO: calculate CABAC costs */
257
258
static const uint8_t i_mb_b_cost_table[X264_MBTYPE_MAX] =
{
259
    9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
260
};
261
262
static const uint8_t i_mb_b16x8_cost_table[17] =
{
263
    0, 0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
264
};
265
266
static const uint8_t i_sub_mb_b_cost_table[13] =
{
267
268
    7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
};
269
270
static const uint8_t i_sub_mb_p_cost_table[4] =
{
271
272
    5, 3, 3, 1
};
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );

Fiona Glaser's avatar
Fiona Glaser committed
276
static uint16_t x264_cost_ref[QP_MAX+1][3][33];
277
static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
/* Build the table of per-magnitude MV bit-cost estimates that
 * x264_analyse_init_costs later scales by lambda.
 * Returns a buffer of 2*4*2048+1 floats obtained from x264_malloc (the
 * caller owns it), or NULL if the allocation fails.  h is not used. */
float *x264_analyse_prepare_costs( x264_t *h )
{
    const int last = 2*4*2048;
    float *table = x264_malloc( (last+1)*sizeof(float) );
    if( table )
    {
        table[0] = 0.718f;
        for( int n = 1; n <= last; n++ )
            table[n] = log2f(n+1)*2 + 1.718f;
    }
    return table;
}

/* Allocate and fill the MV cost tables for one QP.
 *   logs: table produced by x264_analyse_prepare_costs
 *   qp:   QP whose tables should be built
 * Returns 0 on success (including when h->cost_mv[qp] already exists),
 * -1 if an allocation fails. */
int x264_analyse_init_costs( x264_t *h, float *logs, int qp )
{
    int lambda = x264_lambda_tab[qp];
    if( h->cost_mv[qp] )
        return 0;
    /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
    CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
    /* Point at the middle of the buffer so that negative indices are valid. */
    h->cost_mv[qp] += 2*4*2048;
    for( int i = 0; i <= 2*4*2048; i++ )
    {
        h->cost_mv[qp][-i] =
        h->cost_mv[qp][i]  = X264_MIN( lambda * logs[i] + .5f, (1<<16)-1 );
    }
    /* x264_cost_ref is a file-scope table; writes to it are serialized. */
    x264_pthread_mutex_lock( &cost_ref_mutex );
    for( int i = 0; i < 3; i++ )
        for( int j = 0; j < 33; j++ )
            x264_cost_ref[qp][i][j] = X264_MIN( i ? lambda * bs_size_te( i, j ) : 0, (1<<16)-1 );
    x264_pthread_mutex_unlock( &cost_ref_mutex );
    if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
    {
        /* One full-pel table per quarter-pel phase j, sampled from cost_mv at i*4+j. */
        for( int j = 0; j < 4; j++ )
        {
            CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*2048 + 1) * sizeof(uint16_t) );
            h->cost_mv_fpel[qp][j] += 2*2048;
            for( int i = -2*2048; i < 2*2048; i++ )
                h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
        }
    }
    return 0;
fail:
    return -1;
}

/* Release every MV cost table created by x264_analyse_init_costs.
 * The stored pointers are offset into the middle of their buffers, so the
 * same offset is subtracted again before freeing. */
void x264_analyse_free_costs( x264_t *h )
{
    for( int i = 0; i < QP_MAX+1; i++ )
    {
        if( h->cost_mv[i] )
            x264_free( h->cost_mv[i] - 2*4*2048 );
        /* Test each plane on its own: if init failed part-way through the
         * four allocations, the later planes are still NULL and must not be
         * offset and freed. */
        for( int j = 0; j < 4; j++ )
            if( h->cost_mv_fpel[i][j] )
                x264_free( h->cost_mv_fpel[i][j] - 2*2048 );
    }
}

/* Extend the weighted copies of the list-0 reference planes down to row
 * `end` (plus margin).  Only the rows not yet covered, as tracked by
 * h->fenc->i_lines_weighted, are processed. */
void x264_analyse_weight_frame( x264_t *h, int end )
{
    for( int j = 0; j < h->i_ref[0]; j++ )
    {
        if( h->sh.weight[j][0].weightfn )
        {
            x264_frame_t *frame = h->fref[0][j];
            int width = frame->i_width[0] + 2*PADH;
            int i_padv = PADV << PARAM_INTERLACED;
            int offset, height;
            pixel *src = frame->filtered[0][0] - frame->i_stride[0]*i_padv - PADH;
            height = X264_MIN( 16 + end + i_padv, h->fref[0][j]->i_lines[0] + i_padv*2 ) - h->fenc->i_lines_weighted;
            offset = h->fenc->i_lines_weighted*frame->i_stride[0];
            h->fenc->i_lines_weighted += height;
            if( height )
                /* Scale the new rows for this and every later weighted reference,
                 * all reading from the source plane of the first weighted one. */
                for( int k = j; k < h->i_ref[0]; k++ )
                    if( h->sh.weight[k][0].weightfn )
                    {
                        pixel *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*i_padv - PADH;
                        x264_weight_scale_plane( h, dst + offset, frame->i_stride[0],
                                                 src + offset, frame->i_stride[0],
                                                 width, height, &h->sh.weight[k][0] );
                    }
            /* The inner loop handled all remaining references. */
            break;
        }
    }
}

/* initialize an array of lambda*nbits for all possible mvs */
/* Points a->p_cost_mv and a->p_cost_ref[] at the precomputed tables for
 * a->i_qp.  The reference-cost row is chosen from the number of active
 * references in each list, minus one, clipped to 0..2. */
static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
{
    a->p_cost_mv = h->cost_mv[a->i_qp];
    a->p_cost_ref[0] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
    a->p_cost_ref[1] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
}

/* Set up all QP-dependent analysis state for the current macroblock:
 * lambdas, trellis lambdas, chroma lambda offset, noise-reduction buffers
 * and the luma/chroma QPs.
 * qp may exceed QP_MAX_SPEC; such values only select lambdas and the
 * emergency noise-reduction offsets, and the QP actually stored is
 * clamped to QP_MAX_SPEC. */
static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int qp )
{
    int effective_chroma_qp = h->chroma_qp_table[SPEC_QP(qp)] + X264_MAX( qp - QP_MAX_SPEC, 0 );
    a->i_lambda = x264_lambda_tab[qp];
    a->i_lambda2 = x264_lambda2_tab[qp];

    h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
    if( h->param.analyse.i_trellis )
    {
        /* [0][x] = luma, [1][x] = chroma; [x][0] = inter, [x][1] = intra */
        h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][qp];
        h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][qp];
        h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][effective_chroma_qp];
        h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][effective_chroma_qp];
    }
    h->mb.i_psy_rd_lambda = a->i_lambda;
    /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
    int chroma_offset_idx = X264_MIN( qp-effective_chroma_qp+12, MAX_CHROMA_LAMBDA_OFFSET );
    h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;

    if( qp > QP_MAX_SPEC )
    {
        h->nr_offset = h->nr_offset_emergency[qp-QP_MAX_SPEC-1];
        h->nr_residual_sum = h->nr_residual_sum_buf[1];
        h->nr_count = h->nr_count_buf[1];
        h->mb.b_noise_reduction = 1;
        qp = QP_MAX_SPEC; /* Out-of-spec QPs are just used for calculating lambda values. */
    }
    else
    {
        h->nr_offset = h->nr_offset_denoise;
        h->nr_residual_sum = h->nr_residual_sum_buf[0];
        h->nr_count = h->nr_count_buf[0];
        h->mb.b_noise_reduction = 0;
    }

    a->i_qp = h->mb.i_qp = qp;
    h->mb.i_chroma_qp = h->chroma_qp_table[qp];
}

/* Prepare the analysis context `a` for the current macroblock at the given
 * QP: choose the RD level, reset the intra costs and, for P/B slices,
 * compute the allowed motion-vector ranges, reset the inter costs and
 * decide whether the fast-intra / forced-intra paths apply. */
static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
{
    /* B slices run one subme level lower than configured. */
    int subme = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);

    /* mbrd == 1 -> RD mode decision */
    /* mbrd == 2 -> RD refinement */
    /* mbrd == 3 -> QPRD */
    a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10);
    h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1;

    x264_mb_analyse_init_qp( h, a, qp );

    h->mb.b_transform_8x8 = 0;

    /* I: Intra part */
    a->i_satd_i16x16 =
    a->i_satd_i8x8   =
    a->i_satd_i4x4   =
    a->i_satd_i8x8chroma = COST_MAX;

    /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it */
    a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd ? ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8 : COST_MAX;

    a->b_fast_intra = 0;
    a->b_avoid_topright = 0;
    h->mb.i_skip_intra =
        h->mb.b_lossless ? 0 :
        a->i_mbrd ? 2 :
        !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;

    /* II: Inter part P/B frame */
    if( h->sh.i_type != SLICE_TYPE_I )
    {
        int i_fmv_range = 4 * h->param.analyse.i_mv_range;
        // limit motion search to a slightly smaller range than the theoretical limit,
        // since the search may go a few iterations past its given range
        int i_fpel_border = 6; // umh: 1 for diamond, 2 for octagon, 2 for hpel

        /* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range-1 )
        h->mb.mv_min[0] = 4*( -16*h->mb.i_mb_x - 24 );
        h->mb.mv_max[0] = 4*( 16*( h->mb.i_mb_width - h->mb.i_mb_x - 1 ) + 24 );
        h->mb.mv_min_spel[0] = CLIP_FMV( h->mb.mv_min[0] );
        h->mb.mv_max_spel[0] = CLIP_FMV( h->mb.mv_max[0] );
        if( h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P )
        {
            int max_x = (h->fref[0][0]->i_pir_end_col * 16 - 3)*4; /* 3 pixels of hpel border */
            int max_mv = max_x - 4*16*h->mb.i_mb_x;
            /* If we're left of the refresh bar, don't reference right of it. */
            if( max_mv > 0 && h->mb.i_mb_x < h->fdec->i_pir_start_col )
                h->mb.mv_max_spel[0] = X264_MIN( h->mb.mv_max_spel[0], max_mv );
        }
        h->mb.mv_min_fpel[0] = (h->mb.mv_min_spel[0]>>2) + i_fpel_border;
        h->mb.mv_max_fpel[0] = (h->mb.mv_max_spel[0]>>2) - i_fpel_border;
        /* The vertical limits are recomputed only at the first macroblock of
         * a row (of a row pair when interlaced). */
        if( h->mb.i_mb_x == 0 && !(h->mb.i_mb_y & PARAM_INTERLACED) )
        {
            int mb_y = h->mb.i_mb_y >> SLICE_MBAFF;
            int thread_mvy_range = i_fmv_range;

            if( h->i_thread_frames > 1 )
            {
                int pix_y = (h->mb.i_mb_y | PARAM_INTERLACED) * 16;
                int thresh = pix_y + h->param.analyse.i_mv_range_thread;
                /* Wait on every reference frame and limit the vertical range
                 * to the lines each one reports as completed. */
                for( int i = (h->sh.i_type == SLICE_TYPE_B); i >= 0; i-- )
                    for( int j = 0; j < h->i_ref[i]; j++ )
                    {
                        x264_frame_cond_wait( h->fref[i][j]->orig, thresh );
                        thread_mvy_range = X264_MIN( thread_mvy_range, h->fref[i][j]->orig->i_lines_completed - pix_y );
                    }

                if( h->param.b_deterministic )
                    thread_mvy_range = h->param.analyse.i_mv_range_thread;
                if( PARAM_INTERLACED )
                    thread_mvy_range >>= 1;

                x264_analyse_weight_frame( h, pix_y + thread_mvy_range );
            }

            if( PARAM_INTERLACED )
            {
                /* 0 == top progressive, 1 == bot progressive, 2 == interlaced */
                for( int i = 0; i < 3; i++ )
                {
                    int j = i == 2;
                    mb_y = (h->mb.i_mb_y >> j) + (i == 1);
                    h->mb.mv_miny_row[i] = 4*( -16*mb_y - 24 );
                    h->mb.mv_maxy_row[i] = 4*( 16*( (h->mb.i_mb_height>>j) - mb_y - 1 ) + 24 );
                    h->mb.mv_miny_spel_row[i] = x264_clip3( h->mb.mv_miny_row[i], -i_fmv_range, i_fmv_range );
                    h->mb.mv_maxy_spel_row[i] = CLIP_FMV( h->mb.mv_maxy_row[i] );
                    h->mb.mv_maxy_spel_row[i] = X264_MIN( h->mb.mv_maxy_spel_row[i], thread_mvy_range*4 );
                    h->mb.mv_miny_fpel_row[i] = (h->mb.mv_miny_spel_row[i]>>2) + i_fpel_border;
                    h->mb.mv_maxy_fpel_row[i] = (h->mb.mv_maxy_spel_row[i]>>2) - i_fpel_border;
                }
            }
            else
            {
                h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
                h->mb.mv_max[1] = 4*( 16*( h->mb.i_mb_height - mb_y - 1 ) + 24 );
                h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
                h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
                h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
                h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
                h->mb.mv_max_fpel[1] = (h->mb.mv_max_spel[1]>>2) - i_fpel_border;
            }
        }
        if( PARAM_INTERLACED )
        {
            /* Pick the per-row limits computed above that match this macroblock. */
            int i = MB_INTERLACED ? 2 : h->mb.i_mb_y&1;
            h->mb.mv_min[1] = h->mb.mv_miny_row[i];
            h->mb.mv_max[1] = h->mb.mv_maxy_row[i];
            h->mb.mv_min_spel[1] = h->mb.mv_miny_spel_row[i];
            h->mb.mv_max_spel[1] = h->mb.mv_maxy_spel_row[i];
            h->mb.mv_min_fpel[1] = h->mb.mv_miny_fpel_row[i];
            h->mb.mv_max_fpel[1] = h->mb.mv_maxy_fpel_row[i];
        }
#undef CLIP_FMV

        a->l0.me16x16.cost =
        a->l0.i_rd16x16    =
        a->l0.i_cost8x8    =
        a->l0.i_cost16x8   =
        a->l0.i_cost8x16   = COST_MAX;
        if( h->sh.i_type == SLICE_TYPE_B )
        {
            a->l1.me16x16.cost =
            a->l1.i_rd16x16    =
            a->l1.i_cost8x8    =
            a->i_cost8x8direct[0] =
            a->i_cost8x8direct[1] =
            a->i_cost8x8direct[2] =
            a->i_cost8x8direct[3] =
            a->l1.i_cost16x8   =
            a->l1.i_cost8x16   =
            a->i_rd16x16bi     =
            a->i_rd16x16direct =
            a->i_rd8x8bi       =
            a->i_rd16x8bi      =
            a->i_rd8x16bi      =
            a->i_cost16x16bi   =
            a->i_cost16x16direct =
            a->i_cost8x8bi     =
            a->i_cost16x8bi    =
            a->i_cost8x16bi    = COST_MAX;
        }
        else if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
            for( int i = 0; i < 4; i++ )
            {
                a->l0.i_cost4x4[i] =
                a->l0.i_cost8x4[i] =
                a->l0.i_cost4x8[i] = COST_MAX;
            }

        /* Fast intra decision */
        if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
        {
            /* Always run in fast-intra mode for subme < 3 */
            if( h->mb.i_subpel_refine > 2 &&
              ( IS_INTRA( h->mb.i_mb_type_left[0] ) ||
                IS_INTRA( h->mb.i_mb_type_top ) ||
                IS_INTRA( h->mb.i_mb_type_topleft ) ||
                IS_INTRA( h->mb.i_mb_type_topright ) ||
                (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref[0][0]->mb_type[h->mb.i_mb_xy] )) ||
                (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) ) )
            { /* intra is likely */ }
            else
            {
                a->b_fast_intra = 1;
            }
        }
        h->mb.b_skip_mc = 0;
        /* Inside the periodic-intra-refresh column range: force intra and
         * disable the fast-intra shortcuts. */
        if( h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P &&
            h->mb.i_mb_x >= h->fdec->i_pir_start_col && h->mb.i_mb_x <= h->fdec->i_pir_end_col )
        {
            a->b_force_intra = 1;
            a->b_fast_intra = 0;
            a->b_avoid_topright = h->mb.i_mb_x == h->fdec->i_pir_end_col;
        }
        else
            a->b_force_intra = 0;
    }
}

/* Intra 16x16 prediction modes that may be tried for each combination of
 * available neighbors.  Every row is a list terminated by -1.
 * Row order: no neighbors, left, top, top/left, top/left/topleft. */
static const int8_t i16x16_mode_available[5][5] =
{
    [0] = { I_PRED_16x16_DC_128,  -1, -1, -1, -1 },
    [1] = { I_PRED_16x16_DC_LEFT, I_PRED_16x16_H, -1, -1, -1 },
    [2] = { I_PRED_16x16_DC_TOP,  I_PRED_16x16_V, -1, -1, -1 },
    [3] = { I_PRED_16x16_V, I_PRED_16x16_H, I_PRED_16x16_DC, -1, -1 },
    [4] = { I_PRED_16x16_V, I_PRED_16x16_H, I_PRED_16x16_DC, I_PRED_16x16_P, -1 },
};
/* Chroma 8x8 intra prediction modes that may be tried for each combination
 * of available neighbors.  Every row is a list terminated by -1; the row
 * index is the one computed by predict_8x8chroma_mode_available. */
static const int8_t i8x8chroma_mode_available[5][5] =
{
    [0] = { I_PRED_CHROMA_DC_128,  -1, -1, -1, -1 },
    [1] = { I_PRED_CHROMA_DC_LEFT, I_PRED_CHROMA_H, -1, -1, -1 },
    [2] = { I_PRED_CHROMA_DC_TOP,  I_PRED_CHROMA_V, -1, -1, -1 },
    [3] = { I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, -1, -1 },
    [4] = { I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1 },
};
static const int8_t i4x4_mode_available[2][5][10] =
Laurent Aimar's avatar
Laurent Aimar committed
614
{
615
616
617
618
619
620
621
622
623
624
625
626
627
628
    {
        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1},
    },
    {
        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1},
    }
629
};
static ALWAYS_INLINE const int8_t *predict_16x16_mode_available( int i_neighbour )
632
633
{
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
634
635
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
    return i16x16_mode_available[idx];
Laurent Aimar's avatar
Laurent Aimar committed
636
637
}

638
static ALWAYS_INLINE const int8_t *predict_8x8chroma_mode_available( int i_neighbour )
Laurent Aimar's avatar
Laurent Aimar committed
639
{
640
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
641
642
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
    return i8x8chroma_mode_available[idx];
Laurent Aimar's avatar
Laurent Aimar committed
643
644
}

645
static ALWAYS_INLINE const int8_t *predict_8x8_mode_available( int force_intra, int i_neighbour, int i )
Laurent Aimar's avatar
Laurent Aimar committed
646
{
647
    int avoid_topright = force_intra && (i&1);
648
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
649
650
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
    return i4x4_mode_available[avoid_topright][idx];
651
652
653
654
655
656
}

/* Return the -1 terminated list of intra 4x4 modes to try for 4x4 block i.
 *   force_intra: when nonzero, blocks whose index has bits 0 and 2 both set
 *                use the second mode set (the one without I_PRED_4x4_DDL /
 *                I_PRED_4x4_VL).
 *   i_neighbour: neighbour availability flags of the block.
 *   i:           index of the 4x4 block. */
static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, int i_neighbour, int i )
{
    const int all_edges = MB_TOP|MB_LEFT|MB_TOPLEFT;
    const int mode_set = force_intra && ((i&5) == 5);

    /* Row 4 is used only when top, left and top-left are all flagged. */
    if( (i_neighbour & all_edges) == all_edges )
        return i4x4_mode_available[mode_set][4];

    /* Otherwise the top-left flag is ignored and top/left pick the row. */
    return i4x4_mode_available[mode_set][i_neighbour & (MB_TOP|MB_LEFT)];
}

661
662
663
/* Fill h->mb.pic.fenc_dct8 and/or h->mb.pic.fenc_dct4 from the luma source
 * block (h->mb.pic.p_fenc[0]) for psy trellis.
 * For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we
 * only need to use it on the chosen mode.
 *   h:           encoder context.
 *   do_both_dct: when nonzero, both transforms are computed; otherwise only
 *                the one selected by h->mb.b_transform_8x8. */
static inline void x264_psy_trellis_init( x264_t *h, int do_both_dct )
{
    /* All-zero block passed as the second pixel input of the sub16x16 DCT calls.
     * NOTE(review): presumably this makes the "difference" equal to the source
     * pixels themselves -- confirm against the dctf implementation. */
    ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0};

    if( do_both_dct || h->mb.b_transform_8x8 )
        h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
    if( do_both_dct || !h->mb.b_transform_8x8 )
        h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], zero );
}

672
673
/* Reset fenc satd scores cache for psy RD.
 * Also runs the psy trellis DCT setup when trellis level 2 is combined with
 * psy trellis.
 *   h:      encoder context.
 *   b_satd: when nonzero, fenc_satd_cache is cleared in addition to
 *           fenc_hadamard_cache. */
static inline void x264_mb_init_fenc_cache( x264_t *h, int b_satd )
{
    if( h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis )
        x264_psy_trellis_init( h, h->param.analyse.b_transform_8x8 );

    /* The caches are only cleared when psy RD is enabled. */
    if( h->mb.i_psy_rd )
    {
        /* Writes beyond the end of the array, but not a problem since fenc_satd_cache is right after. */
        h->mc.memzero_aligned( h->mb.pic.fenc_hadamard_cache, sizeof(h->mb.pic.fenc_hadamard_cache) );
        if( b_satd )
            h->mc.memzero_aligned( h->mb.pic.fenc_satd_cache, sizeof(h->mb.pic.fenc_satd_cache) );
    }
}

685
686
/* Choose the intra chroma prediction mode for the current macroblock.
 * Results are stored in a->i_satd_i8x8chroma (best cost),
 * a->i_satd_i8x8chroma_dir[] (cost per tried mode), a->i_predict8x8chroma and
 * h->mb.i_chroma_pred_mode (best mode).  The chroma planes of
 * h->mb.pic.p_fdec are overwritten with predictions along the way. */
static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
{
    /* A cost below COST_MAX is already recorded: nothing to do. */
    if( a->i_satd_i8x8chroma < COST_MAX )
        return;

    pixel *src_u = h->mb.pic.p_fenc[1];
    pixel *src_v = h->mb.pic.p_fenc[2];
    pixel *dst_u = h->mb.pic.p_fdec[1];
    pixel *dst_v = h->mb.pic.p_fdec[2];
    const int lambda = a->i_lambda;

    if( CHROMA444 )
    {
        if( h->mb.b_chroma_me )
        {
            /* Cheap approximation of chroma costs to avoid a full i4x4/i8x8 analysis:
             * predict both planes with the already chosen 16x16 mode. */
            const int mode16 = a->i_predict16x16;
            h->predict_16x16[mode16]( dst_u );
            h->predict_16x16[mode16]( dst_v );
            int cost_u = h->pixf.mbcmp[PIXEL_16x16]( dst_u, FDEC_STRIDE, src_u, FENC_STRIDE );
            int cost_v = h->pixf.mbcmp[PIXEL_16x16]( dst_v, FDEC_STRIDE, src_v, FENC_STRIDE );
            a->i_satd_i8x8chroma = cost_u + cost_v;
        }
        else
            a->i_satd_i8x8chroma = 0;
        return;
    }

    const int8_t *modes = predict_8x8chroma_mode_available( h->mb.i_neighbour_intra );

    /* 8x8 prediction selection for chroma */
    if( modes[3] >= 0 && !h->mb.b_lossless )
    {
        /* All four modes are candidates: three costs come from one x3 call per
         * plane, the plane mode is predicted and measured separately. */
        int cost_u[4], cost_v[4];
        h->pixf.intra_mbcmp_x3_8x8c( src_u, dst_u, cost_u );
        h->pixf.intra_mbcmp_x3_8x8c( src_v, dst_v, cost_v );
        h->predict_8x8c[I_PRED_CHROMA_P]( dst_u );
        h->predict_8x8c[I_PRED_CHROMA_P]( dst_v );
        cost_u[I_PRED_CHROMA_P] = h->pixf.mbcmp[PIXEL_8x8]( dst_u, FDEC_STRIDE, src_u, FENC_STRIDE );
        cost_v[I_PRED_CHROMA_P] = h->pixf.mbcmp[PIXEL_8x8]( dst_v, FDEC_STRIDE, src_v, FENC_STRIDE );

        while( *modes >= 0 )
        {
            const int mode = *modes++;
            const int cost = cost_u[mode] + cost_v[mode] + lambda * bs_size_ue( mode );

            a->i_satd_i8x8chroma_dir[mode] = cost;
            COPY2_IF_LT( a->i_satd_i8x8chroma, cost, a->i_predict8x8chroma, mode );
        }
    }
    else
    {
        /* Generic path: predict and measure every candidate in turn. */
        while( *modes >= 0 )
        {
            const int mode = *modes++;

            /* we do the prediction */
            if( h->mb.b_lossless )
                x264_predict_lossless_8x8_chroma( h, mode );
            else
            {
                h->predict_8x8c[mode]( dst_u );
                h->predict_8x8c[mode]( dst_v );
            }

            /* we calculate the cost */
            int cost = h->pixf.mbcmp[PIXEL_8x8]( dst_u, FDEC_STRIDE, src_u, FENC_STRIDE );
            cost += h->pixf.mbcmp[PIXEL_8x8]( dst_v, FDEC_STRIDE, src_v, FENC_STRIDE );
            cost += lambda * bs_size_ue( x264_mb_pred_mode8x8c_fix[mode] );

            a->i_satd_i8x8chroma_dir[mode] = cost;
            COPY2_IF_LT( a->i_satd_i8x8chroma, cost, a->i_predict8x8chroma, mode );
        }
    }

    h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
}

Fiona Glaser's avatar
Fiona Glaser committed
757
/* FIXME: should we do any sort of merged chroma analysis with 4:4:4? */
758
static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
Laurent Aimar's avatar
Laurent Aimar committed
759
760
{
    const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
761
762
    pixel *p_src = h->mb.pic.p_fenc[0];
    pixel *p_dst = h->mb.pic.p_fdec[0];
763
764
765
766
767
768
    static const int8_t intra_analysis_shortcut[2][2][2][5] =
    {
        {{{I_PRED_4x4_HU, -1, -1, -1, -1},
          {I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1}},
         {{I_PRED_4x4_DDR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1},
          {I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_VL, -1}}},
Anton Mitrofanov's avatar
Anton Mitrofanov committed
769
        {{{I_PRED_4x4_HU, -1, -1, -1, -1},
770
771
772
773
          {-1, -1, -1, -1, -1}},
         {{I_PRED_4x4_DDR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1},
          {I_PRED_4x4_DDR, I_PRED_4x4_VR, -1, -1, -1}}},
    };
Laurent Aimar's avatar
Laurent Aimar committed
774

775
    int idx;
776
    int lambda = a->i_lambda;
777

Laurent Aimar's avatar
Laurent Aimar committed
778
779
780
    /*---------------- Try all mode and calculate their score ---------------*/

    /* 16x16 prediction selection */
781
    const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
782

783
784
785
786
    /* Not heavily tuned */
    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;

787
    if( !h->mb.b_lossless && predict_mode[3] >= 0 )
788
    {
789
        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
790
791
792
793
794
795
796
797
798
        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );

        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
        if( a->i_satd_i16x16 <= i16x16_thresh )
799
        {
800
801
802
803
            h->predict_16x16[I_PRED_16x16_P]( p_dst );
            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
804
805
806
        }
    }
    else
Laurent Aimar's avatar
Laurent Aimar committed
807
    {
808
        for( ; *predict_mode >= 0; predict_mode++ )
809
810
        {
            int i_satd;
811
            int i_mode = *predict_mode;
812
813

            if( h->mb.b_lossless )
Fiona Glaser's avatar
Fiona Glaser committed
814
                x264_predict_lossless_16x16( h, 0, i_mode );
815
816
            else
                h->predict_16x16[i_mode]( p_dst );
817
818

            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
819
                     lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
820
821
822
            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
            a->i_satd_i16x16_dir[i_mode] = i_satd;
        }
Laurent Aimar's avatar
Laurent Aimar committed
823
824
    }

825
826
    if( h->sh.i_type == SLICE_TYPE_B )
        /* cavlc mb type prefix */
827
        a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
828

829
    if( a->i_satd_i16x16 > i16x16_thresh )
830
        return;
831

832
833
    /* 8x8 prediction selection */
    if( flags & X264_ANALYSE_I8x8 )
Laurent Aimar's avatar
Laurent Aimar committed
834
    {
835
        ALIGNED_ARRAY_16( pixel, edge,[33] );
836
        x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];
837
        int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
838
839

        // FIXME some bias like in i4x4?
840
        int i_cost = lambda * 4; /* base predmode costs */
841
        h->mb.i_cbp_luma = 0;
Laurent Aimar's avatar
Laurent Aimar committed
842

843
        if( h->sh.i_type == SLICE_TYPE_B )
844
            i_cost += lambda * i_mb_b_cost_table[I_8x8];
845

846
847
848
849
        for( idx = 0;; idx++ )
        {
            int x = idx&1;
            int y = idx>>1;
850
851
            pixel *p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
            pixel *p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
852
853
            int i_best = COST_MAX;
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
854

855
            predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
856
            h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
857

858
            if( !h->mb.b_lossless && predict_mode[5] >= 0 )
859
            {
860
                int satd[9];
Fiona Glaser's avatar
Fiona Glaser committed
861
                h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd );
862
863
                int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];
                satd[i_pred_mode] -= 3 * lambda;
864
                for( int i = 2; i >= 0; i-- )
865
                {
Anton Mitrofanov's avatar
Anton Mitrofanov committed
866
867
                    int cost = satd[i];
                    a->i_satd_i8x8_dir[i][idx] = cost + 4 * lambda;
868
869
                    COPY2_IF_LT( i_best, cost, a->i_predict8x8[idx], i );
                }
870
871
872
873

                /* Take analysis shortcuts: don't analyse modes that are too
                 * far away direction-wise from the favored mode. */
                if( a->i_mbrd < 1 + a->b_fast_intra )
874
                    predict_mode = intra_analysis_shortcut[a->b_avoid_topright][predict_mode[8] >= 0][favor_vertical];
875
876
                else
                    predict_mode += 3;
877
878
            }

879
            for( ; *predict_mode >= 0 && (i_best >= 0 || a->i_mbrd >= 2); predict_mode++ )
Laurent Aimar's avatar
Laurent Aimar committed
880
            {
881
                int i_satd;
882
                int i_mode = *predict_mode;
Laurent Aimar's avatar
Laurent Aimar committed
883

884
                if( h->mb.b_lossless )
Fiona Glaser's avatar
Fiona Glaser committed
885
                    x264_predict_lossless_8x8( h, p_dst_by, 0, idx, i_mode, edge );
886
887
                else
                    h->predict_8x8[i_mode]( p_dst_by, edge );
Laurent Aimar's avatar
Laurent Aimar committed
888

889
                i_satd = sa8d( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE );