/*****************************************************************************
 * analyse.c: macroblock analysis
 *****************************************************************************
 * Copyright (C) 2003-2011 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

28
#define _ISOC99_SOURCE
Laurent Aimar's avatar
Laurent Aimar committed
29

30
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
31
32
#include "macroblock.h"
#include "me.h"
33
#include "ratecontrol.h"
34
35
#include "analyse.h"
#include "rdo.c"
Laurent Aimar's avatar
Laurent Aimar committed
36
37
38
39

typedef struct
{
    /* 16x16 */
40
    int       i_rd16x16;
Laurent Aimar's avatar
Laurent Aimar committed
41
    x264_me_t me16x16;
42
    x264_me_t bi16x16;      /* for b16x16 BI mode, since MVs can differ from l0/l1 */
Laurent Aimar's avatar
Laurent Aimar committed
43
44
45

    /* 8x8 */
    int       i_cost8x8;
Håkan Hjort's avatar
Håkan Hjort committed
46
    /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
47
    ALIGNED_4( int16_t mvc[32][5][2] );
Laurent Aimar's avatar
Laurent Aimar committed
48
49
50
51
52
53
54
55
56
57
58
59
    x264_me_t me8x8[4];

    /* Sub 4x4 */
    int       i_cost4x4[4]; /* cost per 8x8 partition */
    x264_me_t me4x4[4][4];

    /* Sub 8x4 */
    int       i_cost8x4[4]; /* cost per 8x8 partition */
    x264_me_t me8x4[4][2];

    /* Sub 4x8 */
    int       i_cost4x8[4]; /* cost per 8x8 partition */
Loren Merritt's avatar
Loren Merritt committed
60
    x264_me_t me4x8[4][2];
Laurent Aimar's avatar
Laurent Aimar committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

    /* 16x8 */
    int       i_cost16x8;
    x264_me_t me16x8[2];

    /* 8x16 */
    int       i_cost8x16;
    x264_me_t me8x16[2];

} x264_mb_analysis_list_t;

typedef struct
{
    /* conduct the analysis using this lamda and QP */
    int i_lambda;
76
    int i_lambda2;
Laurent Aimar's avatar
Laurent Aimar committed
77
    int i_qp;
78
    uint16_t *p_cost_mv;
79
    uint16_t *p_cost_ref[2];
80
    int i_mbrd;
Laurent Aimar's avatar
Laurent Aimar committed
81
82
83


    /* I: Intra part */
84
85
    /* Take some shortcuts in intra search if intra is deemed unlikely */
    int b_fast_intra;
Fiona Glaser's avatar
Fiona Glaser committed
86
    int b_force_intra; /* For Periodic Intra Refresh.  Only supported in P-frames. */
87
    int b_avoid_topright; /* For Periodic Intra Refresh: don't predict from top-right pixels. */
88
    int b_try_skip;
89

90
    /* Luma part */
91
92
    int i_satd_i16x16;
    int i_satd_i16x16_dir[7];
Laurent Aimar's avatar
Laurent Aimar committed
93
94
    int i_predict16x16;

95
    int i_satd_i8x8;
96
    int i_cbp_i8x8_luma;
97
98
    int i_satd_i8x8_dir[12][4];
    int i_predict8x8[4];
99

100
101
    int i_satd_i4x4;
    int i_predict4x4[16];
Laurent Aimar's avatar
Laurent Aimar committed
102

103
104
    int i_satd_pcm;

Laurent Aimar's avatar
Laurent Aimar committed
105
    /* Chroma part */
106
    int i_satd_i8x8chroma;
107
    int i_satd_i8x8chroma_dir[7];
108
    int i_predict8x8chroma;
Laurent Aimar's avatar
Laurent Aimar committed
109
110
111
112
113
114

    /* II: Inter part P/B frame */
    x264_mb_analysis_list_t l0;
    x264_mb_analysis_list_t l1;

    int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
115
116
117
    int i_cost16x16direct;
    int i_cost8x8bi;
    int i_cost8x8direct[4];
118
119
120
    int i_satd8x8[3][4]; /* [L0,L1,BI][8x8 0..3] SATD only */
    int i_cost_est16x8[2]; /* Per-partition estimated cost */
    int i_cost_est8x16[2];
121
122
    int i_cost16x8bi;
    int i_cost8x16bi;
123
124
125
126
127
    int i_rd16x16bi;
    int i_rd16x16direct;
    int i_rd16x8bi;
    int i_rd8x16bi;
    int i_rd8x8bi;
128
129
130
131
132

    int i_mb_partition16x8[2]; /* mb_partition_e */
    int i_mb_partition8x16[2];
    int i_mb_type16x8; /* mb_class_e */
    int i_mb_type8x16;
133
134

    int b_direct_available;
135
    int b_early_terminate;
Laurent Aimar's avatar
Laurent Aimar committed
136
137
138

} x264_mb_analysis_t;

139
/* lambda = pow(2,qp/6-2) */
140
141
const uint16_t x264_lambda_tab[QP_MAX_MAX+1] =
{
142
143
144
145
146
147
148
149
   1,   1,   1,   1,   1,   1,   1,   1, /*  0- 7 */
   1,   1,   1,   1,   1,   1,   1,   1, /*  8-15 */
   2,   2,   2,   2,   3,   3,   3,   4, /* 16-23 */
   4,   4,   5,   6,   6,   7,   8,   9, /* 24-31 */
  10,  11,  13,  14,  16,  18,  20,  23, /* 32-39 */
  25,  29,  32,  36,  40,  45,  51,  57, /* 40-47 */
  64,  72,  81,  91, 102, 114, 128, 144, /* 48-55 */
 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
Fiona Glaser's avatar
Fiona Glaser committed
150
151
152
 406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */
1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */
2048,2299,                               /* 80-81 */
Laurent Aimar's avatar
Laurent Aimar committed
153
154
};

Fiona Glaser's avatar
Fiona Glaser committed
155
/* lambda2 = pow(lambda,2) * .9 * 256 */
Fiona Glaser's avatar
Fiona Glaser committed
156
/* Capped to avoid overflow */
157
158
const int x264_lambda2_tab[QP_MAX_MAX+1] =
{
Fiona Glaser's avatar
Fiona Glaser committed
159
160
161
162
163
164
165
166
167
168
169
       14,       18,       22,       28,       36,       45,      57,      72, /*  0- 7 */
       91,      115,      145,      182,      230,      290,     365,     460, /*  8-15 */
      580,      731,      921,     1161,     1462,     1843,    2322,    2925, /* 16-23 */
     3686,     4644,     5851,     7372,     9289,    11703,   14745,   18578, /* 24-31 */
    23407,    29491,    37156,    46814,    58982,    74313,   93628,  117964, /* 32-39 */
   148626,   187257,   235929,   297252,   374514,   471859,  594505,  749029, /* 40-47 */
   943718,  1189010,  1498059,  1887436,  2378021,  2996119, 3774873, 4756042, /* 48-55 */
  5992238,  7549747,  9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */
 38048341, 47937906, 60397977, 76096683, 95875813,120795955,                   /* 64-69 */
134217727,134217727,134217727,134217727,134217727,134217727,                   /* 70-75 */
134217727,134217727,134217727,134217727,134217727,134217727,                   /* 76-81 */
170
171
};

172
173
/* 64-entry exponential lookup table.
 * NOTE(review): the values match round(256 * (2^(i/64) - 1)), i.e. the
 * fractional part of 2^(i/64) in 8-bit fixed point -- confirm against the
 * users of this table, which are not in this file section. */
const uint8_t x264_exp2_lut[64] =
{
      0,   3,   6,   8,  11,  14,  17,  20, /*  0- 7 */
     23,  26,  29,  32,  36,  39,  42,  45, /*  8-15 */
     48,  52,  55,  58,  62,  65,  69,  72, /* 16-23 */
     76,  80,  83,  87,  91,  94,  98, 102, /* 24-31 */
    106, 110, 114, 118, 122, 126, 130, 135, /* 32-39 */
    139, 143, 147, 152, 156, 161, 165, 170, /* 40-47 */
    175, 179, 184, 189, 194, 198, 203, 208, /* 48-55 */
    214, 219, 224, 229, 234, 240, 245, 250  /* 56-63 */
};

180
181
/* 128-entry logarithm lookup table with values in [0,1).
 * NOTE(review): the values match log2(1 + i/128) to five decimals -- confirm
 * against the users of this table, which are not in this file section. */
const float x264_log2_lut[128] =
{
    /*   0 */ 0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
    /*   8 */ 0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
    /*  16 */ 0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
    /*  24 */ 0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
    /*  32 */ 0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
    /*  40 */ 0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
    /*  48 */ 0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
    /*  56 */ 0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
    /*  64 */ 0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
    /*  72 */ 0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
    /*  80 */ 0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
    /*  88 */ 0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
    /*  96 */ 0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
    /* 104 */ 0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
    /* 112 */ 0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
    /* 120 */ 0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
};

/* Avoid an int/float conversion: entry i holds 31 - i, already as a float. */
const float x264_log2_lz_lut[32] =
{
    31, 30, 29, 28, 27, 26, 25, 24,
    23, 22, 21, 20, 19, 18, 17, 16,
    15, 14, 13, 12, 11, 10,  9,  8,
     7,  6,  5,  4,  3,  2,  1,  0
};

Fiona Glaser's avatar
Fiona Glaser committed
206
207
// should the intra and inter lambdas be different?
// I'm just matching the behaviour of deadzone quant.
208
209
static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] =
{
Fiona Glaser's avatar
Fiona Glaser committed
210
    // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
Fiona Glaser's avatar
Fiona Glaser committed
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
    {
               46,       58,       73,       92,      117,      147,
              185,      233,      294,      370,      466,      587,
              740,      932,     1174,     1480,     1864,     2349,
             2959,     3728,     4697,     5918,     7457,     9395,
            11837,    14914,    18790,    23674,    29828,    37581,
            47349,    59656,    75163,    94699,   119313,   150326,
           189399,   238627,   300652,   378798,   477255,   601304,
           757596,   954511,  1202608,  1515192,  1909022,  2405217,
          3030384,  3818045,  4810435,  6060769,  7636091,  9620872,
         12121539, 15272182, 19241743, 24243077, 30544363, 38483486,
         48486154, 61088726, 76966972, 96972308,
        122177453,134217727,134217727,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
    },
Fiona Glaser's avatar
Fiona Glaser committed
226
    // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
Fiona Glaser's avatar
Fiona Glaser committed
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
    {
               27,       34,       43,       54,       68,       86,
              108,      136,      172,      216,      273,      343,
              433,      545,      687,      865,     1090,     1374,
             1731,     2180,     2747,     3461,     4361,     5494,
             6922,     8721,    10988,    13844,    17442,    21976,
            27688,    34885,    43953,    55377,    69771,    87906,
           110755,   139543,   175813,   221511,   279087,   351627,
           443023,   558174,   703255,   886046,  1116348,  1406511,
          1772093,  2232697,  2813022,  3544186,  4465396,  5626046,
          7088374,  8930791, 11252092, 14176748, 17861583, 22504184,
         28353495, 35723165, 45008368, 56706990,
         71446330, 90016736,113413980,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
    }
Fiona Glaser's avatar
Fiona Glaser committed
243
244
};

Fiona Glaser's avatar
Fiona Glaser committed
245
/* Highest valid index into x264_chroma_lambda2_offset_tab. */
#define MAX_CHROMA_LAMBDA_OFFSET 36
/* Scale factor for chroma lambda2, looked up by x264_mb_analyse_init_qp with
 * index (qp - effective chroma qp + 12).  Index 12 holds 256, the same value
 * that is used when psy optimizations are off.  The values double every
 * three entries; the last one is 65535 because the element type is
 * uint16_t. */
static const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] =
{
    /*  0- 5 */    16,    20,    25,    32,    40,    50,
    /*  6-11 */    64,    80,   101,   128,   161,   203,
    /* 12-17 */   256,   322,   406,   512,   645,   812,
    /* 18-23 */  1024,  1290,  1625,  2048,  2580,  3250,
    /* 24-29 */  4096,  5160,  6501,  8192, 10321, 13003,
    /* 30-35 */ 16384, 20642, 26007, 32768, 41285, 52015,
    /* 36    */ 65535
};

257
/* TODO: calculate CABAC costs */
258
259
static const uint8_t i_mb_b_cost_table[X264_MBTYPE_MAX] =
{
260
    9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
261
};
262
263
/* 17-entry cost table; the first eight entries are zero.
 * NOTE(review): presumably indexed by macroblock type for B 16x8/8x16
 * partitions, going by the name -- confirm at the use sites. */
static const uint8_t i_mb_b16x8_cost_table[17] =
{
    0, 0, 0, 0, 0, 0, 0, 0,
    5, 7, 7, 7,
    5, 7, 9, 9,
    9
};
266
267
/* 13-entry cost table.
 * NOTE(review): presumably indexed by B-frame sub-macroblock type, going by
 * the name -- confirm at the use sites. */
static const uint8_t i_sub_mb_b_cost_table[13] =
{
    7, 5, 5, 3,
    7, 5, 7, 3,
    7, 7, 7, 5,
    1
};
270
271
/* 4-entry cost table.
 * NOTE(review): presumably indexed by P-frame sub-macroblock type, going by
 * the name -- confirm at the use sites. */
static const uint8_t i_sub_mb_p_cost_table[4] =
{
    5,
    3,
    3,
    1
};
274

275
276
/* Forward declaration; the definition is not in this part of the file. */
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );

Fiona Glaser's avatar
Fiona Glaser committed
277
/* Reference-index costs, shared by every encoder instance in the process.
 * Layout is [qp][i][ref].  x264_analyse_init_costs() fills one qp slice with
 * lambda * bs_size_te( i, ref ) capped at 65535 (all zeros for i == 0), and
 * x264_mb_analyse_load_costs() picks i as clip3( num_ref_idx_active-1, 0, 2 ). */
static uint16_t x264_cost_ref[QP_MAX+1][3][33];
278
/* Held by x264_analyse_init_costs() while it writes x264_cost_ref. */
static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
279

280
281
282
283
284
285
286
287
288
289
290
291
/* Allocate and fill the float table that x264_analyse_init_costs() scales by
 * lambda to obtain motion-vector costs.
 *
 * Entry 0 is 0.718; entry i (1 <= i <= 2*4*2048) is 2*log2(i+1) + 1.718.
 *
 * h is not used by this function.
 * Returns the table (2*4*2048+1 floats, obtained from x264_malloc), or NULL
 * if the allocation failed. */
float *x264_analyse_prepare_costs( x264_t *h )
{
    const int last = 2*4*2048;
    float *table = x264_malloc( (last+1)*sizeof(float) );

    if( table )
    {
        table[0] = 0.718f;
        for( int mvd = 1; mvd <= last; mvd++ )
            table[mvd] = log2f(mvd+1)*2 + 1.718f;
    }
    return table;
}

/* Build the cost tables for one QP.
 *
 *  - h->cost_mv[qp]: lambda * logs[|d|] + .5, capped at 65535, for every
 *    difference d in [-2*4*2048, 2*4*2048].  The stored pointer is offset to
 *    the middle of the allocation so it can be indexed with negative values.
 *  - x264_cost_ref[qp]: lambda * bs_size_te( i, j ) capped at 65535 (zero for
 *    i == 0), written while holding cost_ref_mutex.
 *  - h->cost_mv_fpel[qp][0..3]: only when the ME method is X264_ME_ESA or
 *    higher and plane 0 is not allocated yet; plane j holds
 *    cost_mv[qp][4*i + j] for i in [-2*2048, 2*2048).
 *
 * logs is the table produced by x264_analyse_prepare_costs().
 * Returns 0 on success, or immediately if cost_mv[qp] already exists;
 * returns -1 if an allocation fails. */
int x264_analyse_init_costs( x264_t *h, float *logs, int qp )
{
    int lambda = x264_lambda_tab[qp];

    /* Nothing to do if this QP has been set up before. */
    if( h->cost_mv[qp] )
        return 0;

    /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
    CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
    h->cost_mv[qp] += 2*4*2048;

    uint16_t *mv_cost = h->cost_mv[qp];
    for( int mvd = 0; mvd <= 2*4*2048; mvd++ )
    {
        /* The table is symmetric around zero. */
        uint16_t cost = X264_MIN( lambda * logs[mvd] + .5f, (1<<16)-1 );
        mv_cost[mvd]  = cost;
        mv_cost[-mvd] = cost;
    }

    x264_pthread_mutex_lock( &cost_ref_mutex );
    for( int list_size = 0; list_size < 3; list_size++ )
        for( int ref = 0; ref < 33; ref++ )
        {
            int ref_cost = list_size ? lambda * bs_size_te( list_size, ref ) : 0;
            x264_cost_ref[qp][list_size][ref] = X264_MIN( ref_cost, (1<<16)-1 );
        }
    x264_pthread_mutex_unlock( &cost_ref_mutex );

    if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
        for( int phase = 0; phase < 4; phase++ )
        {
            CHECKED_MALLOC( h->cost_mv_fpel[qp][phase], (4*2048 + 1) * sizeof(uint16_t) );
            h->cost_mv_fpel[qp][phase] += 2*2048;
            for( int fpel = -2*2048; fpel < 2*2048; fpel++ )
                h->cost_mv_fpel[qp][phase][fpel] = h->cost_mv[qp][fpel*4+phase];
        }

    return 0;
fail:
    return -1;
}

324
325
/* Release every table allocated by x264_analyse_init_costs().
 *
 * The stored pointers are offset into the middle of their allocations
 * (by 2*4*2048 for cost_mv and 2*2048 for cost_mv_fpel), so the offset is
 * undone before freeing.
 *
 * Each full-pel plane is tested on its own: if x264_analyse_init_costs()
 * failed part-way through the four planes, plane 0 is set while later planes
 * are still NULL, and subtracting the offset from NULL must be avoided. */
void x264_analyse_free_costs( x264_t *h )
{
    for( int i = 0; i < QP_MAX+1; i++ )
    {
        if( h->cost_mv[i] )
            x264_free( h->cost_mv[i] - 2*4*2048 );
        for( int j = 0; j < 4; j++ )
            if( h->cost_mv_fpel[i][j] )
                x264_free( h->cost_mv_fpel[i][j] - 2*2048 );
    }
}

336
337
/* Extend the weighted copies of the list-0 reference plane down to row `end`.
 *
 * The source is plane 0 of the first list-0 reference that has a weight
 * function.  Rows already done are tracked in h->fenc->i_lines_weighted; only
 * the rows between that mark and min( 16 + end + padv, lines + 2*padv ) are
 * processed.  Every reference from that first one onwards that has a weight
 * function gets those rows written into h->fenc->weighted[k] through
 * x264_weight_scale_plane().
 *
 * Does nothing when no list-0 reference has a weight function. */
void x264_analyse_weight_frame( x264_t *h, int end )
{
    /* Find the first weighted list-0 reference. */
    int first = 0;
    while( first < h->i_ref[0] && !h->sh.weight[first][0].weightfn )
        first++;
    if( first >= h->i_ref[0] )
        return;

    x264_frame_t *ref = h->fref[0][first];
    int plane_width = ref->i_width[0] + 2*PADH;
    int padv = PADV << PARAM_INTERLACED;
    /* Both pointers start at the top-left corner of the padded plane. */
    pixel *src = ref->filtered[0][0] - ref->i_stride[0]*padv - PADH;
    int rows = X264_MIN( 16 + end + padv, ref->i_lines[0] + padv*2 ) - h->fenc->i_lines_weighted;
    int offset = h->fenc->i_lines_weighted*ref->i_stride[0];

    h->fenc->i_lines_weighted += rows;
    if( !rows )
        return;

    for( int k = first; k < h->i_ref[0]; k++ )
    {
        if( !h->sh.weight[k][0].weightfn )
            continue;
        pixel *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*padv - PADH;
        x264_weight_scale_plane( h, dst + offset, ref->i_stride[0],
                                 src + offset, ref->i_stride[0],
                                 plane_width, rows, &h->sh.weight[k][0] );
    }
}

364
365
366
/* Point the analysis context at the precomputed cost tables for a->i_qp:
 * the mv cost table, and for each list the ref cost row selected by
 * clip3( number of active refs - 1, 0, 2 ). */
static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
{
    int row_l0 = x264_clip3( h->sh.i_num_ref_idx_l0_active-1, 0, 2 );
    int row_l1 = x264_clip3( h->sh.i_num_ref_idx_l1_active-1, 0, 2 );

    a->p_cost_mv     = h->cost_mv[a->i_qp];
    a->p_cost_ref[0] = x264_cost_ref[a->i_qp][row_l0];
    a->p_cost_ref[1] = x264_cost_ref[a->i_qp][row_l1];
}

Fiona Glaser's avatar
Fiona Glaser committed
372
/* Set up everything that depends on the macroblock QP.
 *
 * qp may exceed QP_MAX_SPEC.  In that case it is used only to look up the
 * lambda values and to select the "emergency" noise-reduction buffers; the
 * QP stored in a->i_qp / h->mb.i_qp is clamped to QP_MAX_SPEC.
 *
 * Writes: a->i_lambda, a->i_lambda2, a->i_qp, h->mb.b_trellis,
 * h->mb.i_trellis_lambda2 (only when trellis is enabled),
 * h->mb.i_psy_rd_lambda, h->mb.i_chroma_lambda2_offset, the h->nr_* pointers,
 * h->mb.b_noise_reduction, h->mb.i_qp and h->mb.i_chroma_qp.
 * Reads a->i_mbrd, so that must be set first. */
static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int qp )
{
    /* Chroma QP from the table, plus however far qp is above the spec maximum. */
    int chroma_qp_eff = h->chroma_qp_table[SPEC_QP(qp)] + X264_MAX( qp - QP_MAX_SPEC, 0 );

    a->i_lambda = x264_lambda_tab[qp];
    a->i_lambda2 = x264_lambda2_tab[qp];

    h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
    if( h->param.analyse.i_trellis )
    {
        /* First index: 0 uses qp, 1 uses the effective chroma qp.
         * Second index: row of x264_trellis_lambda2_tab (0 inter, 1 intra). */
        for( int tab_row = 0; tab_row < 2; tab_row++ )
        {
            h->mb.i_trellis_lambda2[0][tab_row] = x264_trellis_lambda2_tab[tab_row][qp];
            h->mb.i_trellis_lambda2[1][tab_row] = x264_trellis_lambda2_tab[tab_row][chroma_qp_eff];
        }
    }
    h->mb.i_psy_rd_lambda = a->i_lambda;

    /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
    int chroma_offset_idx = X264_MIN( qp-chroma_qp_eff+12, MAX_CHROMA_LAMBDA_OFFSET );
    if( h->param.analyse.b_psy )
        h->mb.i_chroma_lambda2_offset = x264_chroma_lambda2_offset_tab[chroma_offset_idx];
    else
        h->mb.i_chroma_lambda2_offset = 256;

    if( qp <= QP_MAX_SPEC )
    {
        h->nr_offset = h->nr_offset_denoise;
        h->nr_residual_sum = h->nr_residual_sum_buf[0];
        h->nr_count = h->nr_count_buf[0];
        h->mb.b_noise_reduction = 0;
    }
    else
    {
        h->nr_offset = h->nr_offset_emergency[qp-QP_MAX_SPEC-1];
        h->nr_residual_sum = h->nr_residual_sum_buf[1];
        h->nr_count = h->nr_count_buf[1];
        h->mb.b_noise_reduction = 1;
        /* Out-of-spec QPs are just used for calculating lambda values. */
        qp = QP_MAX_SPEC;
    }

    h->mb.i_qp = qp;
    a->i_qp = h->mb.i_qp;
    h->mb.i_chroma_qp = h->chroma_qp_table[qp];
}

Fiona Glaser's avatar
Fiona Glaser committed
411
/* Reset the analysis context `a` and the per-macroblock search limits in `h`
 * before a macroblock is analysed at the given qp.
 *
 * Always done:
 *  - derive the RD level (a->i_mbrd), deblock-RDO and early-termination
 *    flags from the subpel refinement setting;
 *  - apply the QP through x264_mb_analyse_init_qp();
 *  - mark the intra costs as not yet evaluated (COST_MAX) and choose the PCM
 *    cost and h->mb.i_skip_intra.
 *
 * Additionally, for non-I slices:
 *  - compute the allowed motion-vector range (quarter-pel "spel" and
 *    full-pel "fpel"), horizontally for every macroblock and vertically at
 *    the first macroblock of a row (first row of a pair when interlaced);
 *  - with frame threading, wait at that point for the reference frames to
 *    have enough rows available, and weight any newly available rows;
 *  - mark the inter costs as not yet evaluated;
 *  - decide whether intra search may take shortcuts (a->b_fast_intra) and
 *    whether Periodic Intra Refresh forces intra (a->b_force_intra). */
static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
{
    /* For B-slices the effective subme level is one lower than configured. */
    int subme_eff = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);

    /* mbrd == 1 -> RD mode decision */
    /* mbrd == 2 -> RD refinement */
    /* mbrd == 3 -> QPRD */
    a->i_mbrd = (subme_eff>=6) + (subme_eff>=8) + (h->param.analyse.i_subpel_refine>=10);
    h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1;
    a->b_early_terminate = h->param.analyse.i_subpel_refine < 11;

    x264_mb_analyse_init_qp( h, a, qp );

    h->mb.b_transform_8x8 = 0;

    /* I: Intra part */
    a->i_satd_i8x8chroma = COST_MAX;
    a->i_satd_i4x4       = COST_MAX;
    a->i_satd_i8x8       = COST_MAX;
    a->i_satd_i16x16     = COST_MAX;

    /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it */
    if( !h->mb.i_psy_rd && a->i_mbrd )
        a->i_satd_pcm = ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8;
    else
        a->i_satd_pcm = COST_MAX;

    a->b_fast_intra = 0;
    a->b_avoid_topright = 0;
    if( h->mb.b_lossless )
        h->mb.i_skip_intra = 0;
    else if( a->i_mbrd )
        h->mb.i_skip_intra = 2;
    else
        h->mb.i_skip_intra = !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;

    /* II: Inter part P/B frame */
    if( h->sh.i_type == SLICE_TYPE_I )
        return;

    int qpel_range = 4 * h->param.analyse.i_mv_range;
    // limit motion search to a slightly smaller range than the theoretical limit,
    // since the search may go a few iterations past its given range
    int fpel_margin = 6; // umh: 1 for diamond, 2 for octagon, 2 for hpel

    /* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -qpel_range, qpel_range-1 )
    h->mb.mv_min[0] = 4*( -16*h->mb.i_mb_x - 24 );
    h->mb.mv_max[0] = 4*( 16*( h->mb.i_mb_width - h->mb.i_mb_x - 1 ) + 24 );
    h->mb.mv_min_spel[0] = CLIP_FMV( h->mb.mv_min[0] );
    h->mb.mv_max_spel[0] = CLIP_FMV( h->mb.mv_max[0] );
    if( h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P )
    {
        /* 3 pixels of hpel border */
        int bar_x = (h->fref[0][0]->i_pir_end_col * 16 - 3)*4;
        int bar_mv = bar_x - 4*16*h->mb.i_mb_x;
        /* If we're left of the refresh bar, don't reference right of it. */
        if( bar_mv > 0 && h->mb.i_mb_x < h->fdec->i_pir_start_col )
            h->mb.mv_max_spel[0] = X264_MIN( h->mb.mv_max_spel[0], bar_mv );
    }
    h->mb.mv_min_fpel[0] = (h->mb.mv_min_spel[0]>>2) + fpel_margin;
    h->mb.mv_max_fpel[0] = (h->mb.mv_max_spel[0]>>2) - fpel_margin;

    /* The vertical limits only change at the start of a row (of a row pair
     * when interlaced), so they are recomputed only there. */
    if( h->mb.i_mb_x == 0 && !(h->mb.i_mb_y & PARAM_INTERLACED) )
    {
        int mb_y = h->mb.i_mb_y >> SLICE_MBAFF;
        int mvy_limit = qpel_range;

        if( h->i_thread_frames > 1 )
        {
            int pix_y = (h->mb.i_mb_y | PARAM_INTERLACED) * 16;
            int wait_row = pix_y + h->param.analyse.i_mv_range_thread;
            /* Wait for every reference (list 1 first on B-slices) and keep
             * the smallest number of rows available below this one. */
            for( int list = (h->sh.i_type == SLICE_TYPE_B); list >= 0; list-- )
                for( int ref = 0; ref < h->i_ref[list]; ref++ )
                {
                    x264_frame_cond_wait( h->fref[list][ref]->orig, wait_row );
                    mvy_limit = X264_MIN( mvy_limit, h->fref[list][ref]->orig->i_lines_completed - pix_y );
                }

            if( h->param.b_deterministic )
                mvy_limit = h->param.analyse.i_mv_range_thread;
            if( PARAM_INTERLACED )
                mvy_limit >>= 1;

            x264_analyse_weight_frame( h, pix_y + mvy_limit );
        }

        if( PARAM_INTERLACED )
        {
            /* 0 == top progressive, 1 == bot progressive, 2 == interlaced */
            for( int row = 0; row < 3; row++ )
            {
                int field = row == 2;
                int row_y = (h->mb.i_mb_y >> field) + (row == 1);
                h->mb.mv_miny_row[row] = 4*( -16*row_y - 24 );
                h->mb.mv_maxy_row[row] = 4*( 16*( (h->mb.i_mb_height>>field) - row_y - 1 ) + 24 );
                h->mb.mv_miny_spel_row[row] = x264_clip3( h->mb.mv_miny_row[row], -qpel_range, qpel_range );
                h->mb.mv_maxy_spel_row[row] = CLIP_FMV( h->mb.mv_maxy_row[row] );
                h->mb.mv_maxy_spel_row[row] = X264_MIN( h->mb.mv_maxy_spel_row[row], mvy_limit*4 );
                h->mb.mv_miny_fpel_row[row] = (h->mb.mv_miny_spel_row[row]>>2) + fpel_margin;
                h->mb.mv_maxy_fpel_row[row] = (h->mb.mv_maxy_spel_row[row]>>2) - fpel_margin;
            }
        }
        else
        {
            h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
            h->mb.mv_max[1] = 4*( 16*( h->mb.i_mb_height - mb_y - 1 ) + 24 );
            h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -qpel_range, qpel_range );
            h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
            h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], mvy_limit*4 );
            h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + fpel_margin;
            h->mb.mv_max_fpel[1] = (h->mb.mv_max_spel[1]>>2) - fpel_margin;
        }
    }
    if( PARAM_INTERLACED )
    {
        /* Pick the per-row limits that match this macroblock's coding mode. */
        int row = MB_INTERLACED ? 2 : h->mb.i_mb_y&1;
        h->mb.mv_min[1] = h->mb.mv_miny_row[row];
        h->mb.mv_max[1] = h->mb.mv_maxy_row[row];
        h->mb.mv_min_spel[1] = h->mb.mv_miny_spel_row[row];
        h->mb.mv_max_spel[1] = h->mb.mv_maxy_spel_row[row];
        h->mb.mv_min_fpel[1] = h->mb.mv_miny_fpel_row[row];
        h->mb.mv_max_fpel[1] = h->mb.mv_maxy_fpel_row[row];
    }
#undef CLIP_FMV

    /* No inter cost has been evaluated yet. */
    a->l0.i_cost8x16   = COST_MAX;
    a->l0.i_cost16x8   = COST_MAX;
    a->l0.i_cost8x8    = COST_MAX;
    a->l0.i_rd16x16    = COST_MAX;
    a->l0.me16x16.cost = COST_MAX;
    if( h->sh.i_type == SLICE_TYPE_B )
    {
        a->i_cost8x16bi      = COST_MAX;
        a->i_cost16x8bi      = COST_MAX;
        a->i_cost8x8bi       = COST_MAX;
        a->i_cost16x16direct = COST_MAX;
        a->i_cost16x16bi     = COST_MAX;
        a->i_rd8x16bi        = COST_MAX;
        a->i_rd16x8bi        = COST_MAX;
        a->i_rd8x8bi         = COST_MAX;
        a->i_rd16x16direct   = COST_MAX;
        a->i_rd16x16bi       = COST_MAX;
        a->l1.i_cost8x16     = COST_MAX;
        a->l1.i_cost16x8     = COST_MAX;
        for( int part = 3; part >= 0; part-- )
            a->i_cost8x8direct[part] = COST_MAX;
        a->l1.i_cost8x8      = COST_MAX;
        a->l1.i_rd16x16      = COST_MAX;
        a->l1.me16x16.cost   = COST_MAX;
    }
    else if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
    {
        for( int part = 0; part < 4; part++ )
        {
            a->l0.i_cost4x8[part] = COST_MAX;
            a->l0.i_cost8x4[part] = COST_MAX;
            a->l0.i_cost4x4[part] = COST_MAX;
        }
    }

    /* Fast intra decision */
    if( a->b_early_terminate && h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
    {
        /* Always run in fast-intra mode for subme < 3 */
        int intra_likely =
            h->mb.i_subpel_refine > 2 &&
          ( IS_INTRA( h->mb.i_mb_type_left[0] ) ||
            IS_INTRA( h->mb.i_mb_type_top ) ||
            IS_INTRA( h->mb.i_mb_type_topleft ) ||
            IS_INTRA( h->mb.i_mb_type_topright ) ||
            (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref[0][0]->mb_type[h->mb.i_mb_xy] )) ||
            (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) );
        if( !intra_likely )
            a->b_fast_intra = 1;
    }
    h->mb.b_skip_mc = 0;

    /* Periodic Intra Refresh: macroblocks inside the refresh bar of a P-slice
     * must be intra, and the bar's last column must not predict from the
     * top-right. */
    a->b_force_intra =
        h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P &&
        h->mb.i_mb_x >= h->fdec->i_pir_start_col && h->mb.i_mb_x <= h->fdec->i_pir_end_col;
    if( a->b_force_intra )
    {
        a->b_fast_intra = 0;
        a->b_avoid_topright = h->mb.i_mb_x == h->fdec->i_pir_end_col;
    }
}

594
595
596
597
598
599
600
601
602
603
604
/* Prediction modes allowed for various combinations of neighbors.
 * Each row is terminated by a -1.
 * Rows, in order: no neighbors, left, top, top/left, top/left/topleft. */
static const int8_t i16x16_mode_available[5][5] =
{
    /* none              */ { I_PRED_16x16_DC_128,  -1,             -1,              -1,             -1 },
    /* left              */ { I_PRED_16x16_DC_LEFT, I_PRED_16x16_H, -1,              -1,             -1 },
    /* top               */ { I_PRED_16x16_DC_TOP,  I_PRED_16x16_V, -1,              -1,             -1 },
    /* top/left          */ { I_PRED_16x16_V,       I_PRED_16x16_H, I_PRED_16x16_DC, -1,             -1 },
    /* top/left/topleft  */ { I_PRED_16x16_V,       I_PRED_16x16_H, I_PRED_16x16_DC, I_PRED_16x16_P, -1 },
};
Laurent Aimar's avatar
Laurent Aimar committed
605

606
607
608
609
610
611
612
613
/* Chroma prediction modes allowed per neighbor combination; each row is
 * terminated by a -1.  Looked up by predict_8x8chroma_mode_available() with
 * the same row index as the 16x16 luma table. */
static const int8_t i8x8chroma_mode_available[5][5] =
{
    /* 0 */ { I_PRED_CHROMA_DC_128,  -1,              -1,               -1,              -1 },
    /* 1 */ { I_PRED_CHROMA_DC_LEFT, I_PRED_CHROMA_H, -1,               -1,              -1 },
    /* 2 */ { I_PRED_CHROMA_DC_TOP,  I_PRED_CHROMA_V, -1,               -1,              -1 },
    /* 3 */ { I_PRED_CHROMA_V,       I_PRED_CHROMA_H, I_PRED_CHROMA_DC, -1,              -1 },
    /* 4 */ { I_PRED_CHROMA_V,       I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1 },
};
Laurent Aimar's avatar
Laurent Aimar committed
614

615
static const int8_t i4x4_mode_available[2][5][10] =
Laurent Aimar's avatar
Laurent Aimar committed
616
{
617
618
619
620
621
622
623
624
625
626
627
628
629
630
    {
        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1},
    },
    {
        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1},
    }
631
};
632

633
static ALWAYS_INLINE const int8_t *predict_16x16_mode_available( int i_neighbour )
634
635
{
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
636
637
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
    return i16x16_mode_available[idx];
Laurent Aimar's avatar
Laurent Aimar committed
638
639
}

640
static ALWAYS_INLINE const int8_t *predict_8x8chroma_mode_available( int i_neighbour )
Laurent Aimar's avatar
Laurent Aimar committed
641
{
642
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
643
644
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
    return i8x8chroma_mode_available[idx];
Laurent Aimar's avatar
Laurent Aimar committed
645
646
}

647
static ALWAYS_INLINE const int8_t *predict_8x8_mode_available( int force_intra, int i_neighbour, int i )
Laurent Aimar's avatar
Laurent Aimar committed
648
{
649
    int avoid_topright = force_intra && (i&1);
650
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
651
652
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
    return i4x4_mode_available[avoid_topright][idx];
653
654
655
656
657
658
}

/* Return the -1 terminated list of 4x4 luma intra modes to try for block i.
 * When force_intra is set and bits 0 and 2 of i are both set ((i&5) == 5),
 * the "avoid top-right" half of the table is used instead of the normal one.
 * The row is chosen from i_neighbour exactly as for the 16x16 lookup. */
static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, int i_neighbour, int i )
{
    const int corner_mask = MB_TOP|MB_LEFT|MB_TOPLEFT;
    const int no_topright = force_intra && ( (i&5) == 5 );
    const int avail = i_neighbour & corner_mask;
    const int row = ( avail == corner_mask ) ? 4 : ( avail & (MB_TOP|MB_LEFT) );

    return i4x4_mode_available[no_topright][row];
}

663
664
665
/* Fill the cached source-luma DCT buffers used by psy-trellis.
 * With trellis=2 this has to be done for both DCT sizes (do_both_dct != 0);
 * with trellis=1 only the transform size already chosen for the macroblock
 * (h->mb.b_transform_8x8) is needed.
 * The luma source plane is passed to the sub16x16 DCT routines together with
 * an all-zero block (presumably so the result is the DCT of the source
 * itself). */
static void inline x264_psy_trellis_init( x264_t *h, int do_both_dct )
{
    ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0};

    const int want_dct8 = do_both_dct || h->mb.b_transform_8x8;
    const int want_dct4 = do_both_dct || !h->mb.b_transform_8x8;

    if( want_dct8 )
        h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
    if( want_dct4 )
        h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], zero );
}

674
675
/* Reset the per-macroblock source (fenc) score caches used by psy RD.
 * Also primes the psy-trellis DCT buffers when trellis=2 and psy-trellis is
 * active.  The hadamard cache is always cleared when psy RD is on; the satd
 * cache is cleared only if b_satd is non-zero. */
static inline void x264_mb_init_fenc_cache( x264_t *h, int b_satd )
{
    const int prime_psy_trellis = h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis;

    if( prime_psy_trellis )
        x264_psy_trellis_init( h, h->param.analyse.b_transform_8x8 );

    if( h->mb.i_psy_rd )
    {
        /* Writes beyond the end of the array, but not a problem since fenc_satd_cache is right after. */
        h->mc.memzero_aligned( h->mb.pic.fenc_hadamard_cache, sizeof(h->mb.pic.fenc_hadamard_cache) );
        if( b_satd )
            h->mc.memzero_aligned( h->mb.pic.fenc_satd_cache, sizeof(h->mb.pic.fenc_satd_cache) );
    }
}

687
688
/* Choose the intra chroma prediction mode with the lowest cost.
 *
 * Returns immediately if a->i_satd_i8x8chroma is already below COST_MAX.
 * For 4:4:4 content only an approximate cost is stored (or 0 when chroma ME
 * is disabled) and no chroma mode is selected.  Otherwise every available
 * 8x8 chroma mode is scored; per-mode costs go to a->i_satd_i8x8chroma_dir[],
 * the best cost/mode to a->i_satd_i8x8chroma / a->i_predict8x8chroma, and the
 * winner is also written to h->mb.i_chroma_pred_mode. */
static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
{
    if( a->i_satd_i8x8chroma < COST_MAX )
        return;

    pixel *fenc_u = h->mb.pic.p_fenc[1];
    pixel *fenc_v = h->mb.pic.p_fenc[2];
    pixel *fdec_u = h->mb.pic.p_fdec[1];
    pixel *fdec_v = h->mb.pic.p_fdec[2];

    if( CHROMA444 )
    {
        if( h->mb.b_chroma_me )
        {
            /* Cheap approximation of chroma costs to avoid a full i4x4/i8x8 analysis:
             * apply the already chosen 16x16 luma predictor to both chroma planes. */
            h->predict_16x16[a->i_predict16x16]( fdec_u );
            h->predict_16x16[a->i_predict16x16]( fdec_v );
            int cost_u = h->pixf.mbcmp[PIXEL_16x16]( fdec_u, FDEC_STRIDE, fenc_u, FENC_STRIDE );
            int cost_v = h->pixf.mbcmp[PIXEL_16x16]( fdec_v, FDEC_STRIDE, fenc_v, FENC_STRIDE );
            a->i_satd_i8x8chroma = cost_u + cost_v;
        }
        else
            a->i_satd_i8x8chroma = 0;
        return;
    }

    /* 8x8 prediction selection for chroma */
    const int8_t *modes = predict_8x8chroma_mode_available( h->mb.i_neighbour_intra );
    const int lambda = a->i_lambda;

    if( h->mb.b_lossless || modes[3] < 0 )
    {
        /* Generic path: predict and score each candidate mode in turn. */
        for( int i = 0; modes[i] >= 0; i++ )
        {
            const int mode = modes[i];

            if( h->mb.b_lossless )
                x264_predict_lossless_8x8_chroma( h, mode );
            else
            {
                h->predict_8x8c[mode]( fdec_u );
                h->predict_8x8c[mode]( fdec_v );
            }

            /* Distortion of both planes plus the cost of signalling the mode. */
            int cost = h->pixf.mbcmp[PIXEL_8x8]( fdec_u, FDEC_STRIDE, fenc_u, FENC_STRIDE ) +
                       h->pixf.mbcmp[PIXEL_8x8]( fdec_v, FDEC_STRIDE, fenc_v, FENC_STRIDE ) +
                       lambda * bs_size_ue( x264_mb_pred_mode8x8c_fix[mode] );

            a->i_satd_i8x8chroma_dir[mode] = cost;
            COPY2_IF_LT( a->i_satd_i8x8chroma, cost, a->i_predict8x8chroma, mode );
        }
    }
    else
    {
        /* Fast path (a fourth mode is available and not lossless): three
         * modes are scored by a single x3 call per plane, plane prediction
         * is scored separately. */
        int cost_u[4], cost_v[4];

        h->pixf.intra_mbcmp_x3_8x8c( fenc_u, fdec_u, cost_u );
        h->pixf.intra_mbcmp_x3_8x8c( fenc_v, fdec_v, cost_v );
        h->predict_8x8c[I_PRED_CHROMA_P]( fdec_u );
        h->predict_8x8c[I_PRED_CHROMA_P]( fdec_v );
        cost_u[I_PRED_CHROMA_P] = h->pixf.mbcmp[PIXEL_8x8]( fdec_u, FDEC_STRIDE, fenc_u, FENC_STRIDE );
        cost_v[I_PRED_CHROMA_P] = h->pixf.mbcmp[PIXEL_8x8]( fdec_v, FDEC_STRIDE, fenc_v, FENC_STRIDE );

        for( int i = 0; modes[i] >= 0; i++ )
        {
            const int mode = modes[i];
            int cost = cost_u[mode] + cost_v[mode] + lambda * bs_size_ue( mode );

            a->i_satd_i8x8chroma_dir[mode] = cost;
            COPY2_IF_LT( a->i_satd_i8x8chroma, cost, a->i_predict8x8chroma, mode );
        }
    }

    h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
}

Fiona Glaser's avatar
Fiona Glaser committed
759
/* FIXME: should we do any sort of merged chroma analysis with 4:4:4? */
760
static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
Laurent Aimar's avatar
Laurent Aimar committed
761
762
{
    const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
763
764
    pixel *p_src = h->mb.pic.p_fenc[0];
    pixel *p_dst = h->mb.pic.p_fdec[0];
765
766
767
768
769
770
    static const int8_t intra_analysis_shortcut[2][2][2][5] =
    {
        {{{I_PRED_4x4_HU, -1, -1, -1, -1},
          {I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1}},
         {{I_PRED_4x4_DDR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1},
          {I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_VL, -1}}},
Anton Mitrofanov's avatar
Anton Mitrofanov committed
771
        {{{I_PRED_4x4_HU, -1, -1, -1, -1},
772
773
774
775
          {-1, -1, -1, -1, -1}},
         {{I_PRED_4x4_DDR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1},
          {I_PRED_4x4_DDR, I_PRED_4x4_VR, -1, -1, -1}}},
    };
Laurent Aimar's avatar
Laurent Aimar committed
776

777
    int idx;
778
    int lambda = a->i_lambda;
779

Laurent Aimar's avatar
Laurent Aimar committed
780
781
782
    /*---------------- Try all mode and calculate their score ---------------*/

    /* 16x16 prediction selection */
783
    const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
784

785
786
787
788
    /* Not heavily tuned */
    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;

789
    if( !h->mb.b_lossless && predict_mode[3] >= 0 )
790
    {
791
        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
792
793
794
795
796
797
798
799
800
        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );

        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
        if( a->i_satd_i16x16 <= i16x16_thresh )
801
        {
802
803
804
805
            h->predict_16x16[I_PRED_16x16_P]( p_dst );
            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
806
807
808
        }
    }
    else
Laurent Aimar's avatar
Laurent Aimar committed
809
    {
810
        for( ; *predict_mode >= 0; predict_mode++ )
811
812
        {
            int i_satd;
813
            int i_mode = *predict_mode;
814
815

            if( h->mb.b_lossless )
Fiona Glaser's avatar
Fiona Glaser committed
816
                x264_predict_lossless_16x16( h, 0, i_mode );
817
818
            else
                h->predict_16x16[i_mode]( p_dst );
819
820

            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
821
                     lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
822
823
824
            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
            a->i_satd_i16x16_dir[i_mode] = i_satd;
        }
Laurent Aimar's avatar
Laurent Aimar committed
825
826
    }

827
828
    if( h->sh.i_type == SLICE_TYPE_B )
        /* cavlc mb type prefix */
829
        a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
830

831
    if( a->i_satd_i16x16 > i16x16_thresh )
832
        return;
833

834
835
    /* 8x8 prediction selection */
    if( flags & X264_ANALYSE_I8x8 )
Laurent Aimar's avatar
Laurent Aimar committed
836
    {
837
        ALIGNED_ARRAY_32( pixel, edge,[36] );
838
        x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];
839
        int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
840
841

        // FIXME some bias like in i4x4?
842
        int i_cost = lambda * 4; /* base predmode costs */
843
        h->mb.i_cbp_luma = 0;
Laurent Aimar's avatar
Laurent Aimar committed
844

845
        if( h->sh.i_type == SLICE_TYPE_B )
846
            i_cost += lambda * i_mb_b_cost_table[I_8x8];
847

848
849
850
851
        for( idx = 0;; idx++ )
        {
            int x = idx&1;
            int y = idx>>1;
852
853
            pixel *p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
            pixel *p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
854
855
            int i_best = COST_MAX;
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
856

857
            predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
858
            h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
859

860
            if( !h->mb.b_lossless && predict_mode[5] >= 0 )
861
            {
862
                int satd[9];
Fiona Glaser's avatar
Fiona Glaser committed
863
                h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd );
864
865
                int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];
                satd[i_pred_mode] -= 3 * lambda;
866
                for( int i = 2; i >= 0; i-- )
867
                {
Anton Mitrofanov's avatar
Anton Mitrofanov committed
868
869
                    int cost = satd[i];
                    a->i_satd_i8x8_dir[i][idx] = cost + 4 * lambda;
870
871
                    COPY2_IF_LT( i_best, cost, a->i_predict8x8[idx], i );
                }
872
873
874
875

                /* Take analysis shortcuts: don't analyse modes that are too
                 * far away direction-wise from the favored mode. */
                if( a->i_mbrd < 1 + a->b_fast_intra )
876
                    predict_mode = intra_analysis_shortcut[a->b_avoid_topright][predict_mode[8] >= 0][favor_vertical];
877
878
                else
                    predict_mode += 3;
879
880
            }

881
            for( ; *predict_mode >= 0 && (i_best >= 0 || a->i_mbrd >= 2); predict_mode++ )
Laurent Aimar's avatar
Laurent Aimar committed
882
            {
883
                int i_satd;
884
                int i_mode = *predict_mode;
Laurent Aimar's avatar
Laurent Aimar committed
885

886
                if( h->mb.b_lossless )
Fiona Glaser's avatar
Fiona Glaser committed
887
                    x264_predict_lossless_8x8( h, p_dst_by, 0, idx, i_mode, edge );
888
889
                else
                    h->predict_8x8[i_mode]( p_dst_by, edge );