/*****************************************************************************
 * analyse.c: macroblock analysis
 *****************************************************************************
 * Copyright (C) 2003-2011 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

28
#define _ISOC99_SOURCE
Laurent Aimar's avatar
Laurent Aimar committed
29

30
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
31
32
#include "macroblock.h"
#include "me.h"
33
#include "ratecontrol.h"
34
35
#include "analyse.h"
#include "rdo.c"
Laurent Aimar's avatar
Laurent Aimar committed
36
37
38
39

typedef struct
{
    /* 16x16 */
40
    int       i_rd16x16;
Laurent Aimar's avatar
Laurent Aimar committed
41
    x264_me_t me16x16;
42
    x264_me_t bi16x16;      /* for b16x16 BI mode, since MVs can differ from l0/l1 */
Laurent Aimar's avatar
Laurent Aimar committed
43
44
45

    /* 8x8 */
    int       i_cost8x8;
Håkan Hjort's avatar
Håkan Hjort committed
46
    /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
47
    ALIGNED_4( int16_t mvc[32][5][2] );
Laurent Aimar's avatar
Laurent Aimar committed
48
49
50
51
52
53
54
55
56
57
58
59
    x264_me_t me8x8[4];

    /* Sub 4x4 */
    int       i_cost4x4[4]; /* cost per 8x8 partition */
    x264_me_t me4x4[4][4];

    /* Sub 8x4 */
    int       i_cost8x4[4]; /* cost per 8x8 partition */
    x264_me_t me8x4[4][2];

    /* Sub 4x8 */
    int       i_cost4x8[4]; /* cost per 8x8 partition */
Loren Merritt's avatar
Loren Merritt committed
60
    x264_me_t me4x8[4][2];
Laurent Aimar's avatar
Laurent Aimar committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

    /* 16x8 */
    int       i_cost16x8;
    x264_me_t me16x8[2];

    /* 8x16 */
    int       i_cost8x16;
    x264_me_t me8x16[2];

} x264_mb_analysis_list_t;

typedef struct
{
    /* conduct the analysis using this lamda and QP */
    int i_lambda;
76
    int i_lambda2;
Laurent Aimar's avatar
Laurent Aimar committed
77
    int i_qp;
78
    uint16_t *p_cost_mv;
79
    uint16_t *p_cost_ref[2];
80
    int i_mbrd;
Laurent Aimar's avatar
Laurent Aimar committed
81
82
83


    /* I: Intra part */
84
85
    /* Take some shortcuts in intra search if intra is deemed unlikely */
    int b_fast_intra;
Fiona Glaser's avatar
Fiona Glaser committed
86
    int b_force_intra; /* For Periodic Intra Refresh.  Only supported in P-frames. */
87
    int b_avoid_topright; /* For Periodic Intra Refresh: don't predict from top-right pixels. */
88
    int b_try_skip;
89

90
    /* Luma part */
91
92
    int i_satd_i16x16;
    int i_satd_i16x16_dir[7];
Laurent Aimar's avatar
Laurent Aimar committed
93
94
    int i_predict16x16;

95
    int i_satd_i8x8;
96
    int i_cbp_i8x8_luma;
97
98
    int i_satd_i8x8_dir[12][4];
    int i_predict8x8[4];
99

100
101
    int i_satd_i4x4;
    int i_predict4x4[16];
Laurent Aimar's avatar
Laurent Aimar committed
102

103
104
    int i_satd_pcm;

Laurent Aimar's avatar
Laurent Aimar committed
105
    /* Chroma part */
Henrik Gramner's avatar
Henrik Gramner committed
106
107
    int i_satd_chroma;
    int i_satd_chroma_dir[7];
108
    int i_predict8x8chroma;
Laurent Aimar's avatar
Laurent Aimar committed
109
110
111
112
113
114

    /* II: Inter part P/B frame */
    x264_mb_analysis_list_t l0;
    x264_mb_analysis_list_t l1;

    int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
115
116
117
    int i_cost16x16direct;
    int i_cost8x8bi;
    int i_cost8x8direct[4];
118
119
120
    int i_satd8x8[3][4]; /* [L0,L1,BI][8x8 0..3] SATD only */
    int i_cost_est16x8[2]; /* Per-partition estimated cost */
    int i_cost_est8x16[2];
121
122
    int i_cost16x8bi;
    int i_cost8x16bi;
123
124
125
126
127
    int i_rd16x16bi;
    int i_rd16x16direct;
    int i_rd16x8bi;
    int i_rd8x16bi;
    int i_rd8x8bi;
128
129
130
131
132

    int i_mb_partition16x8[2]; /* mb_partition_e */
    int i_mb_partition8x16[2];
    int i_mb_type16x8; /* mb_class_e */
    int i_mb_type8x16;
133
134

    int b_direct_available;
135
    int b_early_terminate;
Laurent Aimar's avatar
Laurent Aimar committed
136
137
138

} x264_mb_analysis_t;

139
/* lambda = pow(2,qp/6-2) */
140
141
const uint16_t x264_lambda_tab[QP_MAX_MAX+1] =
{
142
143
144
145
146
147
148
149
   1,   1,   1,   1,   1,   1,   1,   1, /*  0- 7 */
   1,   1,   1,   1,   1,   1,   1,   1, /*  8-15 */
   2,   2,   2,   2,   3,   3,   3,   4, /* 16-23 */
   4,   4,   5,   6,   6,   7,   8,   9, /* 24-31 */
  10,  11,  13,  14,  16,  18,  20,  23, /* 32-39 */
  25,  29,  32,  36,  40,  45,  51,  57, /* 40-47 */
  64,  72,  81,  91, 102, 114, 128, 144, /* 48-55 */
 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
Fiona Glaser's avatar
Fiona Glaser committed
150
151
152
 406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */
1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */
2048,2299,                               /* 80-81 */
Laurent Aimar's avatar
Laurent Aimar committed
153
154
};

Fiona Glaser's avatar
Fiona Glaser committed
155
/* lambda2 = pow(lambda,2) * .9 * 256 */
Fiona Glaser's avatar
Fiona Glaser committed
156
/* Capped to avoid overflow */
157
158
const int x264_lambda2_tab[QP_MAX_MAX+1] =
{
Fiona Glaser's avatar
Fiona Glaser committed
159
160
161
162
163
164
165
166
167
168
169
       14,       18,       22,       28,       36,       45,      57,      72, /*  0- 7 */
       91,      115,      145,      182,      230,      290,     365,     460, /*  8-15 */
      580,      731,      921,     1161,     1462,     1843,    2322,    2925, /* 16-23 */
     3686,     4644,     5851,     7372,     9289,    11703,   14745,   18578, /* 24-31 */
    23407,    29491,    37156,    46814,    58982,    74313,   93628,  117964, /* 32-39 */
   148626,   187257,   235929,   297252,   374514,   471859,  594505,  749029, /* 40-47 */
   943718,  1189010,  1498059,  1887436,  2378021,  2996119, 3774873, 4756042, /* 48-55 */
  5992238,  7549747,  9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */
 38048341, 47937906, 60397977, 76096683, 95875813,120795955,                   /* 64-69 */
134217727,134217727,134217727,134217727,134217727,134217727,                   /* 70-75 */
134217727,134217727,134217727,134217727,134217727,134217727,                   /* 76-81 */
170
171
};

172
173
/* 64-entry 8-bit lookup table.
 * NOTE(review): values match round(256*(2^(i/64)-1)) -- confirm against the
 * consumer of this table, which is outside this file section. */
const uint8_t x264_exp2_lut[64] =
{
      0,   3,   6,   8,  11,  14,  17,  20,  23,  26,  29,  32,  36,  39,  42,  45,
     48,  52,  55,  58,  62,  65,  69,  72,  76,  80,  83,  87,  91,  94,  98, 102,
    106, 110, 114, 118, 122, 126, 130, 135, 139, 143, 147, 152, 156, 161, 165, 170,
    175, 179, 184, 189, 194, 198, 203, 208, 214, 219, 224, 229, 234, 240, 245, 250
};

180
181
/* 128-entry float lookup table; entry i holds log2(1 + i/128) to five
 * decimal places (e.g. entry 64 is log2(1.5) = 0.58496). */
const float x264_log2_lut[128] =
{
    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
};

/* Avoid an int/float conversion. */
/* Entry i holds 31-i as a float.
 * NOTE(review): presumably indexed by a leading-zero count -- confirm at the
 * use site, which is outside this file section. */
const float x264_log2_lz_lut[32] =
{
    31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
};

Fiona Glaser's avatar
Fiona Glaser committed
206
207
// should the intra and inter lambdas be different?
// I'm just matching the behaviour of deadzone quant.
208
209
static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] =
{
Fiona Glaser's avatar
Fiona Glaser committed
210
    // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
Fiona Glaser's avatar
Fiona Glaser committed
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
    {
               46,       58,       73,       92,      117,      147,
              185,      233,      294,      370,      466,      587,
              740,      932,     1174,     1480,     1864,     2349,
             2959,     3728,     4697,     5918,     7457,     9395,
            11837,    14914,    18790,    23674,    29828,    37581,
            47349,    59656,    75163,    94699,   119313,   150326,
           189399,   238627,   300652,   378798,   477255,   601304,
           757596,   954511,  1202608,  1515192,  1909022,  2405217,
          3030384,  3818045,  4810435,  6060769,  7636091,  9620872,
         12121539, 15272182, 19241743, 24243077, 30544363, 38483486,
         48486154, 61088726, 76966972, 96972308,
        122177453,134217727,134217727,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
    },
Fiona Glaser's avatar
Fiona Glaser committed
226
    // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
Fiona Glaser's avatar
Fiona Glaser committed
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
    {
               27,       34,       43,       54,       68,       86,
              108,      136,      172,      216,      273,      343,
              433,      545,      687,      865,     1090,     1374,
             1731,     2180,     2747,     3461,     4361,     5494,
             6922,     8721,    10988,    13844,    17442,    21976,
            27688,    34885,    43953,    55377,    69771,    87906,
           110755,   139543,   175813,   221511,   279087,   351627,
           443023,   558174,   703255,   886046,  1116348,  1406511,
          1772093,  2232697,  2813022,  3544186,  4465396,  5626046,
          7088374,  8930791, 11252092, 14176748, 17861583, 22504184,
         28353495, 35723165, 45008368, 56706990,
         71446330, 90016736,113413980,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
        134217727,134217727,134217727,134217727,134217727,134217727,
    }
Fiona Glaser's avatar
Fiona Glaser committed
243
244
};

Fiona Glaser's avatar
Fiona Glaser committed
245
/* Chroma lambda2 scale factors, indexed 0..MAX_CHROMA_LAMBDA_OFFSET.
 * Index 12 holds 256, the same value x264_mb_analyse_init_qp uses when psy
 * is disabled; each step of 3 doubles the value, and the last entry is
 * clamped to fit uint16_t. */
#define MAX_CHROMA_LAMBDA_OFFSET 36
static const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] =
{
       16,    20,    25,    32,    40,    50,
       64,    80,   101,   128,   161,   203,
      256,   322,   406,   512,   645,   812,
     1024,  1290,  1625,  2048,  2580,  3250,
     4096,  5160,  6501,  8192, 10321, 13003,
    16384, 20642, 26007, 32768, 41285, 52015,
    65535
};

257
/* TODO: calculate CABAC costs */
258
259
static const uint8_t i_mb_b_cost_table[X264_MBTYPE_MAX] =
{
260
    9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
261
};
262
263
/* Cost table for B 16x8/8x16 macroblocks (17 entries).
 * NOTE(review): the index semantics are defined at the use site, outside
 * this file section. */
static const uint8_t i_mb_b16x8_cost_table[17] =
{
    0, 0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
};
266
267
/* Cost table for B-frame sub-macroblock types (13 entries). */
static const uint8_t i_sub_mb_b_cost_table[13] =
{
    7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
};
270
271
/* Cost table for P-frame sub-macroblock types (4 entries). */
static const uint8_t i_sub_mb_p_cost_table[4] =
{
    5, 3, 3, 1
};
274

275
276
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );

Fiona Glaser's avatar
Fiona Glaser committed
277
static uint16_t x264_cost_ref[QP_MAX+1][3][33];
278
static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
279
static uint16_t x264_cost_i4x4_mode[(QP_MAX+2)*32];
280

281
282
283
284
285
286
287
288
289
290
291
292
/* Allocate and fill the log-cost table consumed by x264_analyse_init_costs().
 * The table has 2*4*2048+1 floats: entry 0 is 0.718 and entry n (n >= 1) is
 * 2*log2(n+1) + 1.718.  Returns NULL if the allocation fails; the caller
 * owns the returned buffer.  `h` is not used. */
float *x264_analyse_prepare_costs( x264_t *h )
{
    const int max_idx = 2*4*2048;
    float *cost_log = x264_malloc( (max_idx+1)*sizeof(float) );
    if( cost_log )
    {
        cost_log[0] = 0.718f;
        for( int n = 1; n <= max_idx; n++ )
            cost_log[n] = log2f(n+1)*2 + 1.718f;
    }
    return cost_log;
}

/* Build the cost tables for one QP:
 *   - h->cost_mv[qp]: lambda-scaled mv cost, indexable from -2*4*2048 to
 *     +2*4*2048 (the stored pointer is offset to the middle of the buffer);
 *   - x264_cost_ref[qp]: reference index costs (written under cost_ref_mutex);
 *   - h->cost_mv_fpel[qp][0..3]: subsampled copies of cost_mv, only when the
 *     ME method is ESA or higher;
 *   - the i4x4 mode cost slice for this QP.
 * `logs` is the table produced by x264_analyse_prepare_costs().
 * Returns 0 on success (or if cost_mv[qp] already exists) and -1 via the
 * `fail` label, which CHECKED_MALLOC is expected to jump to on allocation
 * failure. */
int x264_analyse_init_costs( x264_t *h, float *logs, int qp )
{
    int lambda = x264_lambda_tab[qp];
    if( h->cost_mv[qp] )
        return 0;
    /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
    CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
    h->cost_mv[qp] += 2*4*2048;
    for( int i = 0; i <= 2*4*2048; i++ )
    {
        /* Symmetric in the sign of the mv delta; saturate to 16 bits. */
        h->cost_mv[qp][-i] =
        h->cost_mv[qp][i]  = X264_MIN( lambda * logs[i] + .5f, (1<<16)-1 );
    }
    x264_pthread_mutex_lock( &cost_ref_mutex );
    for( int i = 0; i < 3; i++ )
        for( int j = 0; j < 33; j++ )
            x264_cost_ref[qp][i][j] = X264_MIN( i ? lambda * bs_size_te( i, j ) : 0, (1<<16)-1 );
    x264_pthread_mutex_unlock( &cost_ref_mutex );
    if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
    {
        for( int j = 0; j < 4; j++ )
        {
            CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*2048 + 1) * sizeof(uint16_t) );
            h->cost_mv_fpel[qp][j] += 2*2048;
            for( int i = -2*2048; i < 2*2048; i++ )
                h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
        }
    }
    /* 64-byte aligned view into the static buffer, 32 entries per QP. */
    uint16_t *cost_i4x4_mode = (uint16_t*)ALIGN((intptr_t)x264_cost_i4x4_mode,64) + qp*32;
    for( int i = 0; i < 17; i++ )
        cost_i4x4_mode[i] = 3*lambda*(i!=8);
    return 0;
fail:
    return -1;
}

328
329
/* Release every per-QP table allocated by x264_analyse_init_costs().
 * The stored pointers are offset into the middle of their buffers, so the
 * offset is subtracted again before freeing.  Each fpel table is checked
 * individually: if init_costs failed part-way through allocating the four
 * tables, some entries are still NULL and must not be offset and freed. */
void x264_analyse_free_costs( x264_t *h )
{
    for( int i = 0; i < QP_MAX+1; i++ )
    {
        if( h->cost_mv[i] )
            x264_free( h->cost_mv[i] - 2*4*2048 );
        for( int j = 0; j < 4; j++ )
            if( h->cost_mv_fpel[i][j] )
                x264_free( h->cost_mv_fpel[i][j] - 2*2048 );
    }
}

340
341
/* Extend the weighted copies of the list-0 reference planes down to (about)
 * line `end`.  The first reference with a weight function determines the
 * geometry; only the lines not yet covered by h->fenc->i_lines_weighted are
 * processed, and every weighted reference from that one onwards is scaled
 * from the same source plane. */
void x264_analyse_weight_frame( x264_t *h, int end )
{
    for( int j = 0; j < h->i_ref[0]; j++ )
    {
        if( h->sh.weight[j][0].weightfn )
        {
            x264_frame_t *frame = h->fref[0][j];
            int width = frame->i_width[0] + 2*PADH;
            int i_padv = PADV << PARAM_INTERLACED;
            int offset, height;
            /* Start of the padded plane (top-left of the padding). */
            pixel *src = frame->filtered[0][0] - frame->i_stride[0]*i_padv - PADH;
            height = X264_MIN( 16 + end + i_padv, h->fref[0][j]->i_lines[0] + i_padv*2 ) - h->fenc->i_lines_weighted;
            offset = h->fenc->i_lines_weighted*frame->i_stride[0];
            h->fenc->i_lines_weighted += height;
            if( height )
                for( int k = j; k < h->i_ref[0]; k++ )
                    if( h->sh.weight[k][0].weightfn )
                    {
                        pixel *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*i_padv - PADH;
                        x264_weight_scale_plane( h, dst + offset, frame->i_stride[0],
                                                 src + offset, frame->i_stride[0],
                                                 width, height, &h->sh.weight[k][0] );
                    }
            /* Only the first weighted reference drives the update. */
            break;
        }
    }
}

368
369
370
/* Point the analysis context at the precomputed cost tables for a->i_qp:
 * the mv cost table, and for each list the reference cost row selected by
 * the number of active references minus one, clipped to 0..2. */
static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
{
    a->p_cost_mv = h->cost_mv[a->i_qp];
    a->p_cost_ref[0] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
    a->p_cost_ref[1] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
}

Fiona Glaser's avatar
Fiona Glaser committed
376
/* Set up everything in `a` and `h->mb` that depends on the QP: lambda and
 * lambda2, trellis lambdas, psy-rd lambda, chroma lambda2 offset, the
 * noise-reduction buffers and the luma/chroma QP itself.
 * `qp` may exceed QP_MAX_SPEC; such values only influence the lambdas and
 * select the emergency noise-reduction set, and the QP that is stored is
 * clamped to QP_MAX_SPEC. */
static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int qp )
{
    int effective_chroma_qp = h->chroma_qp_table[SPEC_QP(qp)] + X264_MAX( qp - QP_MAX_SPEC, 0 );
    a->i_lambda = x264_lambda_tab[qp];
    a->i_lambda2 = x264_lambda2_tab[qp];

    h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
    if( h->param.analyse.i_trellis )
    {
        /* [luma/chroma][inter/intra] */
        h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][qp];
        h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][qp];
        h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][effective_chroma_qp];
        h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][effective_chroma_qp];
    }
    h->mb.i_psy_rd_lambda = a->i_lambda;
    /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
    int chroma_offset_idx = X264_MIN( qp-effective_chroma_qp+12, MAX_CHROMA_LAMBDA_OFFSET );
    h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;

    if( qp > QP_MAX_SPEC )
    {
        h->nr_offset = h->nr_offset_emergency[qp-QP_MAX_SPEC-1];
        h->nr_residual_sum = h->nr_residual_sum_buf[1];
        h->nr_count = h->nr_count_buf[1];
        h->mb.b_noise_reduction = 1;
        qp = QP_MAX_SPEC; /* Out-of-spec QPs are just used for calculating lambda values. */
    }
    else
    {
        h->nr_offset = h->nr_offset_denoise;
        h->nr_residual_sum = h->nr_residual_sum_buf[0];
        h->nr_count = h->nr_count_buf[0];
        h->mb.b_noise_reduction = 0;
    }

    a->i_qp = h->mb.i_qp = qp;
    h->mb.i_chroma_qp = h->chroma_qp_table[qp];
}

Fiona Glaser's avatar
Fiona Glaser committed
415
/* Prepare the analysis context `a` and the per-macroblock state in `h->mb`
 * before analysing one macroblock at the given QP:
 *   - derive the RD level (i_mbrd) and related flags from subme;
 *   - load QP-dependent parameters via x264_mb_analyse_init_qp();
 *   - reset intra costs;
 *   - for non-I slices: compute the legal motion vector range (once per
 *     macroblock row for the vertical component), reset inter costs, and
 *     decide the fast-intra / forced-intra flags. */
static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
{
    int subme = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);

    /* mbrd == 1 -> RD mode decision */
    /* mbrd == 2 -> RD refinement */
    /* mbrd == 3 -> QPRD */
    a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10);
    h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1;
    a->b_early_terminate = h->param.analyse.i_subpel_refine < 11;

    x264_mb_analyse_init_qp( h, a, qp );

    h->mb.b_transform_8x8 = 0;

    /* I: Intra part */
    a->i_satd_i16x16 =
    a->i_satd_i8x8   =
    a->i_satd_i4x4   =
    a->i_satd_chroma = COST_MAX;

    /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it */
    a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd ? ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8 : COST_MAX;

    a->b_fast_intra = 0;
    a->b_avoid_topright = 0;
    h->mb.i_skip_intra =
        h->mb.b_lossless ? 0 :
        a->i_mbrd ? 2 :
        !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;

    /* II: Inter part P/B frame */
    if( h->sh.i_type != SLICE_TYPE_I )
    {
        int i_fmv_range = 4 * h->param.analyse.i_mv_range;
        // limit motion search to a slightly smaller range than the theoretical limit,
        // since the search may go a few iterations past its given range
        int i_fpel_border = 6; // umh: 1 for diamond, 2 for octagon, 2 for hpel

        /* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range-1 )
        h->mb.mv_min[0] = 4*( -16*h->mb.i_mb_x - 24 );
        h->mb.mv_max[0] = 4*( 16*( h->mb.i_mb_width - h->mb.i_mb_x - 1 ) + 24 );
        h->mb.mv_min_spel[0] = CLIP_FMV( h->mb.mv_min[0] );
        h->mb.mv_max_spel[0] = CLIP_FMV( h->mb.mv_max[0] );
        if( h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P )
        {
            int max_x = (h->fref[0][0]->i_pir_end_col * 16 - 3)*4; /* 3 pixels of hpel border */
            int max_mv = max_x - 4*16*h->mb.i_mb_x;
            /* If we're left of the refresh bar, don't reference right of it. */
            if( max_mv > 0 && h->mb.i_mb_x < h->fdec->i_pir_start_col )
                h->mb.mv_max_spel[0] = X264_MIN( h->mb.mv_max_spel[0], max_mv );
        }
        h->mb.mv_min_fpel[0] = (h->mb.mv_min_spel[0]>>2) + i_fpel_border;
        h->mb.mv_max_fpel[0] = (h->mb.mv_max_spel[0]>>2) - i_fpel_border;
        /* The vertical range only changes per macroblock row (per row pair
         * when interlaced), so compute it at the first macroblock of the row. */
        if( h->mb.i_mb_x == 0 && !(h->mb.i_mb_y & PARAM_INTERLACED) )
        {
            int mb_y = h->mb.i_mb_y >> SLICE_MBAFF;
            int thread_mvy_range = i_fmv_range;

            if( h->i_thread_frames > 1 )
            {
                int pix_y = (h->mb.i_mb_y | PARAM_INTERLACED) * 16;
                int thresh = pix_y + h->param.analyse.i_mv_range_thread;
                /* Wait for each reference frame to reach `thresh`, then limit
                 * the vertical range to what is actually available. */
                for( int i = (h->sh.i_type == SLICE_TYPE_B); i >= 0; i-- )
                    for( int j = 0; j < h->i_ref[i]; j++ )
                    {
                        x264_frame_cond_wait( h->fref[i][j]->orig, thresh );
                        thread_mvy_range = X264_MIN( thread_mvy_range, h->fref[i][j]->orig->i_lines_completed - pix_y );
                    }

                if( h->param.b_deterministic )
                    thread_mvy_range = h->param.analyse.i_mv_range_thread;
                if( PARAM_INTERLACED )
                    thread_mvy_range >>= 1;

                x264_analyse_weight_frame( h, pix_y + thread_mvy_range );
            }

            if( PARAM_INTERLACED )
            {
                /* 0 == top progressive, 1 == bot progressive, 2 == interlaced */
                for( int i = 0; i < 3; i++ )
                {
                    int j = i == 2;
                    mb_y = (h->mb.i_mb_y >> j) + (i == 1);
                    h->mb.mv_miny_row[i] = 4*( -16*mb_y - 24 );
                    h->mb.mv_maxy_row[i] = 4*( 16*( (h->mb.i_mb_height>>j) - mb_y - 1 ) + 24 );
                    h->mb.mv_miny_spel_row[i] = x264_clip3( h->mb.mv_miny_row[i], -i_fmv_range, i_fmv_range );
                    h->mb.mv_maxy_spel_row[i] = CLIP_FMV( h->mb.mv_maxy_row[i] );
                    h->mb.mv_maxy_spel_row[i] = X264_MIN( h->mb.mv_maxy_spel_row[i], thread_mvy_range*4 );
                    h->mb.mv_miny_fpel_row[i] = (h->mb.mv_miny_spel_row[i]>>2) + i_fpel_border;
                    h->mb.mv_maxy_fpel_row[i] = (h->mb.mv_maxy_spel_row[i]>>2) - i_fpel_border;
                }
            }
            else
            {
                h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
                h->mb.mv_max[1] = 4*( 16*( h->mb.i_mb_height - mb_y - 1 ) + 24 );
                h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
                h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
                h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
                h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
                h->mb.mv_max_fpel[1] = (h->mb.mv_max_spel[1]>>2) - i_fpel_border;
            }
        }
        if( PARAM_INTERLACED )
        {
            /* Select the precomputed per-row range matching this macroblock. */
            int i = MB_INTERLACED ? 2 : h->mb.i_mb_y&1;
            h->mb.mv_min[1] = h->mb.mv_miny_row[i];
            h->mb.mv_max[1] = h->mb.mv_maxy_row[i];
            h->mb.mv_min_spel[1] = h->mb.mv_miny_spel_row[i];
            h->mb.mv_max_spel[1] = h->mb.mv_maxy_spel_row[i];
            h->mb.mv_min_fpel[1] = h->mb.mv_miny_fpel_row[i];
            h->mb.mv_max_fpel[1] = h->mb.mv_maxy_fpel_row[i];
        }
#undef CLIP_FMV

        a->l0.me16x16.cost =
        a->l0.i_rd16x16    =
        a->l0.i_cost8x8    =
        a->l0.i_cost16x8   =
        a->l0.i_cost8x16   = COST_MAX;
        if( h->sh.i_type == SLICE_TYPE_B )
        {
            a->l1.me16x16.cost =
            a->l1.i_rd16x16    =
            a->l1.i_cost8x8    =
            a->i_cost8x8direct[0] =
            a->i_cost8x8direct[1] =
            a->i_cost8x8direct[2] =
            a->i_cost8x8direct[3] =
            a->l1.i_cost16x8   =
            a->l1.i_cost8x16   =
            a->i_rd16x16bi     =
            a->i_rd16x16direct =
            a->i_rd8x8bi       =
            a->i_rd16x8bi      =
            a->i_rd8x16bi      =
            a->i_cost16x16bi   =
            a->i_cost16x16direct =
            a->i_cost8x8bi     =
            a->i_cost16x8bi    =
            a->i_cost8x16bi    = COST_MAX;
        }
        else if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
            for( int i = 0; i < 4; i++ )
            {
                a->l0.i_cost4x4[i] =
                a->l0.i_cost8x4[i] =
                a->l0.i_cost4x8[i] = COST_MAX;
            }

        /* Fast intra decision */
        if( a->b_early_terminate && h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
        {
            /* Always run in fast-intra mode for subme < 3 */
            if( h->mb.i_subpel_refine > 2 &&
              ( IS_INTRA( h->mb.i_mb_type_left[0] ) ||
                IS_INTRA( h->mb.i_mb_type_top ) ||
                IS_INTRA( h->mb.i_mb_type_topleft ) ||
                IS_INTRA( h->mb.i_mb_type_topright ) ||
                (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref[0][0]->mb_type[h->mb.i_mb_xy] )) ||
                (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) ) )
            { /* intra is likely */ }
            else
            {
                a->b_fast_intra = 1;
            }
        }
        h->mb.b_skip_mc = 0;
        /* Periodic intra refresh: macroblocks inside the refresh column range
         * of a P-slice are forced to intra. */
        if( h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P &&
            h->mb.i_mb_x >= h->fdec->i_pir_start_col && h->mb.i_mb_x <= h->fdec->i_pir_end_col )
        {
            a->b_force_intra = 1;
            a->b_fast_intra = 0;
            a->b_avoid_topright = h->mb.i_mb_x == h->fdec->i_pir_end_col;
        }
        else
            a->b_force_intra = 0;
    }
}

598
599
600
601
602
603
604
605
606
607
608
/* Intra prediction modes usable for each combination of available
 * neighbours.  Every row is a list terminated by -1.
 * Row order: no neighbours, left only, top only, top+left,
 * top+left+topleft. */
static const int8_t i16x16_mode_available[5][5] =
{
    /* none              */ { I_PRED_16x16_DC_128,  -1,             -1,              -1,             -1 },
    /* left              */ { I_PRED_16x16_DC_LEFT, I_PRED_16x16_H, -1,              -1,             -1 },
    /* top               */ { I_PRED_16x16_DC_TOP,  I_PRED_16x16_V, -1,              -1,             -1 },
    /* top+left          */ { I_PRED_16x16_V,       I_PRED_16x16_H, I_PRED_16x16_DC, -1,             -1 },
    /* top+left+topleft  */ { I_PRED_16x16_V,       I_PRED_16x16_H, I_PRED_16x16_DC, I_PRED_16x16_P, -1 },
};
Laurent Aimar's avatar
Laurent Aimar committed
609

Henrik Gramner's avatar
Henrik Gramner committed
610
static const int8_t chroma_mode_available[5][5] =
611
612
613
614
615
616
617
{
    {I_PRED_CHROMA_DC_128, -1, -1, -1, -1},
    {I_PRED_CHROMA_DC_LEFT, I_PRED_CHROMA_H, -1, -1, -1},
    {I_PRED_CHROMA_DC_TOP, I_PRED_CHROMA_V, -1, -1, -1},
    {I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, -1, -1},
    {I_PRED_CHROMA_V, I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1},
};
Laurent Aimar's avatar
Laurent Aimar committed
618

619
static const int8_t i4x4_mode_available[2][5][10] =
Laurent Aimar's avatar
Laurent Aimar committed
620
{
621
622
623
624
625
626
627
628
629
630
631
632
633
634
    {
        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1},
    },
    {
        {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, -1, -1, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1},
        {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1},
    }
635
};
636

637
static ALWAYS_INLINE const int8_t *predict_16x16_mode_available( int i_neighbour )
638
639
{
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
640
641
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
    return i16x16_mode_available[idx];
Laurent Aimar's avatar
Laurent Aimar committed
642
643
}

Henrik Gramner's avatar
Henrik Gramner committed
644
static ALWAYS_INLINE const int8_t *predict_chroma_mode_available( int i_neighbour )
Laurent Aimar's avatar
Laurent Aimar committed
645
{
646
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
647
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
Henrik Gramner's avatar
Henrik Gramner committed
648
    return chroma_mode_available[idx];
Laurent Aimar's avatar
Laurent Aimar committed
649
650
}

651
static ALWAYS_INLINE const int8_t *predict_8x8_mode_available( int force_intra, int i_neighbour, int i )
Laurent Aimar's avatar
Laurent Aimar committed
652
{
653
    int avoid_topright = force_intra && (i&1);
654
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
655
656
    idx = (idx == (MB_TOP|MB_LEFT|MB_TOPLEFT)) ? 4 : idx & (MB_TOP|MB_LEFT);
    return i4x4_mode_available[avoid_topright][idx];
657
658
659
660
661
662
}

/* Return the -1 terminated list of 4x4 luma intra prediction modes for 4x4
 * block number i.
 *
 * force_intra: when non-zero and both bit 0 and bit 2 of i are set, the
 *              "avoid_topright" variant of the table is used
 *              (presumably the right-most column of 4x4 blocks -- TODO
 *              confirm against the block index layout).
 * i_neighbour: neighbor availability flags; only the MB_TOP, MB_LEFT and
 *              MB_TOPLEFT bits are examined.
 * i:           index of the 4x4 block within the macroblock. */
static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra, int i_neighbour, int i )
{
    const int all_three = MB_TOP|MB_LEFT|MB_TOPLEFT;
    int avoid_topright = force_intra && ((i&5) == 5);
    int avail = i_neighbour & all_three;
    /* Row 4 needs top, left and top-left together; in every other case the
     * top-left bit is dropped and the top/left bits alone pick the row. */
    int row = (avail == all_three) ? 4 : avail & (MB_TOP|MB_LEFT);
    return i4x4_mode_available[avoid_topright][row];
}

667
668
669
/* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */
/* Fills h->mb.pic.fenc_dct8 and/or h->mb.pic.fenc_dct4 by running the 16x16
 * "sub" DCT functions on the luma source plane (p_fenc[0]) against an all-zero
 * reference block.
 *
 * do_both_dct: non-zero computes both the 8x8 and the 4x4 transform results;
 *              zero computes only the one matching h->mb.b_transform_8x8. */
static inline void x264_psy_trellis_init( x264_t *h, int do_both_dct )
{
    /* All-zero block passed as the second pixel operand of the sub-DCT calls. */
    ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0};

    if( do_both_dct || h->mb.b_transform_8x8 )
        h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
    if( do_both_dct || !h->mb.b_transform_8x8 )
        h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], zero );
}

678
679
/* Reset fenc satd scores cache for psy RD */
/* Also runs x264_psy_trellis_init() first when trellis level 2 is combined
 * with psy-trellis. The caches are left untouched when psy RD is disabled.
 *
 * b_satd: non-zero additionally clears h->mb.pic.fenc_satd_cache; the
 *         hadamard cache is cleared whenever psy RD is enabled. */
static inline void x264_mb_init_fenc_cache( x264_t *h, int b_satd )
{
    if( h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis )
        x264_psy_trellis_init( h, h->param.analyse.b_transform_8x8 );

    /* Nothing to reset when psy RD is off. */
    if( !h->mb.i_psy_rd )
        return;

    /* Writes beyond the end of the array, but not a problem since fenc_satd_cache is right after. */
    h->mc.memzero_aligned( h->mb.pic.fenc_hadamard_cache, sizeof(h->mb.pic.fenc_hadamard_cache) );
    if( b_satd )
        h->mc.memzero_aligned( h->mb.pic.fenc_satd_cache, sizeof(h->mb.pic.fenc_satd_cache) );
}

691
692
/* Estimate the intra chroma cost of the current macroblock and, outside of
 * 4:4:4, pick the chroma prediction mode.
 *
 * Results:
 *   a->i_satd_chroma         best (lowest) cost found
 *   a->i_satd_chroma_dir[]   cost of every mode tried (non-4:4:4 only)
 *   a->i_predict8x8chroma    winning mode (non-4:4:4 only)
 *   h->mb.i_chroma_pred_mode copy of the winning mode (non-4:4:4 only)
 *
 * Returns immediately when a->i_satd_chroma is already below COST_MAX. */
static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
{
    /* A cost below COST_MAX means a result is already stored. */
    if( a->i_satd_chroma < COST_MAX )
        return;

    if( CHROMA444 )
    {
        if( !h->mb.b_chroma_me )
        {
            a->i_satd_chroma = 0;
            return;
        }

        /* Cheap approximation of chroma costs to avoid a full i4x4/i8x8 analysis. */
        /* Both chroma planes are predicted with the 16x16 mode stored in
         * a->i_predict16x16 and compared against the source as 16x16 blocks. */
        if( h->mb.b_lossless )
        {
            x264_predict_lossless_16x16( h, 1, a->i_predict16x16 );
            x264_predict_lossless_16x16( h, 2, a->i_predict16x16 );
        }
        else
        {
            h->predict_16x16[a->i_predict16x16]( h->mb.pic.p_fdec[1] );
            h->predict_16x16[a->i_predict16x16]( h->mb.pic.p_fdec[2] );
        }
        a->i_satd_chroma = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE )
                         + h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE );
        return;
    }

    const int8_t *predict_mode = predict_chroma_mode_available( h->mb.i_neighbour_intra );
    int chromapix = h->luma2chroma_pixel[PIXEL_16x16];

    /* Prediction selection for chroma */
    if( predict_mode[3] >= 0 && !h->mb.b_lossless )
    {
        /* Fast path: a fourth list entry exists, i.e. the full mode list
         * (including plane) is available. The x3 helper fills the costs of
         * the other three modes per plane (presumably in one pass -- see its
         * implementation); plane is predicted and measured separately. */
        int satdu[4], satdv[4];
        h->pixf.intra_mbcmp_x3_chroma( h->mb.pic.p_fenc[1], h->mb.pic.p_fdec[1], satdu );
        h->pixf.intra_mbcmp_x3_chroma( h->mb.pic.p_fenc[2], h->mb.pic.p_fdec[2], satdv );
        h->predict_chroma[I_PRED_CHROMA_P]( h->mb.pic.p_fdec[1] );
        h->predict_chroma[I_PRED_CHROMA_P]( h->mb.pic.p_fdec[2] );
        satdu[I_PRED_CHROMA_P] = h->pixf.mbcmp[chromapix]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE );
        satdv[I_PRED_CHROMA_P] = h->pixf.mbcmp[chromapix]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE );

        for( ; *predict_mode >= 0; predict_mode++ )
        {
            int i_mode = *predict_mode;
            /* Distortion of both planes plus the lambda-weighted mode bit cost. */
            int i_satd = satdu[i_mode] + satdv[i_mode] + a->i_lambda * bs_size_ue( i_mode );

            a->i_satd_chroma_dir[i_mode] = i_satd;
            COPY2_IF_LT( a->i_satd_chroma, i_satd, a->i_predict8x8chroma, i_mode );
        }
    }
    else
    {
        /* Generic path: predict and measure each allowed mode one at a time. */
        for( ; *predict_mode >= 0; predict_mode++ )
        {
            int i_satd;
            int i_mode = *predict_mode;

            /* we do the prediction */
            if( h->mb.b_lossless )
                x264_predict_lossless_chroma( h, i_mode );
            else
            {
                h->predict_chroma[i_mode]( h->mb.pic.p_fdec[1] );
                h->predict_chroma[i_mode]( h->mb.pic.p_fdec[2] );
            }

            /* we calculate the cost */
            /* The mode is remapped through x264_mb_chroma_pred_mode_fix[]
             * before its bit cost is taken. */
            i_satd = h->pixf.mbcmp[chromapix]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE ) +
                     h->pixf.mbcmp[chromapix]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE ) +
                     a->i_lambda * bs_size_ue( x264_mb_chroma_pred_mode_fix[i_mode] );

            a->i_satd_chroma_dir[i_mode] = i_satd;
            COPY2_IF_LT( a->i_satd_chroma, i_satd, a->i_predict8x8chroma, i_mode );
        }
    }

    h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
}

Fiona Glaser's avatar
Fiona Glaser committed
772
/* FIXME: should we do any sort of merged chroma analysis with 4:4:4? */
773
static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
Laurent Aimar's avatar
Laurent Aimar committed
774
775
{
    const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
776
777
    pixel *p_src = h->mb.pic.p_fenc[0];
    pixel *p_dst = h->mb.pic.p_fdec[0];
778
779
780
781
782
783
    static const int8_t intra_analysis_shortcut[2][2][2][5] =
    {
        {{{I_PRED_4x4_HU, -1, -1, -1, -1},
          {I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1}},
         {{I_PRED_4x4_DDR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1},
          {I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_VL, -1}}},
Anton Mitrofanov's avatar
Anton Mitrofanov committed
784
        {{{I_PRED_4x4_HU, -1, -1, -1, -1},
785
786
787
788
          {-1, -1, -1, -1, -1}},
         {{I_PRED_4x4_DDR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1, -1},
          {I_PRED_4x4_DDR, I_PRED_4x4_VR, -1, -1, -1}}},
    };
Laurent Aimar's avatar
Laurent Aimar committed
789

790
    int idx;
791
    int lambda = a->i_lambda;
792

Laurent Aimar's avatar
Laurent Aimar committed
793
794
795
    /*---------------- Try all mode and calculate their score ---------------*/

    /* 16x16 prediction selection */
796
    const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
797

798
799
800
801
    /* Not heavily tuned */
    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;

802
    if( !h->mb.b_lossless && predict_mode[3] >= 0 )
803
    {
804
        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
805
806
807
808
809
810
811
812
813
        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );

        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
        if( a->i_satd_i16x16 <= i16x16_thresh )
814
        {
815
816
817
818
            h->predict_16x16[I_PRED_16x16_P]( p_dst );
            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
819
820
821
        }
    }
    else
Laurent Aimar's avatar
Laurent Aimar committed
822
    {
823
        for( ; *predict_mode >= 0; predict_mode++ )
824
825
        {
            int i_satd;
826
            int i_mode = *predict_mode;
827
828

            if( h->mb.b_lossless )
Fiona Glaser's avatar
Fiona Glaser committed
829
                x264_predict_lossless_16x16( h, 0, i_mode );
830
831
            else
                h->predict_16x16[i_mode]( p_dst );
832
833

            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
834
                     lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
835
836
837
            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
            a->i_satd_i16x16_dir[i_mode] = i_satd;
        }
Laurent Aimar's avatar
Laurent Aimar committed
838
839
    }

840
841
    if( h->sh.i_type == SLICE_TYPE_B )
        /* cavlc mb type prefix */
842
        a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
843

844
    if( a->i_satd_i16x16 > i16x16_thresh )
845
        return;
846

847
848
    /* 8x8 prediction selection */
    if( flags & X264_ANALYSE_I8x8 )
Laurent Aimar's avatar
Laurent Aimar committed
849
    {
850
        ALIGNED_ARRAY_32( pixel, edge,[36] );
851
        x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];
852
        int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
853
854

        // FIXME some bias like in i4x4?
855
        int i_cost = lambda * 4; /* base predmode costs */
856
        h->mb.i_cbp_luma = 0;
Laurent Aimar's avatar
Laurent Aimar committed
857

858
        if( h->sh.i_type == SLICE_TYPE_B )
859
            i_cost += lambda * i_mb_b_cost_table[I_8x8];
860

861
862
863
864
        for( idx = 0;; idx++ )
        {
            int x = idx&1;
            int y = idx>>1;
865
866
            pixel *p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
            pixel *p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
867
868
            int i_best = COST_MAX;
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
869

870
            predict_mode = predict_8x8_mode_available( a->b_avoid_topright, h->mb.i_neighbour8[idx], idx );
871
            h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
872

873
            if( !h->mb.b_lossless && predict_mode[5] >= 0 )
874
            {
875
                int satd[9];
Fiona Glaser's avatar
Fiona Glaser committed
876
                h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd );
877
878
                int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];
                satd[i_pred_mode] -= 3 * lambda;
879
                for( int i = 2; i >= 0; i-- )
880
                {
Anton Mitrofanov's avatar
Anton Mitrofanov committed
881
882
                    int cost = satd[i];
                    a->i_satd_i8x8_dir[i][idx] = cost + 4 * lambda;
883
884
                    COPY2_IF_LT( i_best, cost, a->i_predict8x8[idx], i );
                }
885
886
887
888

                /* Take analysis shortcuts: don't analyse modes that are too
                 * far away direction-wise from the favored mode. */
                if( a->i_mbrd < 1 + a->b_fast_intra )