/*****************************************************************************
 * analyse.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2003-2008 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/

25
#define _ISOC99_SOURCE
Laurent Aimar's avatar
Laurent Aimar committed
26
#include <math.h>
Loren Merritt's avatar
Loren Merritt committed
27
#include <unistd.h>
Laurent Aimar's avatar
Laurent Aimar committed
28

29
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
30
31
#include "macroblock.h"
#include "me.h"
32
#include "ratecontrol.h"
33
34
#include "analyse.h"
#include "rdo.c"
Laurent Aimar's avatar
Laurent Aimar committed
35
36
37
38

typedef struct
{
    /* 16x16 */
39
    int       i_rd16x16;
Laurent Aimar's avatar
Laurent Aimar committed
40
    x264_me_t me16x16;
41
    x264_me_t bi16x16;      /* for b16x16 BI mode, since MVs can differ from l0/l1 */
Laurent Aimar's avatar
Laurent Aimar committed
42
43
44

    /* 8x8 */
    int       i_cost8x8;
Håkan Hjort's avatar
Håkan Hjort committed
45
    /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
46
    ALIGNED_4( int16_t mvc[32][5][2] );
Laurent Aimar's avatar
Laurent Aimar committed
47
48
49
50
51
52
53
54
55
56
57
58
    x264_me_t me8x8[4];

    /* Sub 4x4 */
    int       i_cost4x4[4]; /* cost per 8x8 partition */
    x264_me_t me4x4[4][4];

    /* Sub 8x4 */
    int       i_cost8x4[4]; /* cost per 8x8 partition */
    x264_me_t me8x4[4][2];

    /* Sub 4x8 */
    int       i_cost4x8[4]; /* cost per 8x8 partition */
Loren Merritt's avatar
Loren Merritt committed
59
    x264_me_t me4x8[4][2];
Laurent Aimar's avatar
Laurent Aimar committed
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74

    /* 16x8 */
    int       i_cost16x8;
    x264_me_t me16x8[2];

    /* 8x16 */
    int       i_cost8x16;
    x264_me_t me8x16[2];

} x264_mb_analysis_list_t;

typedef struct
{
    /* conduct the analysis using this lamda and QP */
    int i_lambda;
75
    int i_lambda2;
Laurent Aimar's avatar
Laurent Aimar committed
76
    int i_qp;
77
    uint16_t *p_cost_mv;
78
    uint16_t *p_cost_ref[2];
79
    int i_mbrd;
Laurent Aimar's avatar
Laurent Aimar committed
80
81
82


    /* I: Intra part */
83
84
    /* Take some shortcuts in intra search if intra is deemed unlikely */
    int b_fast_intra;
Fiona Glaser's avatar
Fiona Glaser committed
85
    int b_force_intra; /* For Periodic Intra Refresh.  Only supported in P-frames. */
86
    int b_try_skip;
87

88
    /* Luma part */
89
90
    int i_satd_i16x16;
    int i_satd_i16x16_dir[7];
Laurent Aimar's avatar
Laurent Aimar committed
91
92
    int i_predict16x16;

93
    int i_satd_i8x8;
94
    int i_cbp_i8x8_luma;
95
96
    int i_satd_i8x8_dir[12][4];
    int i_predict8x8[4];
97

98
99
    int i_satd_i4x4;
    int i_predict4x4[16];
Laurent Aimar's avatar
Laurent Aimar committed
100

101
102
    int i_satd_pcm;

Laurent Aimar's avatar
Laurent Aimar committed
103
    /* Chroma part */
104
    int i_satd_i8x8chroma;
105
    int i_satd_i8x8chroma_dir[7];
106
    int i_predict8x8chroma;
Laurent Aimar's avatar
Laurent Aimar committed
107
108
109
110
111
112

    /* II: Inter part P/B frame */
    x264_mb_analysis_list_t l0;
    x264_mb_analysis_list_t l1;

    int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
113
114
115
    int i_cost16x16direct;
    int i_cost8x8bi;
    int i_cost8x8direct[4];
116
117
118
    int i_satd8x8[3][4]; /* [L0,L1,BI][8x8 0..3] SATD only */
    int i_cost_est16x8[2]; /* Per-partition estimated cost */
    int i_cost_est8x16[2];
119
120
    int i_cost16x8bi;
    int i_cost8x16bi;
121
122
123
124
125
    int i_rd16x16bi;
    int i_rd16x16direct;
    int i_rd16x8bi;
    int i_rd8x16bi;
    int i_rd8x8bi;
126
127
128
129
130

    int i_mb_partition16x8[2]; /* mb_partition_e */
    int i_mb_partition8x16[2];
    int i_mb_type16x8; /* mb_class_e */
    int i_mb_type8x16;
131
132

    int b_direct_available;
Laurent Aimar's avatar
Laurent Aimar committed
133
134
135

} x264_mb_analysis_t;

136
/* lambda = pow(2,qp/6-2), rounded to the nearest integer and never below 1.
 * Indexed by QP (0..51). */
const uint8_t x264_lambda_tab[52] = {
   1, 1, 1, 1, 1, 1, 1, 1,  /*  0-7 */
   1, 1, 1, 1,              /*  8-11 */
   1, 1, 1, 1, 2, 2, 2, 2,  /* 12-19 */
   3, 3, 3, 4, 4, 4, 5, 6,  /* 20-27 */
   6, 7, 8, 9,10,11,13,14,  /* 28-35 */
  16,18,20,23,25,29,32,36,  /* 36-43 */
  40,45,51,57,64,72,81,91   /* 44-51 */
};

Fiona Glaser's avatar
Fiona Glaser committed
147
/* lambda2 = pow(lambda,2) * .9 * 256
 * Indexed by QP (0..51).  The factor of 256 means the value carries 8
 * fractional bits; the entries follow the unclamped lambda, so they keep
 * shrinking below QP 12 where x264_lambda_tab stays at 1. */
const int x264_lambda2_tab[52] = {
    14,      18,      22,      28,     36,     45,     57,     72, /*  0 -  7 */
    91,     115,     145,     182,    230,    290,    365,    460, /*  8 - 15 */
   580,     731,     921,    1161,   1462,   1843,   2322,   2925, /* 16 - 23 */
  3686,    4644,    5851,    7372,   9289,  11703,  14745,  18578, /* 24 - 31 */
 23407,   29491,   37156,   46814,  58982,  74313,  93628, 117964, /* 32 - 39 */
148626,  187257,  235929,  297252, 374514, 471859, 594505, 749029, /* 40 - 47 */
943718, 1189010, 1498059, 1887436                                  /* 48 - 51 */
};

158
/* 64-entry exponential lookup table.  The entries agree with
 * round( (pow(2, i/64.) - 1) * 256 ), i.e. the fractional part of 2^(i/64)
 * in 8-bit fixed point.  NOTE(review): the consumers of this table are not in
 * this part of the file -- confirm the intended scaling against them. */
const uint8_t x264_exp2_lut[64] = {
      0,   3,   6,   8,  11,  14,  17,  20,  23,  26,  29,  32,  36,  39,  42,  45,
     48,  52,  55,  58,  62,  65,  69,  72,  76,  80,  83,  87,  91,  94,  98, 102,
    106, 110, 114, 118, 122, 126, 130, 135, 139, 143, 147, 152, 156, 161, 165, 170,
    175, 179, 184, 189, 194, 198, 203, 208, 214, 219, 224, 229, 234, 240, 245, 250
};

/* 128-entry logarithm lookup table: entry i agrees with log2( 1 + i/128. )
 * to five decimal places, so it covers the mantissa range [1,2). */
const float x264_log2_lut[128] = {
    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682, /*   0 -   7 */
    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987, /*   8 -  15 */
    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840, /*  16 -  23 */
    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288, /*  24 -  31 */
    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370, /*  32 -  39 */
    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121, /*  40 -  47 */
    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570, /*  48 -  55 */
    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743, /*  56 -  63 */
    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662, /*  64 -  71 */
    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349, /*  72 -  79 */
    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819, /*  80 -  87 */
    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090, /*  88 -  95 */
    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175, /*  96 - 103 */
    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087, /* 104 - 111 */
    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837, /* 112 - 119 */
    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435  /* 120 - 127 */
};

/* Entry i holds 31 - i as a float.  Keeping the values as floats avoids an
 * int/float conversion at the point of use. */
const float x264_log2_lz_lut[32] = {
    31, 30, 29, 28, 27, 26, 25, 24,
    23, 22, 21, 20, 19, 18, 17, 16,
    15, 14, 13, 12, 11, 10,  9,  8,
     7,  6,  5,  4,  3,  2,  1,  0
};

Fiona Glaser's avatar
Fiona Glaser committed
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
/* Trellis quantisation lambda^2, indexed [0 = inter, 1 = intra][qp].
 * Should the intra and inter lambdas be different?  The values here simply
 * match the behaviour of deadzone quant. */
static const int x264_trellis_lambda2_tab[2][52] = {
    /* inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS) */
    {    46,      58,      73,      92,     117,     147,   /*  0 -  5 */
        185,     233,     294,     370,     466,     587,   /*  6 - 11 */
        740,     932,    1174,    1480,    1864,    2349,   /* 12 - 17 */
       2959,    3728,    4697,    5918,    7457,    9395,   /* 18 - 23 */
      11837,   14914,   18790,   23674,   29828,   37581,   /* 24 - 29 */
      47349,   59656,   75163,   94699,  119313,  150326,   /* 30 - 35 */
     189399,  238627,  300652,  378798,  477255,  601304,   /* 36 - 41 */
     757596,  954511, 1202608, 1515192, 1909022, 2405217,   /* 42 - 47 */
    3030384, 3818045, 4810435, 6060769 },                   /* 48 - 51 */
    /* intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS) */
    {    27,      34,      43,      54,      68,      86,   /*  0 -  5 */
        108,     136,     172,     216,     273,     343,   /*  6 - 11 */
        433,     545,     687,     865,    1090,    1374,   /* 12 - 17 */
       1731,    2180,    2747,    3461,    4361,    5494,   /* 18 - 23 */
       6922,    8721,   10988,   13844,   17442,   21976,   /* 24 - 29 */
      27688,   34885,   43953,   55377,   69771,   87906,   /* 30 - 35 */
     110755,  139543,  175813,  221511,  279087,  351627,   /* 36 - 41 */
     443023,  558174,  703255,  886046, 1116348, 1406511,   /* 42 - 47 */
    1772093, 2232697, 2813022, 3544186 }                    /* 48 - 51 */
};

/* Chroma lambda2 scale factors, looked up with (luma qp - chroma qp + 12).
 * Entry 12 (equal QPs) is 256, the same value that is used when the table is
 * bypassed.  The entries agree with 256 * 2**((i-12)/3.), with the last one
 * saturated to fit uint16_t. */
static const uint16_t x264_chroma_lambda2_offset_tab[37] = {
       16,    20,    25,    32,    40,    50,   /*  0 -  5 */
       64,    80,   101,   128,   161,   203,   /*  6 - 11 */
      256,   322,   406,   512,   645,   812,   /* 12 - 17 */
     1024,  1290,  1625,  2048,  2580,  3250,   /* 18 - 23 */
     4096,  5160,  6501,  8192, 10321, 13003,   /* 24 - 29 */
    16384, 20642, 26007, 32768, 41285, 52015,   /* 30 - 35 */
    65535                                       /* 36 */
};

224
/* TODO: calculate CABAC costs */
225
static const uint8_t i_mb_b_cost_table[X264_MBTYPE_MAX] = {
226
    9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
227
};
228
/* 17-entry cost table; only indices 8..16 are non-zero.
 * NOTE(review): presumably indexed by the mb type chosen for a B 16x8 / 8x16
 * partitioning; the users are not in this part of the file -- confirm. */
static const uint8_t i_mb_b16x8_cost_table[17] = {
    0, 0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
};
231
/* 13-entry cost table.
 * NOTE(review): presumably indexed by sub-macroblock partition type in
 * B slices; the users are not in this part of the file -- confirm. */
static const uint8_t i_sub_mb_b_cost_table[13] = {
    7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
};
234
/* 4-entry cost table.
 * NOTE(review): presumably indexed by sub-macroblock partition type in
 * P slices; the users are not in this part of the file -- confirm. */
static const uint8_t i_sub_mb_p_cost_table[4] = {
    5, 3, 3, 1
};
237

238
239
/* Forward declaration; the definition is not in this part of the file. */
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );

240
static uint16_t x264_cost_ref[92][3][33];
241
static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
242

243
/* Build the motion vector and reference index cost tables for the lambda
 * that corresponds to `qp`.
 *
 * h->cost_mv[lambda] is allocated on first use and then re-based to its
 * centre so it can be indexed with negative mv deltas.  When the ME method is
 * ESA or above, four full-pel tables (one per quarter-pel phase) are derived
 * from it.  A second call for a lambda whose table already exists is a no-op.
 *
 * Returns 0 on success and -1 if an allocation failed (CHECKED_MALLOC is
 * defined elsewhere and is expected to jump to `fail`).  Nothing is released
 * on failure; x264_analyse_free_costs() frees whatever was allocated. */
int x264_analyse_init_costs( x264_t *h, int qp )
{
    int lambda = x264_lambda_tab[qp];
    if( h->cost_mv[lambda] )
        return 0;
    /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
    CHECKED_MALLOC( h->cost_mv[lambda], (4*4*2048 + 1) * sizeof(uint16_t) );
    /* point at the middle entry: valid indices are now -2*4*2048 .. 2*4*2048 */
    h->cost_mv[lambda] += 2*4*2048;
    for( int i = 0; i <= 2*4*2048; i++ )
    {
        /* the cost is symmetric in the sign of the mv delta */
        h->cost_mv[lambda][-i] =
        h->cost_mv[lambda][i]  = lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f;
    }
    /* x264_cost_ref is a file-scope table, so writes to it are serialized */
    x264_pthread_mutex_lock( &cost_ref_mutex );
    for( int i = 0; i < 3; i++ )
        for( int j = 0; j < 33; j++ )
            x264_cost_ref[lambda][i][j] = i ? lambda * bs_size_te( i, j ) : 0;
    x264_pthread_mutex_unlock( &cost_ref_mutex );
    if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[lambda][0] )
    {
        for( int j = 0; j < 4; j++ )
        {
            CHECKED_MALLOC( h->cost_mv_fpel[lambda][j], (4*2048 + 1) * sizeof(uint16_t) );
            h->cost_mv_fpel[lambda][j] += 2*2048;
            /* full-pel position i with quarter-pel phase j */
            for( int i = -2*2048; i < 2*2048; i++ )
                h->cost_mv_fpel[lambda][j][i] = h->cost_mv[lambda][i*4+j];
        }
    }
    return 0;
fail:
    return -1;
}

276
277
/* Release every cost table created by x264_analyse_init_costs().
 *
 * The stored pointers are offset to the centre of their allocations, so the
 * same offset is subtracted again before freeing.  Each full-pel table is
 * checked on its own: if x264_analyse_init_costs() failed part-way through
 * the four allocations for one lambda, the remaining entries were never
 * allocated, and rebasing such an entry would hand an invalid pointer to
 * x264_free(). */
void x264_analyse_free_costs( x264_t *h )
{
    for( int i = 0; i < 92; i++ )
    {
        if( h->cost_mv[i] )
            x264_free( h->cost_mv[i] - 2*4*2048 );
        for( int j = 0; j < 4; j++ )
            if( h->cost_mv_fpel[i][j] )
                x264_free( h->cost_mv_fpel[i][j] - 2*2048 );
    }
}

288
289
/* Extend the weighted copies of the list-0 reference planes down to row
 * `end` (plus 16 rows and the vertical padding), capped at the padded frame
 * height.
 *
 * The first list-0 reference that has a luma weight function supplies the
 * source plane.  Rows not yet weighted (tracked in h->fenc->i_lines_weighted)
 * are then scaled into h->fenc->weighted[k] for every reference k from that
 * index on that has a weight function.  The outer loop stops after handling
 * that first weighted reference. */
void x264_analyse_weight_frame( x264_t *h, int end )
{
    for( int j = 0; j < h->i_ref0; j++ )
    {
        if( h->sh.weight[j][0].weightfn )
        {
            x264_frame_t *frame = h->fref0[j];
            int width = frame->i_width[0] + 2*PADH;
            int i_padv = PADV << h->param.b_interlaced;
            int offset, height;
            /* top-left corner of the padded source plane */
            pixel *src = frame->filtered[0] - frame->i_stride[0]*i_padv - PADH;
            /* number of new rows to weight in this call */
            height = X264_MIN( 16 + end + i_padv, h->fref0[j]->i_lines[0] + i_padv*2 ) - h->fenc->i_lines_weighted;
            offset = h->fenc->i_lines_weighted*frame->i_stride[0];
            h->fenc->i_lines_weighted += height;
            if( height )
                for( int k = j; k < h->i_ref0; k++ )
                    if( h->sh.weight[k][0].weightfn )
                    {
                        pixel *dst = h->fenc->weighted[k] - h->fenc->i_stride[0]*i_padv - PADH;
                        x264_weight_scale_plane( h, dst + offset, frame->i_stride[0],
                                                 src + offset, frame->i_stride[0],
                                                 width, height, &h->sh.weight[k][0] );
                    }
            break;
        }
    }
}

316
317
318
319
/* Point the analysis context at the precomputed cost tables for its lambda:
 * the mv cost table (lambda*nbits for all possible mvs) and, for each
 * reference list, the reference index cost table selected by the number of
 * active references in that list (clamped to the 3 available variants). */
static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
{
    a->p_cost_mv = h->cost_mv[a->i_lambda];
    a->p_cost_ref[0] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
    a->p_cost_ref[1] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
}

Fiona Glaser's avatar
Fiona Glaser committed
324
/* Set every QP-dependent parameter of the macroblock analysis: luma and
 * chroma QP, lambda and lambda2, the trellis lambdas, the psy-RD lambda and
 * the chroma lambda2 offset.  Reads a->i_mbrd, so that field must already be
 * set when this is called. */
static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int i_qp )
{
    /* conduct the analysis using this lambda and QP */
    a->i_qp = h->mb.i_qp = i_qp;
    h->mb.i_chroma_qp = h->chroma_qp_table[i_qp];

    a->i_lambda = x264_lambda_tab[i_qp];
    a->i_lambda2 = x264_lambda2_tab[i_qp];

    /* trellis during analysis needs trellis level 2 and an RD mode */
    h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
    if( h->param.analyse.i_trellis )
    {
        /* [0 = luma qp, 1 = chroma qp][0 = inter, 1 = intra] */
        h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][h->mb.i_qp];
        h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][h->mb.i_qp];
        h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][h->mb.i_chroma_qp];
        h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
    }
    h->mb.i_psy_rd_lambda = a->i_lambda;
    /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
    h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
}

/* Reset the analysis context `a` for the current macroblock at QP `i_qp`.
 *
 * - Chooses the RD level (a->i_mbrd) from the subpel refinement setting.
 * - Loads the QP-dependent lambdas via x264_mb_analyse_init_qp().
 * - Marks all intra costs as not yet computed (COST_MAX).
 * - For P/B slices: computes the legal motion vector range for this
 *   macroblock (horizontally for every macroblock, vertically only for the
 *   first macroblock of a row), waits for the needed rows of the reference
 *   frames when frame threading is active, resets the inter costs, and
 *   decides whether the fast-intra shortcut and forced intra (periodic intra
 *   refresh) apply. */
static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
{
    /* B slices run one subme level lower */
    int subme = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);

    /* mbrd == 1 -> RD mode decision */
    /* mbrd == 2 -> RD refinement */
    /* mbrd == 3 -> QPRD */
    a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10);

    x264_mb_analyse_init_qp( h, a, i_qp );

    h->mb.b_transform_8x8 = 0;
    h->mb.b_noise_reduction = 0;

    /* I: Intra part */
    a->i_satd_i16x16 =
    a->i_satd_i8x8   =
    a->i_satd_i4x4   =
    a->i_satd_i8x8chroma = COST_MAX;

    /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it */
    /* i_lambda2 carries 8 fractional bits, hence the rounding shift by 8 */
    a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd ? ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8 : COST_MAX;

    a->b_fast_intra = 0;
    h->mb.i_skip_intra =
        h->mb.b_lossless ? 0 :
        a->i_mbrd ? 2 :
        !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;

    /* II: Inter part P/B frame */
    if( h->sh.i_type != SLICE_TYPE_I )
    {
        /* configured mv range, in quarter-pel units */
        int i_fmv_range = 4 * h->param.analyse.i_mv_range;
        // limit motion search to a slightly smaller range than the theoretical limit,
        // since the search may go a few iterations past its given range
        int i_fpel_border = 6; // umh: 1 for diamond, 2 for octagon, 2 for hpel

        /* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range-1 )
        /* horizontal limits: up to 24 pixels outside the frame */
        h->mb.mv_min[0] = 4*( -16*h->mb.i_mb_x - 24 );
        h->mb.mv_max[0] = 4*( 16*( h->sps->i_mb_width - h->mb.i_mb_x - 1 ) + 24 );
        h->mb.mv_min_spel[0] = CLIP_FMV( h->mb.mv_min[0] );
        h->mb.mv_max_spel[0] = CLIP_FMV( h->mb.mv_max[0] );
        if( h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P )
        {
            int max_x = (h->fref0[0]->i_pir_end_col * 16 - 3)*4; /* 3 pixels of hpel border */
            int max_mv = max_x - 4*16*h->mb.i_mb_x;
            /* If we're left of the refresh bar, don't reference right of it. */
            if( max_mv > 0 && h->mb.i_mb_x < h->fdec->i_pir_start_col )
                h->mb.mv_max_spel[0] = X264_MIN( h->mb.mv_max_spel[0], max_mv );
        }
        h->mb.mv_min_fpel[0] = (h->mb.mv_min_spel[0]>>2) + i_fpel_border;
        h->mb.mv_max_fpel[0] = (h->mb.mv_max_spel[0]>>2) - i_fpel_border;
        /* the vertical limits are the same for a whole row, so they are
         * only recomputed at its first macroblock */
        if( h->mb.i_mb_x == 0 )
        {
            int mb_y = h->mb.i_mb_y >> h->sh.b_mbaff;
            int mb_height = h->sps->i_mb_height >> h->sh.b_mbaff;
            int thread_mvy_range = i_fmv_range;

            if( h->i_thread_frames > 1 )
            {
                int pix_y = (h->mb.i_mb_y | h->mb.b_interlaced) * 16;
                int thresh = pix_y + h->param.analyse.i_mv_range_thread;
                /* list 1 only exists for B slices */
                for( int i = (h->sh.i_type == SLICE_TYPE_B); i >= 0; i-- )
                {
                    x264_frame_t **fref = i ? h->fref1 : h->fref0;
                    int i_ref = i ? h->i_ref1 : h->i_ref0;
                    for( int j = 0; j < i_ref; j++ )
                    {
                        /* wait for the reference, then limit the range to
                         * the rows it reports as completed */
                        x264_frame_cond_wait( fref[j]->orig, thresh );
                        thread_mvy_range = X264_MIN( thread_mvy_range, fref[j]->orig->i_lines_completed - pix_y );
                    }
                }

                /* in deterministic mode the range must not depend on thread timing */
                if( h->param.b_deterministic )
                    thread_mvy_range = h->param.analyse.i_mv_range_thread;
                if( h->mb.b_interlaced )
                    thread_mvy_range >>= 1;

                x264_analyse_weight_frame( h, pix_y + thread_mvy_range );
            }

            h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
            h->mb.mv_max[1] = 4*( 16*( mb_height - mb_y - 1 ) + 24 );
            h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
            h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
            h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
            h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
            h->mb.mv_max_fpel[1] = (h->mb.mv_max_spel[1]>>2) - i_fpel_border;
        }
#undef CLIP_FMV

        /* mark all inter costs as not yet computed */
        a->l0.me16x16.cost =
        a->l0.i_rd16x16    =
        a->l0.i_cost8x8    =
        a->l0.i_cost16x8   =
        a->l0.i_cost8x16   = COST_MAX;
        if( h->sh.i_type == SLICE_TYPE_B )
        {
            a->l1.me16x16.cost =
            a->l1.i_rd16x16    =
            a->l1.i_cost8x8    =
            a->i_cost8x8direct[0] =
            a->i_cost8x8direct[1] =
            a->i_cost8x8direct[2] =
            a->i_cost8x8direct[3] =
            a->l1.i_cost16x8   =
            a->l1.i_cost8x16   =
            a->i_rd16x16bi     =
            a->i_rd16x16direct =
            a->i_rd8x8bi       =
            a->i_rd16x8bi      =
            a->i_rd8x16bi      =
            a->i_cost16x16bi   =
            a->i_cost16x16direct =
            a->i_cost8x8bi     =
            a->i_cost16x8bi    =
            a->i_cost8x16bi    = COST_MAX;
        }
        else if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
            for( int i = 0; i < 4; i++ )
            {
                a->l0.i_cost4x4[i] =
                a->l0.i_cost8x4[i] =
                a->l0.i_cost4x8[i] = COST_MAX;
            }

        /* Fast intra decision */
        if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
        {
            /* Always run in fast-intra mode for subme < 3 */
            /* Otherwise intra is considered likely if a neighbour or the
             * co-located macroblock of the first reference is intra, or if
             * intra macroblocks have been frequent in this frame so far. */
            if( h->mb.i_subpel_refine > 2 &&
              ( IS_INTRA( h->mb.i_mb_type_left ) ||
                IS_INTRA( h->mb.i_mb_type_top ) ||
                IS_INTRA( h->mb.i_mb_type_topleft ) ||
                IS_INTRA( h->mb.i_mb_type_topright ) ||
                (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref0[0]->mb_type[h->mb.i_mb_xy] )) ||
                (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) ) )
            { /* intra is likely */ }
            else
            {
                a->b_fast_intra = 1;
            }
        }
        h->mb.b_skip_mc = 0;
        /* macroblocks inside the refresh bar of a P frame must be intra */
        if( h->param.b_intra_refresh && h->sh.i_type == SLICE_TYPE_P &&
            h->mb.i_mb_x >= h->fdec->i_pir_start_col && h->mb.i_mb_x <= h->fdec->i_pir_end_col )
        {
            a->b_force_intra = 1;
            a->b_fast_intra = 0;
        }
        else
            a->b_force_intra = 0;
    }
}

502
503
504
505
506
507
508
509
510
511
512
/* Intra 16x16 prediction modes that may be tried for each combination of
 * available neighbours.  Every row is a list terminated by -1. */
static const int8_t i16x16_mode_available[5][5] =
{
    /* no neighbours */
    { I_PRED_16x16_DC_128,  -1,             -1,              -1,             -1 },
    /* left */
    { I_PRED_16x16_DC_LEFT, I_PRED_16x16_H, -1,              -1,             -1 },
    /* top */
    { I_PRED_16x16_DC_TOP,  I_PRED_16x16_V, -1,              -1,             -1 },
    /* top + left */
    { I_PRED_16x16_V,       I_PRED_16x16_H, I_PRED_16x16_DC, -1,             -1 },
    /* top + left + topleft */
    { I_PRED_16x16_V,       I_PRED_16x16_H, I_PRED_16x16_DC, I_PRED_16x16_P, -1 },
};
Laurent Aimar's avatar
Laurent Aimar committed
513

514
515
516
517
518
519
520
521
/* Intra 8x8 chroma prediction modes that may be tried for each combination
 * of available neighbours.  Every row is a list terminated by -1. */
static const int8_t i8x8chroma_mode_available[5][5] =
{
    /* no neighbours */
    { I_PRED_CHROMA_DC_128,  -1,              -1,               -1,              -1 },
    /* left */
    { I_PRED_CHROMA_DC_LEFT, I_PRED_CHROMA_H, -1,               -1,              -1 },
    /* top */
    { I_PRED_CHROMA_DC_TOP,  I_PRED_CHROMA_V, -1,               -1,              -1 },
    /* top + left */
    { I_PRED_CHROMA_V,       I_PRED_CHROMA_H, I_PRED_CHROMA_DC, -1,              -1 },
    /* top + left + topleft */
    { I_PRED_CHROMA_V,       I_PRED_CHROMA_H, I_PRED_CHROMA_DC, I_PRED_CHROMA_P, -1 },
};
Laurent Aimar's avatar
Laurent Aimar committed
522

523
static const int8_t i4x4_mode_available[5][10] =
Laurent Aimar's avatar
Laurent Aimar committed
524
{
525
526
527
528
529
530
531
    {I_PRED_4x4_DC_128, -1, -1, -1, -1, -1, -1, -1, -1, -1},
    {I_PRED_4x4_DC_LEFT, I_PRED_4x4_H, I_PRED_4x4_HU, -1, -1, -1, -1, -1, -1, -1},
    {I_PRED_4x4_DC_TOP, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, -1, -1, -1, -1, -1, -1},
    {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_VL, I_PRED_4x4_HU, -1, -1, -1, -1},
    {I_PRED_4x4_DC, I_PRED_4x4_H, I_PRED_4x4_V, I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_HD, I_PRED_4x4_VL, I_PRED_4x4_HU, -1},
};

532
static ALWAYS_INLINE const int8_t *predict_16x16_mode_available( int i_neighbour )
533
534
535
{
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
    return i16x16_mode_available[(idx&MB_TOPLEFT)?4:idx];
Laurent Aimar's avatar
Laurent Aimar committed
536
537
}

538
static ALWAYS_INLINE const int8_t *predict_8x8chroma_mode_available( int i_neighbour )
Laurent Aimar's avatar
Laurent Aimar committed
539
{
540
541
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
    return i8x8chroma_mode_available[(idx&MB_TOPLEFT)?4:idx];
Laurent Aimar's avatar
Laurent Aimar committed
542
543
}

544
static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int i_neighbour )
Laurent Aimar's avatar
Laurent Aimar committed
545
{
546
547
    int idx = i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT);
    return i4x4_mode_available[(idx&MB_TOPLEFT)?4:idx];
Laurent Aimar's avatar
Laurent Aimar committed
548
549
}

550
551
552
/* Transform the source luma of the current macroblock against an all-zero
 * block and store the coefficients in h->mb.pic.fenc_dct8 and/or
 * h->mb.pic.fenc_dct4.
 * For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we
 * only need to use it on the chosen mode.
 *   do_both_dct: non-zero computes both transform sizes; zero computes only
 *                the one selected by h->mb.b_transform_8x8. */
static inline void x264_psy_trellis_init( x264_t *h, int do_both_dct )
{
    /* all-zero "prediction", so the DCT of the difference is the DCT of the source */
    ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0};

    if( do_both_dct || h->mb.b_transform_8x8 )
        h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
    if( do_both_dct || !h->mb.b_transform_8x8 )
        h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], zero );
}

561
562
/* Reset fenc satd scores cache for psy RD.
 * Also precomputes the source DCTs for psy-trellis when trellis level 2 is
 * active.  The caches are left untouched when psy-RD is disabled.
 *   b_satd: non-zero also clears the SATD cache in addition to the hadamard
 *           cache. */
static inline void x264_mb_init_fenc_cache( x264_t *h, int b_satd )
{
    if( h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis )
        x264_psy_trellis_init( h, h->param.analyse.b_transform_8x8 );
    if( !h->mb.i_psy_rd )
        return;
    /* Writes beyond the end of the array, but not a problem since fenc_satd_cache is right after. */
    h->mc.memzero_aligned( h->mb.pic.fenc_hadamard_cache, sizeof(h->mb.pic.fenc_hadamard_cache) );
    if( b_satd )
        h->mc.memzero_aligned( h->mb.pic.fenc_satd_cache, sizeof(h->mb.pic.fenc_satd_cache) );
}

574
575
/* Pick the cheapest intra 8x8 chroma prediction mode for the current
 * macroblock.
 *
 * Does nothing if a chroma cost is already present in `a`.  Otherwise every
 * allowed mode is scored as distortion(U) + distortion(V) + lambda * mode
 * bits; the per-mode scores go to a->i_satd_i8x8chroma_dir[], the best score
 * and mode to a->i_satd_i8x8chroma / a->i_predict8x8chroma, and the chosen
 * mode is also written to h->mb.i_chroma_pred_mode. */
static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
{
    if( a->i_satd_i8x8chroma < COST_MAX )
        return;

    const int8_t *predict_mode = predict_8x8chroma_mode_available( h->mb.i_neighbour_intra );

    /* 8x8 prediction selection for chroma */
    /* a valid 4th list entry means all four modes are available, so the
     * batched 3-mode compare can be used (not in lossless mode) */
    if( predict_mode[3] >= 0 && !h->mb.b_lossless )
    {
        int satdu[4], satdv[4];
        h->pixf.intra_mbcmp_x3_8x8c( h->mb.pic.p_fenc[1], h->mb.pic.p_fdec[1], satdu );
        h->pixf.intra_mbcmp_x3_8x8c( h->mb.pic.p_fenc[2], h->mb.pic.p_fdec[2], satdv );
        /* the plane mode is not covered by the x3 call: predict and score it separately */
        h->predict_8x8c[I_PRED_CHROMA_P]( h->mb.pic.p_fdec[1] );
        h->predict_8x8c[I_PRED_CHROMA_P]( h->mb.pic.p_fdec[2] );
        satdu[I_PRED_CHROMA_P] = h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE );
        satdv[I_PRED_CHROMA_P] = h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE );

        for( ; *predict_mode >= 0; predict_mode++ )
        {
            int i_mode = *predict_mode;
            int i_satd = satdu[i_mode] + satdv[i_mode] + a->i_lambda * bs_size_ue( i_mode );

            a->i_satd_i8x8chroma_dir[i_mode] = i_satd;
            COPY2_IF_LT( a->i_satd_i8x8chroma, i_satd, a->i_predict8x8chroma, i_mode );
        }
    }
    else
    {
        for( ; *predict_mode >= 0; predict_mode++ )
        {
            int i_satd;
            int i_mode = *predict_mode;

            /* we do the prediction */
            if( h->mb.b_lossless )
                x264_predict_lossless_8x8_chroma( h, i_mode );
            else
            {
                h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
                h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
            }

            /* we calculate the cost */
            /* the mode bits are taken from the mode after mapping through
             * x264_mb_pred_mode8x8c_fix */
            i_satd = h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE ) +
                     h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE ) +
                     a->i_lambda * bs_size_ue( x264_mb_pred_mode8x8c_fix[i_mode] );

            a->i_satd_i8x8chroma_dir[i_mode] = i_satd;
            COPY2_IF_LT( a->i_satd_i8x8chroma, i_satd, a->i_predict8x8chroma, i_mode );
        }
    }

    h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
}

630
static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
Laurent Aimar's avatar
Laurent Aimar committed
631
632
{
    const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
633
634
    pixel *p_src = h->mb.pic.p_fenc[0];
    pixel *p_dst = h->mb.pic.p_fdec[0];
635
636
637
638
639
    static const int8_t intra_analysis_shortcut[2][2][5] =
    {{{I_PRED_4x4_HU, -1},
      {I_PRED_4x4_DDL, I_PRED_4x4_VL, -1}},
     {{I_PRED_4x4_DDR, I_PRED_4x4_HD, I_PRED_4x4_HU, -1},
      {I_PRED_4x4_DDL, I_PRED_4x4_DDR, I_PRED_4x4_VR, I_PRED_4x4_VL, -1}}};
Laurent Aimar's avatar
Laurent Aimar committed
640

641
    int idx;
642
    int lambda = a->i_lambda;
643

Laurent Aimar's avatar
Laurent Aimar committed
644
645
646
    /*--------------- Try all modes and calculate their scores --------------*/

    /* 16x16 prediction selection */
647
    const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
648

649
650
651
652
    /* Not heavily tuned */
    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;

653
    if( !h->mb.b_lossless && predict_mode[3] >= 0 )
654
    {
655
        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
656
657
658
659
660
661
662
663
664
        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );

        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
        if( a->i_satd_i16x16 <= i16x16_thresh )
665
        {
666
667
668
669
            h->predict_16x16[I_PRED_16x16_P]( p_dst );
            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
670
671
672
        }
    }
    else
Laurent Aimar's avatar
Laurent Aimar committed
673
    {
674
        for( ; *predict_mode >= 0; predict_mode++ )
675
676
        {
            int i_satd;
677
            int i_mode = *predict_mode;
678
679
680
681
682

            if( h->mb.b_lossless )
                x264_predict_lossless_16x16( h, i_mode );
            else
                h->predict_16x16[i_mode]( p_dst );
683
684

            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
685
                     lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
686
687
688
            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
            a->i_satd_i16x16_dir[i_mode] = i_satd;
        }
Laurent Aimar's avatar
Laurent Aimar committed
689
690
    }

691
692
    if( h->sh.i_type == SLICE_TYPE_B )
        /* cavlc mb type prefix */
693
        a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
694

695
    if( a->i_satd_i16x16 > i16x16_thresh )
696
        return;
697

698
699
    /* 8x8 prediction selection */
    if( flags & X264_ANALYSE_I8x8 )
Laurent Aimar's avatar
Laurent Aimar committed
700
    {
701
        ALIGNED_ARRAY_16( pixel, edge,[33] );
702
        x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];
703
        int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
704
705

        // FIXME some bias like in i4x4?
706
        int i_cost = lambda * 4; /* base predmode costs */
707
        h->mb.i_cbp_luma = 0;
Laurent Aimar's avatar
Laurent Aimar committed
708

709
        if( h->sh.i_type == SLICE_TYPE_B )
710
            i_cost += lambda * i_mb_b_cost_table[I_8x8];
711

712
713
714
715
        for( idx = 0;; idx++ )
        {
            int x = idx&1;
            int y = idx>>1;
716
717
            pixel *p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
            pixel *p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
718
719
            int i_best = COST_MAX;
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
720

721
            predict_mode = predict_4x4_mode_available( h->mb.i_neighbour8[idx] );
722
            h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );
723

724
            if( !h->mb.b_lossless && predict_mode[5] >= 0 )
725
            {
726
                int satd[9];
Fiona Glaser's avatar
Fiona Glaser committed
727
                h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd );
728
729
                int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];
                satd[i_pred_mode] -= 3 * lambda;
730
                for( int i = 2; i >= 0; i-- )
731
                {
Anton Mitrofanov's avatar
Anton Mitrofanov committed
732
733
                    int cost = satd[i];
                    a->i_satd_i8x8_dir[i][idx] = cost + 4 * lambda;
734
735
                    COPY2_IF_LT( i_best, cost, a->i_predict8x8[idx], i );
                }
736
737
738
739
740
741
742

                /* Take analysis shortcuts: don't analyse modes that are too
                 * far away direction-wise from the favored mode. */
                if( a->i_mbrd < 1 + a->b_fast_intra )
                    predict_mode = intra_analysis_shortcut[predict_mode[8] >= 0][favor_vertical];
                else
                    predict_mode += 3;
743
744
            }

745
            for( ; *predict_mode >= 0 && (i_best >= 0 || a->i_mbrd >= 2); predict_mode++ )
Laurent Aimar's avatar
Laurent Aimar committed
746
            {
747
                int i_satd;
748
                int i_mode = *predict_mode;
Laurent Aimar's avatar
Laurent Aimar committed
749

750
751
752
753
                if( h->mb.b_lossless )
                    x264_predict_lossless_8x8( h, p_dst_by, idx, i_mode, edge );
                else
                    h->predict_8x8[i_mode]( p_dst_by, edge );
Laurent Aimar's avatar
Laurent Aimar committed
754

755
                i_satd = sa8d( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE );
756
                if( i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) )
757
                    i_satd -= 3 * lambda;
Laurent Aimar's avatar
Laurent Aimar committed
758

759
                COPY2_IF_LT( i_best, i_satd, a->i_predict8x8[idx], i_mode );
760
                a->i_satd_i8x8_dir[i_mode][idx] = i_satd + 4 * lambda;
Laurent Aimar's avatar
Laurent Aimar committed
761
            }
762
            i_cost += i_best + 3 * lambda;
763
764
765

            if( idx == 3 || i_cost > i_satd_thresh )
                break;
Laurent Aimar's avatar
Laurent Aimar committed
766

767
            /* we need to encode this block now (for next ones) */
768
            h->predict_8x8[a->i_predict8x8[idx]]( p_dst_by, edge );
769
            x264_mb_encode_i8x8( h, idx, a->i_qp );
Laurent Aimar's avatar
Laurent Aimar committed
770

771
            x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[idx] );
772
773
        }

774
        if( idx == 3 )
775
        {
776
            a->i_satd_i8x8 = i_cost;
777
778
779
            if( h->mb.i_skip_intra )
            {
                h->mc.copy[PIXEL_16x16]( h->mb.pic.i8x8_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
Fiona Glaser's avatar
Fiona Glaser committed
780
781
782
783
                h->mb.pic.i8x8_nnz_buf[0] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] );
                h->mb.pic.i8x8_nnz_buf[1] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
                h->mb.pic.i8x8_nnz_buf[2] = M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] );
                h->mb.pic.i8x8_nnz_buf[3] = M32( &h->mb.cache.non_zero_count[x264_scan8[10]] );
784
                h->mb.pic.i8x8_cbp = h->mb.i_cbp_luma;
785
786
787
788
                if( h->mb.i_skip_intra == 2 )
                    h->mc.memcpy_aligned( h->mb.pic.i8x8_dct_buf, h->dct.luma8x8, sizeof(h->mb.pic.i8x8_dct_buf) );
            }
        }
789
790
        else
        {
791
            static const uint16_t cost_div_fix8[3] = {1024,512,341};
792
            a->i_satd_i8x8 = COST_MAX;
793
            i_cost = (i_cost * cost_div_fix8[idx]) >> 8;
Laurent Aimar's avatar
Laurent Aimar committed
794
        }
795
        /* Not heavily tuned */
796
        static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
797
        if( X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
798
            return;
Laurent Aimar's avatar
Laurent Aimar committed
799
    }
800

801
802
    /* 4x4 prediction selection */
    if( flags & X264_ANALYSE_I4x4 )
803
    {
804
        int i_cost = lambda * (24+16); /* 24from JVT (SATD0), 16 from base predmode costs */
805
        int i_satd_thresh = X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 );
806
        h->mb.i_cbp_luma = 0;
807

808
        if( a->i_mbrd )
809
810
811
            i_satd_thresh = i_satd_thresh * (10-a->b_fast_intra)/8;

        if( h->sh.i_type == SLICE_TYPE_B )
812
            i_cost += lambda * i_mb_b_cost_table[I_4x4];
813
814

        for( idx = 0;; idx++ )
815
        {
816
817
            pixel *p_src_by = p_src + block_idx_xy_fenc[idx];
            pixel *p_dst_by = p_dst + block_idx_xy_fdec[idx];
818
819
            int i_best = COST_MAX;
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );
820

821
            predict_mode = predict_4x4_mode_available( h->mb.i_neighbour4[idx] );
822

823
824
            if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                /* emulate missing topright samples */
825
                MPIXEL_X4( &p_dst_by[4 - FDEC_STRIDE] ) = PIXEL_SPLAT_X4( p_dst_by[3 - FDEC_STRIDE] );
826

827
            if( !h->mb.b_lossless && predict_mode[5] >= 0 )
828
            {
829
                int satd[9];
830
                h->pixf.intra_mbcmp_x3_4x4( p_src_by, p_dst_by, satd );
831
832
                int favor_vertical = satd[I_PRED_4x4_H] > satd[I_PRED_4x4_V];
                satd[i_pred_mode] -= 3 * lambda;
833
                for( int i = 2; i >= 0; i-- )
834
                    COPY2_IF_LT( i_best, satd[i], a->i_predict4x4[idx], i );
835
836
837
838
839
840
841

                /* Take analysis shortcuts: don't analyse modes that are too
                 * far away direction-wise from the favored mode. */
                if( a->i_mbrd < 1 + a->b_fast_intra )
                    predict_mode = intra_analysis_shortcut[predict_mode[8] >= 0][favor_vertical];
                else
                    predict_mode += 3;
842
843
            }

844
            if( i_best > 0 )
845
            {
846
847
848
849
                for( ; *predict_mode >= 0; predict_mode++ )
                {
                    int i_satd;
                    int i_mode = *predict_mode;
850

851
852
853
854
                    if( h->mb.b_lossless )
                        x264_predict_lossless_4x4( h, p_dst_by, idx, i_mode );
                    else
                        h->predict_4x4[i_mode]( p_dst_by );
855

856
857
858
                    i_satd = h->pixf.mbcmp[PIXEL_4x4]( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE );
                    if( i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) )
                    {
859
                        i_satd -= lambda * 3;
860
861
862
863
864
865
866
                        if( i_satd <= 0 )
                        {
                            i_best = i_satd;
                            a->i_predict4x4[idx] = i_mode;
                            break;
                        }
                    }
867

868
869
                    COPY2_IF_LT( i_best, i_satd, a->i_predict4x4[idx], i_mode );
                }
870
            }
871
            i_cost += i_best + 3 * lambda;
872

873
874
            if( i_cost > i_satd_thresh || idx == 15 )
                break;
875

Loren Merritt's avatar