/*****************************************************************************
 * analyse.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2003-2008 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/

25
#define _ISOC99_SOURCE
Laurent Aimar's avatar
Laurent Aimar committed
26
#include <math.h>
Loren Merritt's avatar
Loren Merritt committed
27
#include <unistd.h>
Laurent Aimar's avatar
Laurent Aimar committed
28

29
#include "common/common.h"
30
#include "common/cpu.h"
Laurent Aimar's avatar
Laurent Aimar committed
31 32
#include "macroblock.h"
#include "me.h"
33
#include "ratecontrol.h"
34 35
#include "analyse.h"
#include "rdo.c"
Laurent Aimar's avatar
Laurent Aimar committed
36 37 38 39 40

typedef struct
{
    /* 16x16 */
    int i_ref;
41
    int       i_rd16x16;
Laurent Aimar's avatar
Laurent Aimar committed
42 43 44 45
    x264_me_t me16x16;

    /* 8x8 */
    int       i_cost8x8;
Håkan Hjort's avatar
Håkan Hjort committed
46
    /* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
47
    ALIGNED_4( int16_t mvc[32][5][2] );
Laurent Aimar's avatar
Laurent Aimar committed
48 49 50 51 52 53 54 55 56 57 58 59
    x264_me_t me8x8[4];

    /* Sub 4x4 */
    int       i_cost4x4[4]; /* cost per 8x8 partition */
    x264_me_t me4x4[4][4];

    /* Sub 8x4 */
    int       i_cost8x4[4]; /* cost per 8x8 partition */
    x264_me_t me8x4[4][2];

    /* Sub 4x8 */
    int       i_cost4x8[4]; /* cost per 8x8 partition */
Loren Merritt's avatar
Loren Merritt committed
60
    x264_me_t me4x8[4][2];
Laurent Aimar's avatar
Laurent Aimar committed
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75

    /* 16x8 */
    int       i_cost16x8;
    x264_me_t me16x8[2];

    /* 8x16 */
    int       i_cost8x16;
    x264_me_t me8x16[2];

} x264_mb_analysis_list_t;

typedef struct
{
    /* conduct the analysis using this lamda and QP */
    int i_lambda;
76
    int i_lambda2;
Laurent Aimar's avatar
Laurent Aimar committed
77
    int i_qp;
78
    uint16_t *p_cost_mv;
79 80
    uint16_t *p_cost_ref0;
    uint16_t *p_cost_ref1;
81
    int i_mbrd;
Laurent Aimar's avatar
Laurent Aimar committed
82 83 84


    /* I: Intra part */
85 86
    /* Take some shortcuts in intra search if intra is deemed unlikely */
    int b_fast_intra;
87
    int b_try_pskip;
88

89
    /* Luma part */
90 91
    int i_satd_i16x16;
    int i_satd_i16x16_dir[7];
Laurent Aimar's avatar
Laurent Aimar committed
92 93
    int i_predict16x16;

94
    int i_satd_i8x8;
95
    int i_cbp_i8x8_luma;
96 97
    int i_satd_i8x8_dir[12][4];
    int i_predict8x8[4];
98

99 100
    int i_satd_i4x4;
    int i_predict4x4[16];
Laurent Aimar's avatar
Laurent Aimar committed
101

102 103
    int i_satd_pcm;

Laurent Aimar's avatar
Laurent Aimar committed
104
    /* Chroma part */
105
    int i_satd_i8x8chroma;
106
    int i_satd_i8x8chroma_dir[4];
107
    int i_predict8x8chroma;
Laurent Aimar's avatar
Laurent Aimar committed
108 109 110 111 112 113

    /* II: Inter part P/B frame */
    x264_mb_analysis_list_t l0;
    x264_mb_analysis_list_t l1;

    int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
114 115 116
    int i_cost16x16direct;
    int i_cost8x8bi;
    int i_cost8x8direct[4];
117 118
    int i_cost16x8bi;
    int i_cost8x16bi;
119 120 121 122 123
    int i_rd16x16bi;
    int i_rd16x16direct;
    int i_rd16x8bi;
    int i_rd8x16bi;
    int i_rd8x8bi;
124 125 126 127 128

    int i_mb_partition16x8[2]; /* mb_partition_e */
    int i_mb_partition8x16[2];
    int i_mb_type16x8; /* mb_class_e */
    int i_mb_type8x16;
129 130

    int b_direct_available;
Laurent Aimar's avatar
Laurent Aimar committed
131 132 133

} x264_mb_analysis_t;

134
/* Lagrange multiplier per QP (0..51):
 * lambda = pow(2, qp/6.0 - 2), rounded to nearest and floored at 1.
 * The largest entry is 91, which is why lambda-indexed tables in this file
 * are sized 92. */
const int x264_lambda_tab[52] = {
   1, 1, 1, 1, 1, 1, 1, 1,  /*  0-7 */
   1, 1, 1, 1,              /*  8-11 */
   1, 1, 1, 1, 2, 2, 2, 2,  /* 12-19 */
   3, 3, 3, 4, 4, 4, 5, 6,  /* 20-27 */
   6, 7, 8, 9,10,11,13,14,  /* 28-35 */
  16,18,20,23,25,29,32,36,  /* 36-43 */
  40,45,51,57,64,72,81,91   /* 44-51 */
};

Fiona Glaser's avatar
Fiona Glaser committed
145
/* Squared Lagrange multiplier per QP (0..51), in 8-bit fixed point:
 * lambda2 = pow(lambda,2) * .9 * 256
 * where lambda is the real-valued pow(2, qp/6.0 - 2), not the rounded and
 * clamped integer from x264_lambda_tab (e.g. qp 0 gives 14, qp 12 gives 230). */
const int x264_lambda2_tab[52] = {
    14,      18,      22,      28,     36,     45,     57,     72, /*  0 -  7 */
    91,     115,     145,     182,    230,    290,    365,    460, /*  8 - 15 */
   580,     731,     921,    1161,   1462,   1843,   2322,   2925, /* 16 - 23 */
  3686,    4644,    5851,    7372,   9289,  11703,  14745,  18578, /* 24 - 31 */
 23407,   29491,   37156,   46814,  58982,  74313,  93628, 117964, /* 32 - 39 */
148626,  187257,  235929,  297252, 374514, 471859, 594505, 749029, /* 40 - 47 */
943718, 1189010, 1498059, 1887436                                  /* 48 - 51 */
};

156
/* 64-entry fractional exp2 table. The values are consistent with
 * round(256 * (pow(2, i/64.0) - 1)), e.g. entry 32 is 106 ~= 256*(sqrt(2)-1).
 * NOTE(review): formula inferred from the values; confirm against the users
 * of this table. */
const uint8_t x264_exp2_lut[64] = {
      0,   3,   6,   8,  11,  14,  17,  20,  23,  26,  29,  32,  36,  39,  42,  45,
     48,  52,  55,  58,  62,  65,  69,  72,  76,  80,  83,  87,  91,  94,  98, 102,
    106, 110, 114, 118, 122, 126, 130, 135, 139, 143, 147, 152, 156, 161, 165, 170,
    175, 179, 184, 189, 194, 198, 203, 208, 214, 219, 224, 229, 234, 240, 245, 250
};

/* 128-entry fractional log2 table. The values are consistent with
 * log2(1 + i/128.0) to five decimals (entry 1 is 0.01123, entry 64 is
 * 0.58496 = log2(1.5)).
 * NOTE(review): formula inferred from the values; confirm against the users
 * of this table. */
const float x264_log2_lut[128] = {
    0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
    0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
    0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
    0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
    0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
    0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
    0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
    0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
    0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
    0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
    0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
    0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
    0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
    0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
    0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
    0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
};

/* Entry i holds 31 - i, stored as float so a lookup replaces an int/float
 * conversion. The name suggests i is a leading-zero count (presumably of a
 * 32-bit value; confirm at the call sites). */
const float x264_log2_lz_lut[32] = {
    31, 30, 29, 28, 27, 26, 25, 24,
    23, 22, 21, 20, 19, 18, 17, 16,
    15, 14, 13, 12, 11, 10,  9,  8,
     7,  6,  5,  4,  3,  2,  1,  0
};

Fiona Glaser's avatar
Fiona Glaser committed
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
// Trellis quantization lambda^2 per QP (0..51).
// Row [0] is the inter table and row [1] the intra table; x264_mb_analyse_init
// copies one entry from each row for the luma QP and for the chroma QP.
// should the intra and inter lambdas be different?
// I'm just matching the behaviour of deadzone quant.
static const int x264_trellis_lambda2_tab[2][52] = {
    // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
    {    46,      58,      73,      92,     117,     147,
        185,     233,     294,     370,     466,     587,
        740,     932,    1174,    1480,    1864,    2349,
       2959,    3728,    4697,    5918,    7457,    9395,
      11837,   14914,   18790,   23674,   29828,   37581,
      47349,   59656,   75163,   94699,  119313,  150326,
     189399,  238627,  300652,  378798,  477255,  601304,
     757596,  954511, 1202608, 1515192, 1909022, 2405217,
    3030384, 3818045, 4810435, 6060769 },
    // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
    {    27,      34,      43,      54,      68,      86,
        108,     136,     172,     216,     273,     343,
        433,     545,     687,     865,    1090,    1374,
       1731,    2180,    2747,    3461,    4361,    5494,
       6922,    8721,   10988,   13844,   17442,   21976,
      27688,   34885,   43953,   55377,   69771,   87906,
     110755,  139543,  175813,  221511,  279087,  351627,
     443023,  558174,  703255,  886046, 1116348, 1406511,
    1772093, 2232697, 2813022, 3544186 }
};

/* Chroma lambda2 scale, indexed by (luma qp - chroma qp + 12); see
 * x264_mb_analyse_init. Index 12 (equal QPs) is 256, the same value used when
 * psy is disabled. The entries are consistent with 256 * 2^((i-12)/3); the
 * last one is 65535 rather than 65536 so that it fits in uint16_t. */
static const uint16_t x264_chroma_lambda2_offset_tab[] = {
       16,    20,    25,    32,    40,    50,
       64,    80,   101,   128,   161,   203,
      256,   322,   406,   512,   645,   812,
     1024,  1290,  1625,  2048,  2580,  3250,
     4096,  5160,  6501,  8192, 10321, 13003,
    16384, 20642, 26007, 32768, 41285, 52015,
    65535
};

222
/* TODO: calculate CABAC costs */
Loren Merritt's avatar
Loren Merritt committed
223
static const int i_mb_b_cost_table[X264_MBTYPE_MAX] = {
224
    9, 9, 9, 9, 0, 0, 0, 1, 3, 7, 7, 7, 3, 7, 7, 7, 5, 9, 0
225
};
226
/* Cost tables for B 16x8/8x16 macroblock types and for B/P sub-macroblock
 * types.
 * NOTE(review): index meaning (mb type / sub-partition enum value) is not
 * visible in this part of the file; confirm at the call sites. */
static const int i_mb_b16x8_cost_table[17] = {
    0, 0, 0, 0, 0, 0, 0, 0, 5, 7, 7, 7, 5, 7, 9, 9, 9
};
static const int i_sub_mb_b_cost_table[13] = {
    7, 5, 5, 3, 7, 5, 7, 3, 7, 7, 7, 5, 1
};
static const int i_sub_mb_p_cost_table[4] = {
    5, 3, 3, 1
};
235

236 237
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );

/* Reference-index cost table shared by all encoder instances:
 *   [lambda]  0..91, the range of x264_lambda_tab
 *   [i]       clip(number of active refs - 1, 0, 2)
 *   [j]       0..32 (second argument passed to bs_size_te when the table is filled)
 * Filled by x264_analyse_init_costs under cost_ref_mutex. */
static uint16_t x264_cost_ref[92][3][33];
static x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
240

241
/* Build the lambda-scaled cost tables needed to analyse at the given qp.
 *
 * Allocates and fills h->cost_mv[lambda] (indexable from -2*4*2048 to
 * +2*4*2048), fills the shared x264_cost_ref[lambda] table and, for ESA and
 * more exhaustive motion search, the four h->cost_mv_fpel[lambda][] tables.
 * Does nothing if h->cost_mv[lambda] already exists.
 *
 * Returns 0 on success, -1 if an allocation failed.
 *
 * NOTE(review): if a cost_mv_fpel allocation fails, h->cost_mv[lambda] stays
 * allocated, so a later call for the same lambda returns 0 without the fpel
 * tables being complete. */
int x264_analyse_init_costs( x264_t *h, int qp )
{
    int i, j;
    int lambda = x264_lambda_tab[qp];
    if( h->cost_mv[lambda] )
        return 0;
    /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
    CHECKED_MALLOC( h->cost_mv[lambda], (4*4*2048 + 1) * sizeof(uint16_t) );
    /* point at the middle of the buffer so that negative indices are valid */
    h->cost_mv[lambda] += 2*4*2048;
    for( i = 0; i <= 2*4*2048; i++ )
    {
        h->cost_mv[lambda][-i] =
        h->cost_mv[lambda][i]  = lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f;
    }
    /* x264_cost_ref is a file-scope table, hence the lock */
    x264_pthread_mutex_lock( &cost_ref_mutex );
    for( i = 0; i < 3; i++ )
        for( j = 0; j < 33; j++ )
            x264_cost_ref[lambda][i][j] = i ? lambda * bs_size_te( i, j ) : 0;
    x264_pthread_mutex_unlock( &cost_ref_mutex );
    if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[lambda][0] )
    {
        /* one full-pel table per quarter-pel phase j: entry i is the cost of mv i*4+j */
        for( j=0; j<4; j++ )
        {
            CHECKED_MALLOC( h->cost_mv_fpel[lambda][j], (4*2048 + 1) * sizeof(uint16_t) );
            h->cost_mv_fpel[lambda][j] += 2*2048;
            for( i = -2*2048; i < 2*2048; i++ )
                h->cost_mv_fpel[lambda][j][i] = h->cost_mv[lambda][i*4+j];
        }
    }
    return 0;
fail:
    return -1;
}

275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
/* Release every table allocated by x264_analyse_init_costs.
 *
 * The stored pointers point into the middle of their buffers (offset by
 * 2*4*2048 and 2*2048 elements respectively), so the offset is undone before
 * freeing. Each pointer is checked on its own: if x264_analyse_init_costs
 * failed part-way through the four cost_mv_fpel allocations, the later slots
 * are still NULL and must not be offset and freed. */
void x264_analyse_free_costs( x264_t *h )
{
    int i, j;
    /* 92 lambda slots: x264_lambda_tab tops out at 91 */
    for( i = 0; i < 92; i++ )
    {
        if( h->cost_mv[i] )
            x264_free( h->cost_mv[i] - 2*4*2048 );
        for( j = 0; j < 4; j++ )
            if( h->cost_mv_fpel[i][j] )
                x264_free( h->cost_mv_fpel[i][j] - 2*2048 );
    }
}

/* Point the analysis context at the precomputed cost tables for its lambda:
 * the mv cost table, and the reference-index cost rows selected by the number
 * of active references in each list (clipped to 1..3, i.e. row 0..2). */
static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
{
    const int lambda  = a->i_lambda;
    const int l0_row  = x264_clip3( h->sh.i_num_ref_idx_l0_active-1, 0, 2 );
    const int l1_row  = x264_clip3( h->sh.i_num_ref_idx_l1_active-1, 0, 2 );

    a->p_cost_mv   = h->cost_mv[lambda];
    a->p_cost_ref0 = x264_cost_ref[lambda][l0_row];
    a->p_cost_ref1 = x264_cost_ref[lambda][l1_row];
}

Laurent Aimar's avatar
Laurent Aimar committed
296 297
/* Prepare the encoder state and analysis context for one macroblock at i_qp.
 *
 * Sets the RD level (a->i_mbrd), QP and lambdas, trellis/psy lambdas and the
 * motion-estimation settings in h->mb, resets all intra costs to COST_MAX and,
 * for non-I slices, computes the allowed motion vector range, resets the inter
 * costs and decides whether the fast intra shortcut may be used. */
static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
{
    /* B slices use one subpel-refine level less when choosing the RD level */
    int i_subme = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);

    /* mbrd == 1 -> RD mode decision */
    /* mbrd == 2 -> RD refinement */
    /* mbrd == 3 -> QPRD */
    a->i_mbrd = (i_subme>=6) + (i_subme>=8) + (h->param.analyse.i_subpel_refine>=10);

    /* conduct the analysis using this lambda and QP */
    a->i_qp = h->mb.i_qp = i_qp;
    h->mb.i_chroma_qp = h->chroma_qp_table[i_qp];

    a->i_lambda = x264_lambda_tab[i_qp];
    a->i_lambda2 = x264_lambda2_tab[i_qp];

    h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
    if( h->param.analyse.i_trellis )
    {
        /* [0][x] uses the luma QP, [1][x] the chroma QP; [x][0] inter, [x][1] intra */
        h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][h->mb.i_qp];
        h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][h->mb.i_qp];
        h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][h->mb.i_chroma_qp];
        h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
    }
    h->mb.i_psy_rd_lambda = a->i_lambda;
    /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
    h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;

    h->mb.i_me_method = h->param.analyse.i_me_method;
    h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
    h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
                        && h->mb.i_subpel_refine >= 5;

    h->mb.b_transform_8x8 = 0;
    h->mb.b_noise_reduction = 0;

    /* I: Intra part */
    a->i_satd_i16x16 =
    a->i_satd_i8x8   =
    a->i_satd_i4x4   =
    a->i_satd_i8x8chroma = COST_MAX;

    /* non-RD PCM decision is inaccurate (as is psy-rd), so don't do it */
    a->i_satd_pcm = !h->mb.i_psy_rd && a->i_mbrd ? ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8 : COST_MAX;

    a->b_fast_intra = 0;
    h->mb.i_skip_intra =
        h->mb.b_lossless ? 0 :
        a->i_mbrd ? 2 :
        !h->param.analyse.i_trellis && !h->param.analyse.i_noise_reduction;

    /* II: Inter part P/B frame */
    if( h->sh.i_type != SLICE_TYPE_I )
    {
        int i, j;
        int i_fmv_range = 4 * h->param.analyse.i_mv_range;
        // limit motion search to a slightly smaller range than the theoretical limit,
        // since the search may go a few iterations past its given range
        int i_fpel_border = 6; // umh: 1 for diamond, 2 for octagon, 2 for hpel

        /* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range-1 )
        h->mb.mv_min[0] = 4*( -16*h->mb.i_mb_x - 24 );
        h->mb.mv_max[0] = 4*( 16*( h->sps->i_mb_width - h->mb.i_mb_x - 1 ) + 24 );
        h->mb.mv_min_spel[0] = CLIP_FMV( h->mb.mv_min[0] );
        h->mb.mv_max_spel[0] = CLIP_FMV( h->mb.mv_max[0] );
        h->mb.mv_min_fpel[0] = (h->mb.mv_min_spel[0]>>2) + i_fpel_border;
        h->mb.mv_max_fpel[0] = (h->mb.mv_max_spel[0]>>2) - i_fpel_border;
        /* the vertical range is only recomputed at the first macroblock of a row */
        if( h->mb.i_mb_x == 0)
        {
            int mb_y = h->mb.i_mb_y >> h->sh.b_mbaff;
            int mb_height = h->sps->i_mb_height >> h->sh.b_mbaff;
            int thread_mvy_range = i_fmv_range;

            if( h->param.i_threads > 1 )
            {
                int pix_y = (h->mb.i_mb_y | h->mb.b_interlaced) * 16;
                int thresh = pix_y + h->param.analyse.i_mv_range_thread;
                /* wait on every reference frame (list 1 first for B slices) and
                 * limit the downward range to the lines they have completed */
                for( i = (h->sh.i_type == SLICE_TYPE_B); i >= 0; i-- )
                {
                    x264_frame_t **fref = i ? h->fref1 : h->fref0;
                    int i_ref = i ? h->i_ref1 : h->i_ref0;
                    for( j=0; j<i_ref; j++ )
                    {
                        x264_frame_cond_wait( fref[j], thresh );
                        thread_mvy_range = X264_MIN( thread_mvy_range, fref[j]->i_lines_completed - pix_y );
                    }
                }
                /* deterministic mode ignores how far the references actually got */
                if( h->param.b_deterministic )
                    thread_mvy_range = h->param.analyse.i_mv_range_thread;
                if( h->mb.b_interlaced )
                    thread_mvy_range >>= 1;
            }

            h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
            h->mb.mv_max[1] = 4*( 16*( mb_height - mb_y - 1 ) + 24 );
            h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
            h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
            h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
            h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
            h->mb.mv_max_fpel[1] = (h->mb.mv_max_spel[1]>>2) - i_fpel_border;
        }
#undef CLIP_FMV

        a->l0.me16x16.cost =
        a->l0.i_rd16x16    =
        a->l0.i_cost8x8    = COST_MAX;

        for( i = 0; i < 4; i++ )
        {
            a->l0.i_cost4x4[i] =
            a->l0.i_cost8x4[i] =
            a->l0.i_cost4x8[i] = COST_MAX;
        }

        a->l0.i_cost16x8   =
        a->l0.i_cost8x16   = COST_MAX;
        if( h->sh.i_type == SLICE_TYPE_B )
        {
            a->l1.me16x16.cost =
            a->l1.i_rd16x16    =
            a->l1.i_cost8x8    = COST_MAX;

            for( i = 0; i < 4; i++ )
            {
                a->l1.i_cost4x4[i] =
                a->l1.i_cost8x4[i] =
                a->l1.i_cost4x8[i] =
                a->i_cost8x8direct[i] = COST_MAX;
            }

            a->l1.i_cost16x8   =
            a->l1.i_cost8x16   =
            a->i_rd16x16bi     =
            a->i_rd16x16direct =
            a->i_rd8x8bi       =
            a->i_rd16x8bi      =
            a->i_rd8x16bi      =
            a->i_cost16x16bi   =
            a->i_cost16x16direct =
            a->i_cost8x8bi     =
            a->i_cost16x8bi    =
            a->i_cost8x16bi    = COST_MAX;
        }

        /* Fast intra decision */
        if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
        {
            if(   IS_INTRA( h->mb.i_mb_type_left )
               || IS_INTRA( h->mb.i_mb_type_top )
               || IS_INTRA( h->mb.i_mb_type_topleft )
               || IS_INTRA( h->mb.i_mb_type_topright )
               || (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref0[0]->mb_type[h->mb.i_mb_xy] ))
               || (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_8x8] + h->stat.frame.i_mb_count[I_16x16])) )
            { /* intra is likely */ }
            else
            {
                a->b_fast_intra = 1;
            }
        }
        h->mb.b_skip_mc = 0;
    }
}



/*
 * Handle intra mb
 */
/* Max = 4 */
static void predict_16x16_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
{
Fiona Glaser's avatar
Fiona Glaser committed
468 469 470
    int b_top = i_neighbour & MB_TOP;
    int b_left = i_neighbour & MB_LEFT;
    if( b_top && b_left )
Laurent Aimar's avatar
Laurent Aimar committed
471
    {
Loren Merritt's avatar
Loren Merritt committed
472
        /* top and left available */
Laurent Aimar's avatar
Laurent Aimar committed
473 474 475
        *mode++ = I_PRED_16x16_V;
        *mode++ = I_PRED_16x16_H;
        *mode++ = I_PRED_16x16_DC;
Fiona Glaser's avatar
Fiona Glaser committed
476 477 478 479 480 481 482
        *pi_count = 3;
        if( i_neighbour & MB_TOPLEFT )
        {
            /* top left available*/
            *mode++ = I_PRED_16x16_P;
            *pi_count = 4;
        }
Laurent Aimar's avatar
Laurent Aimar committed
483
    }
Fiona Glaser's avatar
Fiona Glaser committed
484
    else if( b_left )
Laurent Aimar's avatar
Laurent Aimar committed
485 486 487 488 489 490
    {
        /* left available*/
        *mode++ = I_PRED_16x16_DC_LEFT;
        *mode++ = I_PRED_16x16_H;
        *pi_count = 2;
    }
Fiona Glaser's avatar
Fiona Glaser committed
491
    else if( b_top )
Laurent Aimar's avatar
Laurent Aimar committed
492 493 494 495 496 497 498 499
    {
        /* top available*/
        *mode++ = I_PRED_16x16_DC_TOP;
        *mode++ = I_PRED_16x16_V;
        *pi_count = 2;
    }
    else
    {
Loren Merritt's avatar
Loren Merritt committed
500
        /* none available */
Laurent Aimar's avatar
Laurent Aimar committed
501 502 503 504 505 506
        *mode = I_PRED_16x16_DC_128;
        *pi_count = 1;
    }
}

/* Max = 4 */
507
static void predict_8x8chroma_mode_available( unsigned int i_neighbour, int *mode, int *pi_count )
Laurent Aimar's avatar
Laurent Aimar committed
508
{
Fiona Glaser's avatar
Fiona Glaser committed
509 510 511
    int b_top = i_neighbour & MB_TOP;
    int b_left = i_neighbour & MB_LEFT;
    if( b_top && b_left )
Laurent Aimar's avatar
Laurent Aimar committed
512
    {
Loren Merritt's avatar
Loren Merritt committed
513
        /* top and left available */
Laurent Aimar's avatar
Laurent Aimar committed
514 515 516
        *mode++ = I_PRED_CHROMA_V;
        *mode++ = I_PRED_CHROMA_H;
        *mode++ = I_PRED_CHROMA_DC;
Fiona Glaser's avatar
Fiona Glaser committed
517 518 519 520 521 522 523
        *pi_count = 3;
        if( i_neighbour & MB_TOPLEFT )
        {
            /* top left available */
            *mode++ = I_PRED_CHROMA_P;
            *pi_count = 4;
        }
Laurent Aimar's avatar
Laurent Aimar committed
524
    }
Fiona Glaser's avatar
Fiona Glaser committed
525
    else if( b_left )
Laurent Aimar's avatar
Laurent Aimar committed
526 527 528 529 530 531
    {
        /* left available*/
        *mode++ = I_PRED_CHROMA_DC_LEFT;
        *mode++ = I_PRED_CHROMA_H;
        *pi_count = 2;
    }
Fiona Glaser's avatar
Fiona Glaser committed
532
    else if( b_top )
Laurent Aimar's avatar
Laurent Aimar committed
533 534 535 536 537 538 539 540
    {
        /* top available*/
        *mode++ = I_PRED_CHROMA_DC_TOP;
        *mode++ = I_PRED_CHROMA_V;
        *pi_count = 2;
    }
    else
    {
Loren Merritt's avatar
Loren Merritt committed
541
        /* none available */
Laurent Aimar's avatar
Laurent Aimar committed
542 543 544 545 546
        *mode = I_PRED_CHROMA_DC_128;
        *pi_count = 1;
    }
}

547 548 549
/* MAX = 9 */
static void predict_4x4_mode_available( unsigned int i_neighbour,
                                        int *mode, int *pi_count )
Laurent Aimar's avatar
Laurent Aimar committed
550
{
Fiona Glaser's avatar
Fiona Glaser committed
551 552 553
    int b_top = i_neighbour & MB_TOP;
    int b_left = i_neighbour & MB_LEFT;
    if( b_top && b_left )
Laurent Aimar's avatar
Laurent Aimar committed
554
    {
555
        *pi_count = 6;
Laurent Aimar's avatar
Laurent Aimar committed
556 557 558
        *mode++ = I_PRED_4x4_DC;
        *mode++ = I_PRED_4x4_H;
        *mode++ = I_PRED_4x4_V;
559
        *mode++ = I_PRED_4x4_DDL;
560 561 562 563 564 565 566
        if( i_neighbour & MB_TOPLEFT )
        {
            *mode++ = I_PRED_4x4_DDR;
            *mode++ = I_PRED_4x4_VR;
            *mode++ = I_PRED_4x4_HD;
            *pi_count += 3;
        }
567
        *mode++ = I_PRED_4x4_VL;
Laurent Aimar's avatar
Laurent Aimar committed
568 569
        *mode++ = I_PRED_4x4_HU;
    }
Fiona Glaser's avatar
Fiona Glaser committed
570
    else if( b_left )
Laurent Aimar's avatar
Laurent Aimar committed
571 572 573
    {
        *mode++ = I_PRED_4x4_DC_LEFT;
        *mode++ = I_PRED_4x4_H;
574 575
        *mode++ = I_PRED_4x4_HU;
        *pi_count = 3;
Laurent Aimar's avatar
Laurent Aimar committed
576
    }
Fiona Glaser's avatar
Fiona Glaser committed
577
    else if( b_top )
Laurent Aimar's avatar
Laurent Aimar committed
578 579 580
    {
        *mode++ = I_PRED_4x4_DC_TOP;
        *mode++ = I_PRED_4x4_V;
581 582 583
        *mode++ = I_PRED_4x4_DDL;
        *mode++ = I_PRED_4x4_VL;
        *pi_count = 4;
Laurent Aimar's avatar
Laurent Aimar committed
584 585 586 587 588 589 590 591
    }
    else
    {
        *mode++ = I_PRED_4x4_DC_128;
        *pi_count = 1;
    }
}

592 593 594
/* Cache the zigzag-scanned DCT of the source luma macroblock for psy-trellis.
 * The transform is taken against an all-zero block, so the coefficients are
 * those of the source pixels themselves.
 *
 * do_both_dct: non-zero to compute both the 8x8 and the 4x4 transform;
 *              otherwise only the size selected by h->mb.b_transform_8x8.
 *
 * For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */
static inline void x264_psy_trellis_init( x264_t *h, int do_both_dct )
{
    ALIGNED_ARRAY_16( int16_t, dct8x8,[4],[64] );
    ALIGNED_ARRAY_16( int16_t, dct4x4,[16],[16] );
    ALIGNED_16( static uint8_t zero[16*FDEC_STRIDE] ) = {0};
    int i;

    if( do_both_dct || h->mb.b_transform_8x8 )
    {
        h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], zero );
        for( i = 0; i < 4; i++ )
            h->zigzagf.scan_8x8( h->mb.pic.fenc_dct8[i], dct8x8[i] );
    }
    if( do_both_dct || !h->mb.b_transform_8x8 )
    {
        h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], zero );
        for( i = 0; i < 16; i++ )
            h->zigzagf.scan_4x4( h->mb.pic.fenc_dct4[i], dct4x4[i] );
    }
}

/* Pre-calculate fenc satd scores for psy RD, minus DC coefficients.
 *
 * When trellis is 2 and psy-trellis is enabled, the source DCT cache is
 * initialised first. If psy-RD is disabled nothing else is done. Otherwise
 * every 4x4 (satd) and 8x8 (sa8d) luma block of the source macroblock is
 * scored against an all-zero block, the scaled SAD is subtracted, and the
 * per-block values and their sums are stored in h->mb.pic. */
static inline void x264_mb_cache_fenc_satd( x264_t *h )
{
    ALIGNED_16( static uint8_t zero[16] ) = {0};
    uint8_t *fenc;
    int x, y, satd_sum = 0, sa8d_sum = 0;
    if( h->param.analyse.i_trellis == 2 && h->mb.i_psy_trellis )
        x264_psy_trellis_init( h, h->param.analyse.b_transform_8x8 );
    if( !h->mb.i_psy_rd )
        return;
    /* stride 0 makes every row of the reference read the same 16 zero bytes */
    for( y = 0; y < 4; y++ )
        for( x = 0; x < 4; x++ )
        {
            fenc = h->mb.pic.p_fenc[0]+x*4+y*4*FENC_STRIDE;
            h->mb.pic.fenc_satd[y][x] = h->pixf.satd[PIXEL_4x4]( zero, 0, fenc, FENC_STRIDE )
                                      - (h->pixf.sad[PIXEL_4x4]( zero, 0, fenc, FENC_STRIDE )>>1);
            satd_sum += h->mb.pic.fenc_satd[y][x];
        }
    for( y = 0; y < 2; y++ )
        for( x = 0; x < 2; x++ )
        {
            fenc = h->mb.pic.p_fenc[0]+x*8+y*8*FENC_STRIDE;
            h->mb.pic.fenc_sa8d[y][x] = h->pixf.sa8d[PIXEL_8x8]( zero, 0, fenc, FENC_STRIDE )
                                      - (h->pixf.sad[PIXEL_8x8]( zero, 0, fenc, FENC_STRIDE )>>2);
            sa8d_sum += h->mb.pic.fenc_sa8d[y][x];
        }
    h->mb.pic.fenc_satd_sum = satd_sum;
    h->mb.pic.fenc_sa8d_sum = sa8d_sum;
}

644 645 646 647 648
/* Choose the intra 8x8 chroma prediction mode for the current macroblock.
 *
 * Returns immediately if a->i_satd_i8x8chroma is already below COST_MAX
 * (the search was done before). Otherwise every available mode is predicted
 * into the U and V decode planes and scored with mbcmp plus the lambda-scaled
 * mode cost. Per-candidate costs go to a->i_satd_i8x8chroma_dir[], the best
 * cost and mode to a->i_satd_i8x8chroma / a->i_predict8x8chroma, and the best
 * mode is also written to h->mb.i_chroma_pred_mode. */
static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
{
    int i;

    int i_max;
    int predict_mode[4];
    int b_merged_satd = !!h->pixf.intra_mbcmp_x3_8x8c && !h->mb.b_lossless;

    uint8_t *p_dstc[2], *p_srcc[2];

    if( a->i_satd_i8x8chroma < COST_MAX )
        return;

    /* 8x8 prediction selection for chroma */
    p_dstc[0] = h->mb.pic.p_fdec[1];
    p_dstc[1] = h->mb.pic.p_fdec[2];
    p_srcc[0] = h->mb.pic.p_fenc[1];
    p_srcc[1] = h->mb.pic.p_fenc[2];

    predict_8x8chroma_mode_available( h->mb.i_neighbour_intra, predict_mode, &i_max );
    a->i_satd_i8x8chroma = COST_MAX;
    if( i_max == 4 && b_merged_satd )
    {
        /* all four modes available: the x3 function scores three of them in
         * one call per plane, only plane prediction is done separately */
        int satdu[4], satdv[4];
        h->pixf.intra_mbcmp_x3_8x8c( p_srcc[0], p_dstc[0], satdu );
        h->pixf.intra_mbcmp_x3_8x8c( p_srcc[1], p_dstc[1], satdv );
        h->predict_8x8c[I_PRED_CHROMA_P]( p_dstc[0] );
        h->predict_8x8c[I_PRED_CHROMA_P]( p_dstc[1] );
        satdu[I_PRED_CHROMA_P] =
            h->pixf.mbcmp[PIXEL_8x8]( p_dstc[0], FDEC_STRIDE, p_srcc[0], FENC_STRIDE );
        satdv[I_PRED_CHROMA_P] =
            h->pixf.mbcmp[PIXEL_8x8]( p_dstc[1], FDEC_STRIDE, p_srcc[1], FENC_STRIDE );

        for( i=0; i<i_max; i++ )
        {
            int i_mode = predict_mode[i];
            int i_satd = satdu[i_mode] + satdv[i_mode]
                       + a->i_lambda * bs_size_ue(i_mode);

            a->i_satd_i8x8chroma_dir[i] = i_satd;
            COPY2_IF_LT( a->i_satd_i8x8chroma, i_satd, a->i_predict8x8chroma, i_mode );
        }
    }
    else
    {
        for( i=0; i<i_max; i++ )
        {
            int i_satd;
            int i_mode = predict_mode[i];

            /* we do the prediction */
            if( h->mb.b_lossless )
                x264_predict_lossless_8x8_chroma( h, i_mode );
            else
            {
                h->predict_8x8c[i_mode]( p_dstc[0] );
                h->predict_8x8c[i_mode]( p_dstc[1] );
            }

            /* we calculate the cost */
            i_satd = h->pixf.mbcmp[PIXEL_8x8]( p_dstc[0], FDEC_STRIDE,
                                               p_srcc[0], FENC_STRIDE ) +
                     h->pixf.mbcmp[PIXEL_8x8]( p_dstc[1], FDEC_STRIDE,
                                               p_srcc[1], FENC_STRIDE ) +
                     a->i_lambda * bs_size_ue( x264_mb_pred_mode8x8c_fix[i_mode] );

            a->i_satd_i8x8chroma_dir[i] = i_satd;
            COPY2_IF_LT( a->i_satd_i8x8chroma, i_satd, a->i_predict8x8chroma, i_mode );
        }
    }

    h->mb.i_chroma_pred_mode = a->i_predict8x8chroma;
}

718
/*
 * Luma intra mode decision for the current macroblock.
 *
 * Evaluates, in this order, I_16x16, then (if enabled in the analysis flags)
 * I_8x8 and I_4x4, using the comparison functions in h->pixf plus a
 * lambda-weighted estimate of the mode signalling cost.
 *
 *   h            encoder context; reads source pixels from p_fenc[0] and
 *                overwrites p_fdec[0] with predictions / reconstructions.
 *   a            analysis state; receives the results:
 *                  i_satd_i16x16, i_predict16x16, i_satd_i16x16_dir[]
 *                  i_satd_i8x8,   i_predict8x8[], i_satd_i8x8_dir[][]
 *                  i_satd_i4x4,   i_predict4x4[]
 *   i_satd_inter best inter cost found so far; used for early termination.
 *
 * Notes:
 *  - The best-cost fields are updated through COPY2_IF_LT, i.e. compared
 *    against their incoming value, so the caller is presumably expected to
 *    have initialised them (e.g. to COST_MAX) -- TODO confirm at call site.
 *  - The function can return early (after 16x16 or after 8x8); in that case
 *    the i_satd_i8x8 / i_satd_i4x4 fields of the sizes that were not reached
 *    are not written here.
 *  - When a partition size is abandoned before its last block, its cost is
 *    set to COST_MAX.
 *  - If h->mb.i_skip_intra is set, the fdec pixels, non-zero counts and cbp
 *    of a fully analysed 8x8 / 4x4 pass are saved to the i8x8_* / i4x4_*
 *    buffers (plus the DCT coefficients when i_skip_intra == 2).
 */
static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
{
    /* I slices use the intra partition flags, other slice types the inter ones */
    const unsigned int flags = h->sh.i_type == SLICE_TYPE_I ? h->param.analyse.intra : h->param.analyse.inter;
    uint8_t  *p_src = h->mb.pic.p_fenc[0];
    uint8_t  *p_dst = h->mb.pic.p_fdec[0];

    int i, idx;
    int i_max;
    int predict_mode[9];
    /* the merged 3-mode compare is only used when available and not lossless */
    int b_merged_satd = !!h->pixf.intra_mbcmp_x3_16x16 && !h->mb.b_lossless;

    /*---------------- Try all mode and calculate their score ---------------*/

    /* 16x16 prediction selection */
    predict_16x16_mode_available( h->mb.i_neighbour_intra, predict_mode, &i_max );

    if( b_merged_satd && i_max == 4 )
    {
        /* all four modes available: three costs from one merged call,
         * the plane mode is predicted and measured separately */
        h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
        h->predict_16x16[I_PRED_16x16_P]( p_dst );
        a->i_satd_i16x16_dir[I_PRED_16x16_P] =
            h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
        for( i=0; i<4; i++ )
        {
            /* add the mode signalling cost and keep the cheapest mode */
            int cost = a->i_satd_i16x16_dir[i] += a->i_lambda * bs_size_ue(i);
            COPY2_IF_LT( a->i_satd_i16x16, cost, a->i_predict16x16, i );
        }
    }
    else
    {
        /* generic path: predict and measure each available mode in turn */
        for( i = 0; i < i_max; i++ )
        {
            int i_satd;
            int i_mode = predict_mode[i];

            if( h->mb.b_lossless )
                x264_predict_lossless_16x16( h, i_mode );
            else
                h->predict_16x16[i_mode]( p_dst );

            i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
                    a->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
            COPY2_IF_LT( a->i_satd_i16x16, i_satd, a->i_predict16x16, i_mode );
            a->i_satd_i16x16_dir[i_mode] = i_satd;
        }
    }

    if( h->sh.i_type == SLICE_TYPE_B )
        /* cavlc mb type prefix */
        a->i_satd_i16x16 += a->i_lambda * i_mb_b_cost_table[I_16x16];
    /* fast intra: give up on smaller partitions if 16x16 is already more
     * than twice as expensive as the best inter mode */
    if( a->b_fast_intra && a->i_satd_i16x16 > 2*i_satd_inter )
        return;

    /* 8x8 prediction selection */
    if( flags & X264_ANALYSE_I8x8 )
    {
        ALIGNED_ARRAY_16( uint8_t, edge,[33] );
        /* use sa8d for 8x8 blocks when the generic metric is satd */
        x264_pixel_cmp_t sa8d = (h->pixf.mbcmp[0] == h->pixf.satd[0]) ? h->pixf.sa8d[PIXEL_8x8] : h->pixf.mbcmp[PIXEL_8x8];
        /* no early termination when mb-level RD is enabled */
        int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
        int i_cost = 0;
        h->mb.i_cbp_luma = 0;
        b_merged_satd = h->pixf.intra_mbcmp_x3_8x8 && !h->mb.b_lossless;

        // FIXME some bias like in i4x4?
        if( h->sh.i_type == SLICE_TYPE_B )
            i_cost += a->i_lambda * i_mb_b_cost_table[I_8x8];

        for( idx = 0;; idx++ )
        {
            int x = idx&1;
            int y = idx>>1;
            uint8_t *p_src_by = p_src + 8*x + 8*y*FENC_STRIDE;
            uint8_t *p_dst_by = p_dst + 8*x + 8*y*FDEC_STRIDE;
            int i_best = COST_MAX;
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );

            predict_4x4_mode_available( h->mb.i_neighbour8[idx], predict_mode, &i_max );
            h->predict_8x8_filter( p_dst_by, edge, h->mb.i_neighbour8[idx], ALL_NEIGHBORS );

            if( b_merged_satd && i_max == 9 )
            {
                int satd[9];
                h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd );
                /* Only satd[0..2] are consumed below, so the predicted-mode
                 * discount is applied only when it targets one of them; this
                 * avoids a read-modify-write of a slot that was never
                 * initialised here. */
                if( i_pred_mode < 3 )
                    satd[i_pred_mode] -= 3 * a->i_lambda;
                for( i=2; i>=0; i-- )
                {
                    int cost = a->i_satd_i8x8_dir[i][idx] = satd[i] + 4 * a->i_lambda;
                    COPY2_IF_LT( i_best, cost, a->i_predict8x8[idx], i );
                }
                /* the first three modes are done; continue with the rest */
                i = 3;
            }
            else
                i = 0;

            for( ; i<i_max; i++ )
            {
                int i_satd;
                int i_mode = predict_mode[i];

                if( h->mb.b_lossless )
                    x264_predict_lossless_8x8( h, p_dst_by, idx, i_mode, edge );
                else
                    h->predict_8x8[i_mode]( p_dst_by, edge );

                /* 4*lambda mode cost, reduced by 3*lambda for the predicted mode */
                i_satd = sa8d( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE ) + a->i_lambda * 4;
                if( i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) )
                    i_satd -= a->i_lambda * 3;

                COPY2_IF_LT( i_best, i_satd, a->i_predict8x8[idx], i_mode );
                a->i_satd_i8x8_dir[i_mode][idx] = i_satd;
            }
            i_cost += i_best;

            /* stop after the last block, or as soon as the running cost
             * exceeds the threshold */
            if( idx == 3 || i_cost > i_satd_thresh )
                break;

            /* we need to encode this block now (for next ones) */
            h->predict_8x8[a->i_predict8x8[idx]]( p_dst_by, edge );
            x264_mb_encode_i8x8( h, idx, a->i_qp );

            x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, a->i_predict8x8[idx] );
        }

        if( idx == 3 )
        {
            /* all four blocks were analysed */
            a->i_satd_i8x8 = i_cost;
            if( h->mb.i_skip_intra )
            {
                /* save the current fdec pixels, nnz and cbp for later reuse */
                h->mc.copy[PIXEL_16x16]( h->mb.pic.i8x8_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
                h->mb.pic.i8x8_nnz_buf[0] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]];
                h->mb.pic.i8x8_nnz_buf[1] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]];
                h->mb.pic.i8x8_nnz_buf[2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]];
                h->mb.pic.i8x8_nnz_buf[3] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]];
                h->mb.pic.i8x8_cbp = h->mb.i_cbp_luma;
                if( h->mb.i_skip_intra == 2 )
                    h->mc.memcpy_aligned( h->mb.pic.i8x8_dct_buf, h->dct.luma8x8, sizeof(h->mb.pic.i8x8_dct_buf) );
            }
        }
        else
        {
            /* terminated early after idx+1 blocks: scale the partial cost
             * by roughly 4/(idx+1) (x4, x2, x1.33 in 8.8 fixed point) so it
             * can be compared against whole-macroblock costs below */
            static const uint16_t cost_div_fix8[3] = {1024,512,341};
            a->i_satd_i8x8 = COST_MAX;
            i_cost = (i_cost * cost_div_fix8[idx]) >> 8;
        }
        /* skip 4x4 analysis if neither 16x16 nor 8x8 is within 5/4 (6/4 with
         * mb-level RD) of the inter cost */
        if( X264_MIN(i_cost, a->i_satd_i16x16) > i_satd_inter*(5+!!a->i_mbrd)/4 )
            return;
    }

    /* 4x4 prediction selection */
    if( flags & X264_ANALYSE_I4x4 )
    {
        int i_cost;
        int i_satd_thresh = X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 );
        h->mb.i_cbp_luma = 0;
        b_merged_satd = h->pixf.intra_mbcmp_x3_4x4 && !h->mb.b_lossless;
        /* with mb-level RD, loosen the threshold to 10/8 (9/8 with fast intra) */
        if( a->i_mbrd )
            i_satd_thresh = i_satd_thresh * (10-a->b_fast_intra)/8;

        i_cost = a->i_lambda * 24;    /* from JVT (SATD0) */
        if( h->sh.i_type == SLICE_TYPE_B )
            i_cost += a->i_lambda * i_mb_b_cost_table[I_4x4];

        for( idx = 0;; idx++ )
        {
            uint8_t *p_src_by = p_src + block_idx_xy_fenc[idx];
            uint8_t *p_dst_by = p_dst + block_idx_xy_fdec[idx];
            int i_best = COST_MAX;
            int i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );

            predict_4x4_mode_available( h->mb.i_neighbour4[idx], predict_mode, &i_max );

            if( (h->mb.i_neighbour4[idx] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                /* emulate missing topright samples */
                *(uint32_t*) &p_dst_by[4 - FDEC_STRIDE] = p_dst_by[3 - FDEC_STRIDE] * 0x01010101U;

            if( b_merged_satd && i_max >= 6 )
            {
                int satd[9];
                h->pixf.intra_mbcmp_x3_4x4( p_src_by, p_dst_by, satd );
                /* Only satd[0..2] are consumed below, so the predicted-mode
                 * discount is applied only when it targets one of them; this
                 * avoids a read-modify-write of a slot that was never
                 * initialised here. */
                if( i_pred_mode < 3 )
                    satd[i_pred_mode] -= 3 * a->i_lambda;
                for( i=2; i>=0; i-- )
                    COPY2_IF_LT( i_best, satd[i], a->i_predict4x4[idx], i );
                /* the first three modes are done; continue with the rest */
                i = 3;
            }
            else
                i = 0;

            for( ; i<i_max; i++ )
            {
                int i_satd;
                int i_mode = predict_mode[i];
                if( h->mb.b_lossless )
                    x264_predict_lossless_4x4( h, p_dst_by, idx, i_mode );
                else
                    h->predict_4x4[i_mode]( p_dst_by );

                i_satd = h->pixf.mbcmp[PIXEL_4x4]( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE );
                /* 3*lambda discount for the predicted mode */
                if( i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) )
                    i_satd -= a->i_lambda * 3;

                COPY2_IF_LT( i_best, i_satd, a->i_predict4x4[idx], i_mode );
            }
            /* the per-block 4*lambda mode cost is added once here */
            i_cost += i_best + 4 * a->i_lambda;

            /* stop as soon as the running cost exceeds the threshold, or
             * after the last block */
            if( i_cost > i_satd_thresh || idx == 15 )
                break;

            /* we need to encode this block now (for next ones) */
            h->predict_4x4[a->i_predict4x4[idx]]( p_dst_by );
            x264_mb_encode_i4x4( h, idx, a->i_qp );

            h->mb.cache.intra4x4_pred_mode[x264_scan8[idx]] = a->i_predict4x4[idx];
        }
        if( idx == 15 )
        {
            /* all sixteen blocks were analysed */
            a->i_satd_i4x4 = i_cost;
            if( h->mb.i_skip_intra )
            {
                /* save the current fdec pixels, nnz and cbp for later reuse */
                h->mc.copy[PIXEL_16x16]( h->mb.pic.i4x4_fdec_buf, 16, p_dst, FDEC_STRIDE, 16 );
                h->mb.pic.i4x4_nnz_buf[0] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]];
                h->mb.pic.i4x4_nnz_buf[1] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]];
                h->mb.pic.i4x4_nnz_buf[2] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]];
                h->mb.pic.i4x4_nnz_buf[3] = *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]];
                h->mb.pic.i4x4_cbp = h->mb.i_cbp_luma;
                if( h->mb.i_skip_intra == 2 )
                    h->mc.memcpy_aligned( h->mb.pic.i4x4_dct_buf, h->dct.luma4x4, sizeof(h->mb.pic.i4x4_dct_buf) );
            }
        }
        else
            a->i_satd_i4x4 = COST_MAX;
    }
}

951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975
/*
 * Replace the intra costs stored in the analysis state with the values
 * returned by x264_rd_cost_mb().
 *
 * Each partition size is handled in the order I_16x16, I_4x4, I_8x8.  A size
 * whose stored cost is above i_satd_thresh (or, for 4x4/8x8, already at
 * COST_MAX) is not evaluated and its cost is set to COST_MAX.  Otherwise
 * h->mb.i_type is switched to that type, the caches are refreshed through
 * x264_analyse_update_cache() and the cost field is overwritten.  For I_8x8
 * the resulting luma cbp is additionally kept in a->i_cbp_i8x8_luma.
 */
static void x264_intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh )
{
    /* I_16x16 */
    if( a->i_satd_i16x16 > i_satd_thresh )
        a->i_satd_i16x16 = COST_MAX;
    else
    {
        h->mb.i_type = I_16x16;
        x264_analyse_update_cache( h, a );
        a->i_satd_i16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
    }

    /* I_4x4 */
    if( a->i_satd_i4x4 > i_satd_thresh || a->i_satd_i4x4 >= COST_MAX )
        a->i_satd_i4x4 = COST_MAX;
    else
    {
        h->mb.i_type = I_4x4;
        x264_analyse_update_cache( h, a );
        a->i_satd_i4x4 = x264_rd_cost_mb( h, a->i_lambda2 );
    }

    /* I_8x8 */
    if( a->i_satd_i8x8 > i_satd_thresh || a->i_satd_i8x8 >= COST_MAX )
        a->i_satd_i8x8 = COST_MAX;
    else
    {
        h->mb.i_type = I_8x8;
        x264_analyse_update_cache( h, a );
        a->i_satd_i8x8 = x264_rd_cost_mb( h, a->i_lambda2 );
        /* remember the luma cbp left in h->mb by the I_8x8 evaluation */
        a->i_cbp_i8x8_luma = h->mb.i_cbp_luma;
    }
}

982 983 984 985
static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
{
    uint8_t  *p_dst = h->mb.pic.p_fdec[0];

986
    int i, j, idx, x, y;
987 988
    int i_max, i_mode, i_thresh;
    uint64_t i_satd, i_best;
989
    int predict_mode[9];
990
    h->mb.i_skip_intra = 0;