macroblock.c 43.1 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1 2 3
/*****************************************************************************
 * macroblock.c: h264 encoder library
 *****************************************************************************
4
 * Copyright (C) 2003-2008 x264 project
Laurent Aimar's avatar
Laurent Aimar committed
5 6
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 8
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
Laurent Aimar's avatar
Laurent Aimar committed
9 10 11 12 13 14 15 16 17 18 19 20 21
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
Laurent Aimar's avatar
Laurent Aimar committed
23 24
 *****************************************************************************/

25
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
26 27
#include "macroblock.h"

Fiona Glaser's avatar
Fiona Glaser committed
28 29
/* These chroma DC functions don't have assembly versions and are only used here. */

30
/* Flatten the 2x2 chroma DC block into a 4-entry coefficient array.
 *  level: output, receives dct[0][0], dct[1][0], dct[0][1], dct[1][1] in
 *         that order (i.e. the block is read column by column).
 *  dct:   input 2x2 block, only read. */
static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[2][2] )
{
    level[0] = dct[0][0];
    level[1] = dct[1][0];
    level[2] = dct[0][1];
    level[3] = dct[1][1];
}
Laurent Aimar's avatar
Laurent Aimar committed
39

40 41 42 43 44 45 46 47 48 49 50 51 52
/* Shared prologue of the 2x2 chroma DC inverse-transform/dequant helpers.
 * Expects `dct` (2x2), `dequant_mf` and `i_qp` to be in scope and declares:
 *   d0..d3  first butterfly stage (row sums and row differences of dct),
 *   dmf     scale factor taken from dequant_mf[i_qp%6][0][0],
 *   qbits   i_qp/6 - 5.
 * A positive qbits is folded into dmf as a left shift and then cleared, so
 * after the prologue qbits <= 0 and users only right-shift by -qbits. */
#define IDCT_DEQUANT_START \
    int qbits = i_qp/6 - 5; \
    int dmf = dequant_mf[i_qp%6][0][0]; \
    int d0 = dct[0][0] + dct[0][1]; \
    int d2 = dct[0][0] - dct[0][1]; \
    int d1 = dct[1][0] + dct[1][1]; \
    int d3 = dct[1][0] - dct[1][1]; \
    if( qbits > 0 ) \
    { \
        dmf <<= qbits; \
        qbits = 0; \
    }

Fiona Glaser's avatar
Fiona Glaser committed
53 54
static inline void idct_dequant_2x2_dc( int16_t dct[2][2], int16_t dct4x4[4][4][4], int dequant_mf[6][4][4], int i_qp )
{
55
    IDCT_DEQUANT_START
Fiona Glaser's avatar
Fiona Glaser committed
56 57 58 59 60 61
    dct4x4[0][0][0] = (d0 + d1) * dmf >> -qbits;
    dct4x4[1][0][0] = (d0 - d1) * dmf >> -qbits;
    dct4x4[2][0][0] = (d2 + d3) * dmf >> -qbits;
    dct4x4[3][0][0] = (d2 - d3) * dmf >> -qbits;
}

62 63 64 65 66 67 68 69 70
static inline void idct_dequant_2x2_dconly( int16_t dct[2][2], int dequant_mf[6][4][4], int i_qp )
{
    IDCT_DEQUANT_START
    dct[0][0] = (d0 + d1) * dmf >> -qbits;
    dct[0][1] = (d0 - d1) * dmf >> -qbits;
    dct[1][0] = (d2 + d3) * dmf >> -qbits;
    dct[1][1] = (d2 - d3) * dmf >> -qbits;
}

Fiona Glaser's avatar
Fiona Glaser committed
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
/* Gather the DC coefficient of each of the four 4x4 blocks, apply the 2x2
 * butterfly transform to them and store the result in d.
 * The four DC slots (dct4x4[k][0][0]) are cleared afterwards; every other
 * coefficient of dct4x4 is left as is. */
static inline void dct2x2dc( int16_t d[2][2], int16_t dct4x4[4][4][4] )
{
    int blk;
    const int dc0 = dct4x4[0][0][0];
    const int dc1 = dct4x4[1][0][0];
    const int dc2 = dct4x4[2][0][0];
    const int dc3 = dct4x4[3][0][0];

    /* First stage: sums and differences of the block pairs (0,1) and (2,3). */
    const int sum01  = dc0 + dc1;
    const int sum23  = dc2 + dc3;
    const int diff01 = dc0 - dc1;
    const int diff23 = dc2 - dc3;

    /* Second stage. */
    d[0][0] = sum01  + sum23;
    d[0][1] = sum01  - sum23;
    d[1][0] = diff01 + diff23;
    d[1][1] = diff01 - diff23;

    /* The DC terms now live in d; remove them from the 4x4 blocks. */
    for( blk = 0; blk < 4; blk++ )
        dct4x4[blk][0][0] = 0;
}

87 88 89 90 91 92 93 94 95 96 97 98
/* In-place 2x2 butterfly transform of a block that already holds only the
 * four DC values: all inputs are read first, then d is overwritten. */
static inline void dct2x2dc_dconly( int16_t d[2][2] )
{
    const int in00 = d[0][0];
    const int in01 = d[0][1];
    const int in10 = d[1][0];
    const int in11 = d[1][1];

    /* Row sums / differences ... */
    const int row0_sum  = in00 + in01;
    const int row1_sum  = in10 + in11;
    const int row0_diff = in00 - in01;
    const int row1_diff = in10 - in11;

    /* ... combined across the two rows. */
    d[0][0] = row0_sum  + row1_sum;
    d[0][1] = row0_sum  - row1_sum;
    d[1][0] = row0_diff + row1_diff;
    d[1][1] = row0_diff - row1_diff;
}

99
/* Quantize one 4x4 luma coefficient block.
 *  dct:           coefficients handed to the quantizer.
 *  i_qp:          quantizer; indexes the quant4_mf / quant4_bias tables.
 *  i_ctxBlockCat: block category, forwarded to the trellis quantizer only.
 *  b_intra:       selects the intra (CQM_4IY) or inter (CQM_4PY) matrix.
 *  idx:           block index, forwarded to the trellis quantizer only.
 * Uses the trellis quantizer when h->mb.b_trellis is set, otherwise the
 * quant_4x4 function pointer.  Returns the quantizer's result, which the
 * callers in this file use as a "block has nonzero coefficients" flag. */
static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, int16_t dct[4][4], int i_qp, int i_ctxBlockCat, int b_intra, int idx )
{
    int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
    if( h->mb.b_trellis )
        return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, i_ctxBlockCat, b_intra, 0, idx );
    else
        return h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
}

108
/* Quantize one 8x8 luma coefficient block.
 *  dct:     coefficients handed to the quantizer.
 *  i_qp:    quantizer; indexes the quant8_mf / quant8_bias tables.
 *  b_intra: selects the intra (CQM_8IY) or inter (CQM_8PY) matrix.
 *  idx:     block index, forwarded to the trellis quantizer only.
 * Uses the trellis quantizer when h->mb.b_trellis is set, otherwise the
 * quant_8x8 function pointer.  Returns the quantizer's result, which the
 * callers in this file use as a "block has nonzero coefficients" flag. */
static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, int16_t dct[8][8], int i_qp, int b_intra, int idx )
{
    int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
    if( h->mb.b_trellis )
        return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra, idx );
    else
        return h->quantf.quant_8x8( dct, h->quant8_mf[i_quant_cat][i_qp], h->quant8_bias[i_quant_cat][i_qp] );
}

117 118 119 120 121 122 123 124 125 126 127
/* All encoding functions must output the correct CBP and NNZ values.
 * The entropy coding functions will check CBP first, then NNZ, before
 * actually reading the DCT coefficients.  NNZ still must be correct even
 * if CBP is zero because of the use of NNZ values for context selection.
 * "NNZ" need only be 0 or 1 rather than the exact coefficient count because
 * that is only needed in CAVLC, and will be calculated by CAVLC's residual
 * coding and stored as necessary. */

/* This means that decimation can be done merely by adjusting the CBP and NNZ
 * rather than memsetting the coefficients. */

128
/* Encode intra 4x4 luma block `idx` of the current macroblock at quantizer
 * i_qp.
 * Outputs:
 *  - h->dct.luma4x4[idx]: scanned coefficients (only written when nonzero in
 *    the lossy path),
 *  - h->mb.cache.non_zero_count[x264_scan8[idx]]: nonzero flag,
 *  - bit (idx>>2) of h->mb.i_cbp_luma: set when the block is nonzero,
 *  - p_fdec: in the lossy path the dequantized residual is added back via
 *    add4x4_idct when the block is nonzero. */
void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
{
    int nz;
    uint8_t *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
    uint8_t *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
    DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );

    if( h->mb.b_lossless )
    {
        /* Lossless: no transform/quant, sub_4x4 produces the coefficients
         * directly and nothing is added back to p_dst here. */
        nz = h->zigzagf.sub_4x4( h->dct.luma4x4[idx], p_src, p_dst );
        h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
        h->mb.i_cbp_luma |= nz<<(idx>>2);
        return;
    }

    h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );

    nz = x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 1, idx );
    h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
    if( nz )
    {
        h->mb.i_cbp_luma |= 1<<(idx>>2);
        /* Scan before dequant: dequant_4x4 rewrites dct4x4 in place. */
        h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 );
        h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qp );
        h->dctf.add4x4_idct( p_dst, dct4x4 );
    }
}

/* Write the 0/1 nonzero flag `nz` to the four non_zero_count cache entries
 * belonging to 8x8 luma block `idx` (4x4 blocks idx*4 .. idx*4+3).
 * Each 16-bit store fills two neighbouring entries; nz * 0x0101 replicates
 * the flag into both bytes.  Requires `h` to be in scope.
 * Both arguments are parenthesized so that expression arguments (e.g. `i+1`
 * or `a|b`) expand with the intended precedence; each argument is evaluated
 * twice, so do not pass expressions with side effects.
 * NOTE(review): the uint16_t store assumes the two cache entries are
 * byte-sized, adjacent and 2-byte aligned, and that the build tolerates this
 * type-pun - confirm against x264_scan8 and the compiler flags. */
#define STORE_8x8_NNZ(idx,nz)\
{\
    *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[(idx)*4+0]] = (nz) * 0x0101;\
    *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[(idx)*4+2]] = (nz) * 0x0101;\
}

162
/* Encode intra 8x8 luma block `idx` (0..3, raster order inside the
 * macroblock: bit 0 selects the column, bit 1 the row) at quantizer i_qp.
 * Outputs:
 *  - h->dct.luma8x8[idx]: scanned coefficients (only written when nonzero in
 *    the lossy path),
 *  - the four non_zero_count cache entries of the block (via STORE_8x8_NNZ),
 *  - bit idx of h->mb.i_cbp_luma: set when the block is nonzero,
 *  - p_fdec: in the lossy path the dequantized residual is added back via
 *    add8x8_idct8 when the block is nonzero. */
void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
{
    int x = 8 * (idx&1);
    int y = 8 * (idx>>1);
    int nz;
    uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
    uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
    DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );

    if( h->mb.b_lossless )
    {
        /* Lossless: sub_8x8 produces the coefficients directly. */
        nz = h->zigzagf.sub_8x8( h->dct.luma8x8[idx], p_src, p_dst );
        STORE_8x8_NNZ(idx,nz);
        h->mb.i_cbp_luma |= nz<<idx;
        return;
    }

    h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );

    nz = x264_quant_8x8( h, dct8x8, i_qp, 1, idx );
    if( nz )
    {
        h->mb.i_cbp_luma |= 1<<idx;
        /* Scan before dequant: dequant_8x8 rewrites dct8x8 in place. */
        h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 );
        h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp );
        h->dctf.add8x8_idct8( p_dst, dct8x8 );
        STORE_8x8_NNZ(idx,1);
    }
    else
        STORE_8x8_NNZ(idx,0);
}

194
/* Encode the luma of an intra 16x16 macroblock at quantizer i_qp.
 * The 16 4x4 blocks are coded as AC-only blocks; their DC terms are
 * collected into a separate 4x4 DC block that is transformed and quantized
 * on its own.
 * Outputs:
 *  - h->dct.luma4x4[0..15] and h->dct.luma16x16_dc: scanned coefficients,
 *  - non_zero_count cache entries x264_scan8[0..15] (AC) and [24] (DC),
 *  - h->mb.i_cbp_luma: 0 or 0xf (the caller zeroes it before the call;
 *    the lossless path relies on that),
 *  - p_fdec[0]: reconstructed pixels in the lossy path. */
static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
{
    uint8_t  *p_src = h->mb.pic.p_fenc[0];
    uint8_t  *p_dst = h->mb.pic.p_fdec[0];

    DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
    DECLARE_ALIGNED_16( int16_t dct_dc4x4[4][4] );

    int i, nz;
    int b_decimate = h->sh.i_type == SLICE_TYPE_B || (h->param.analyse.b_dct_decimate && h->sh.i_type == SLICE_TYPE_P);
    /* Starting at 9 keeps every "decimate_score < 6" test below false,
     * which disables decimation when b_decimate is not set. */
    int decimate_score = b_decimate ? 0 : 9;

    if( h->mb.b_lossless )
    {
        for( i = 0; i < 16; i++ )
        {
            int oe = block_idx_xy_fenc[i];
            int od = block_idx_xy_fdec[i];
            nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[i], p_src+oe, p_dst+od, &dct_dc4x4[0][block_idx_yx_1d[i]] );
            h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
            h->mb.i_cbp_luma |= nz;
        }
        /* Expand the accumulated 0/1 flag to the all-blocks CBP value 0xf. */
        h->mb.i_cbp_luma *= 0xf;
        h->mb.cache.non_zero_count[x264_scan8[24]] = array_non_zero( dct_dc4x4 );
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );
        return;
    }

    h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );

    for( i = 0; i < 16; i++ )
    {
        /* copy dc coeff */
        dct_dc4x4[0][block_idx_xy_1d[i]] = dct4x4[i][0][0];
        dct4x4[i][0][0] = 0;

        /* quant/scan/dequant */
        nz = x264_quant_4x4( h, dct4x4[i], i_qp, DCT_LUMA_AC, 1, i );
        h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
        if( nz )
        {
            h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] );
            h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp );
            /* Stop accumulating once the score can no longer trigger decimation. */
            if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[i] );
            h->mb.i_cbp_luma = 0xf;
        }
    }

    /* Writing the 16 CBFs in an i16x16 block is quite costly, so decimation can save many bits. */
    /* More useful with CAVLC, but still useful with CABAC. */
    if( decimate_score < 6 )
    {
        /* Drop all AC coefficients by clearing CBP and the 16 NNZ entries
         * (four 32-bit stores, one per cache row segment). */
        h->mb.i_cbp_luma = 0;
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = 0;
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = 0;
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = 0;
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = 0;
    }

    h->dctf.dct4x4dc( dct_dc4x4 );
    if( h->mb.b_trellis )
        nz = x264_quant_dc_trellis( h, (int16_t*)dct_dc4x4, CQM_4IY, i_qp, DCT_LUMA_DC, 1, 0 );
    else
        nz = h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );

    h->mb.cache.non_zero_count[x264_scan8[24]] = nz;
    if( nz )
    {
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );

        /* output samples to fdec */
        h->dctf.idct4x4dc( dct_dc4x4 );
        h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp );  /* XXX not inversed */
        /* Only merge the DC terms back when the AC blocks are kept;
         * otherwise the DC-only reconstruction below is used. */
        if( h->mb.i_cbp_luma )
            for( i = 0; i < 16; i++ )
                dct4x4[i][0][0] = dct_dc4x4[0][block_idx_xy_1d[i]];
    }

    /* put pixels to fdec */
    if( h->mb.i_cbp_luma )
        h->dctf.add16x16_idct( p_dst, dct4x4 );
    else if( nz )
        h->dctf.add16x16_idct_dc( p_dst, dct_dc4x4 );
}

279
/* Encode both 8x8 chroma planes of the current macroblock.
 *  b_inter: nonzero for inter macroblocks; selects the quant matrices
 *           (CQM_4IC+b_inter) and, together with the slice type and
 *           b_dct_decimate, enables decimation.
 *  i_qp:    chroma quantizer.
 * Outputs:
 *  - h->dct.luma4x4[16..23] (chroma AC) and h->dct.chroma_dc[0..1],
 *  - non_zero_count cache entries x264_scan8[16..23] (AC) and
 *    x264_scan8[25]+ch (DC),
 *  - h->mb.i_cbp_chroma: 0 (nothing), 1 (DC only) or 2 (DC+AC),
 *  - p_fdec[1], p_fdec[2]: reconstructed pixels in the lossy paths. */
void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
{
    int i, ch, nz, nz_dc;
    int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
    DECLARE_ALIGNED_16( int16_t dct2x2[2][2]  );
    h->mb.i_cbp_chroma = 0;

    /* Early termination: check variance of chroma residual before encoding.
     * Don't bother trying early termination at low QPs.
     * Values are experimentally derived. */
    if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) )
    {
        int thresh = (x264_lambda2_tab[i_qp] + 32) >> 6;
        int ssd[2];
        int score  = h->pixf.var2_8x8( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE, &ssd[0] );
            score += h->pixf.var2_8x8( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE, &ssd[1] );
        if( score < thresh*4 )
        {
            /* All AC is dropped; DC flags are set again below if needed. */
            h->mb.cache.non_zero_count[x264_scan8[16]] = 0;
            h->mb.cache.non_zero_count[x264_scan8[17]] = 0;
            h->mb.cache.non_zero_count[x264_scan8[18]] = 0;
            h->mb.cache.non_zero_count[x264_scan8[19]] = 0;
            h->mb.cache.non_zero_count[x264_scan8[20]] = 0;
            h->mb.cache.non_zero_count[x264_scan8[21]] = 0;
            h->mb.cache.non_zero_count[x264_scan8[22]] = 0;
            h->mb.cache.non_zero_count[x264_scan8[23]] = 0;
            h->mb.cache.non_zero_count[x264_scan8[25]] = 0;
            h->mb.cache.non_zero_count[x264_scan8[26]] = 0;
            for( ch = 0; ch < 2; ch++ )
            {
                /* Only planes whose own ssd exceeds the threshold get a
                 * DC-only encode. */
                if( ssd[ch] > thresh )
                {
                    h->dctf.sub8x8_dct_dc( dct2x2, h->mb.pic.p_fenc[1+ch], h->mb.pic.p_fdec[1+ch] );
                    dct2x2dc_dconly( dct2x2 );
                    if( h->mb.b_trellis )
                        nz_dc = x264_quant_dc_trellis( h, (int16_t*)dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter, 1 );
                    else
                        nz_dc = h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );
                    if( nz_dc )
                    {
                        h->mb.cache.non_zero_count[x264_scan8[25]+ch] = 1;
                        zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
                        idct_dequant_2x2_dconly( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
                        h->dctf.add8x8_idct_dc( h->mb.pic.p_fdec[1+ch], dct2x2 );
                        h->mb.i_cbp_chroma = 1;
                    }
                }
            }
            return;
        }
    }

    for( ch = 0; ch < 2; ch++ )
    {
        uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
        uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
        int i_decimate_score = 0;
        int nz_ac = 0;

        DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );

        if( h->mb.b_lossless )
        {
            for( i = 0; i < 4; i++ )
            {
                int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
                int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
                nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[16+i+ch*4], p_src+oe, p_dst+od, &h->dct.chroma_dc[ch][i] );
                h->mb.cache.non_zero_count[x264_scan8[16+i+ch*4]] = nz;
                h->mb.i_cbp_chroma |= nz;
            }
            h->mb.cache.non_zero_count[x264_scan8[25]+ch] = array_non_zero( h->dct.chroma_dc[ch] );
            continue;
        }

        h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
        /* Moves the four DC terms into dct2x2 and zeroes them in dct4x4. */
        dct2x2dc( dct2x2, dct4x4 );
        /* calculate dct coeffs */
        for( i = 0; i < 4; i++ )
        {
            if( h->mb.b_trellis )
                nz = x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IC+b_inter, i_qp, DCT_CHROMA_AC, !b_inter, 1, 0 );
            else
                nz = h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
            h->mb.cache.non_zero_count[x264_scan8[16+i+ch*4]] = nz;
            if( nz )
            {
                nz_ac = 1;
                h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );
                h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qp );
                if( b_decimate )
                    i_decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16+i+ch*4] );
            }
        }

        if( h->mb.b_trellis )
            nz_dc = x264_quant_dc_trellis( h, (int16_t*)dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter, 1 );
        else
            nz_dc = h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );

        h->mb.cache.non_zero_count[x264_scan8[25]+ch] = nz_dc;

        if( (b_decimate && i_decimate_score < 7) || !nz_ac )
        {
            /* Decimate the block */
            h->mb.cache.non_zero_count[x264_scan8[16+0]+24*ch] = 0;
            h->mb.cache.non_zero_count[x264_scan8[16+1]+24*ch] = 0;
            h->mb.cache.non_zero_count[x264_scan8[16+2]+24*ch] = 0;
            h->mb.cache.non_zero_count[x264_scan8[16+3]+24*ch] = 0;
            if( !nz_dc ) /* Whole block is empty */
                continue;
            /* DC-only */
            zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
            idct_dequant_2x2_dconly( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
            h->dctf.add8x8_idct_dc( p_dst, dct2x2 );
        }
        else
        {
            /* AC is kept for this plane; 1 is a provisional marker that is
             * promoted to 2 after the loop. */
            h->mb.i_cbp_chroma = 1;
            if( nz_dc )
            {
                zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
                idct_dequant_2x2_dc( dct2x2, dct4x4, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
            }
            h->dctf.add8x8_idct( p_dst, dct4x4 );
        }
    }

    if( h->mb.i_cbp_chroma )
        h->mb.i_cbp_chroma = 2;    /* dc+ac (we can't do only ac) */
    else if( h->mb.cache.non_zero_count[x264_scan8[25]] |
             h->mb.cache.non_zero_count[x264_scan8[26]] )
        h->mb.i_cbp_chroma = 1;    /* dc only */
}

415 416 417 418
/* Record the current macroblock as carrying no residual: clears the luma
 * and chroma CBP, zeroes the first X264_SCAN8_SIZE bytes of the
 * non_zero_count cache, and stores a CBP of 0 for this macroblock
 * (h->mb.i_mb_xy). */
static void x264_macroblock_encode_skip( x264_t *h )
{
    h->mb.i_cbp_luma = 0x00;
    h->mb.i_cbp_chroma = 0x00;
    memset( h->mb.cache.non_zero_count, 0, X264_SCAN8_SIZE );
    /* store cbp */
    h->mb.cbp[h->mb.i_mb_xy] = 0;
}

Laurent Aimar's avatar
Laurent Aimar committed
424 425 426 427
/*****************************************************************************
 * x264_macroblock_encode_pskip:
 *  Encode an already marked skip block
 *****************************************************************************/
Loic Le Loarer's avatar
Loic Le Loarer committed
428
static void x264_macroblock_encode_pskip( x264_t *h )
Laurent Aimar's avatar
Laurent Aimar committed
429
{
430 431 432 433
    const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
                                h->mb.mv_min[0], h->mb.mv_max[0] );
    const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
                                h->mb.mv_min[1], h->mb.mv_max[1] );
Laurent Aimar's avatar
Laurent Aimar committed
434

435
    /* don't do pskip motion compensation if it was already done in macroblock_analyse */
436
    if( !h->mb.b_skip_mc )
437 438 439 440
    {
        h->mc.mc_luma( h->mb.pic.p_fdec[0],    FDEC_STRIDE,
                       h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
                       mvx, mvy, 16, 16 );
Laurent Aimar's avatar
Laurent Aimar committed
441

442 443 444
        h->mc.mc_chroma( h->mb.pic.p_fdec[1],       FDEC_STRIDE,
                         h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
                         mvx, mvy, 8, 8 );
Laurent Aimar's avatar
Laurent Aimar committed
445

446 447 448 449
        h->mc.mc_chroma( h->mb.pic.p_fdec[2],       FDEC_STRIDE,
                         h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
                         mvx, mvy, 8, 8 );
    }
Laurent Aimar's avatar
Laurent Aimar committed
450

451
    x264_macroblock_encode_skip( h );
Laurent Aimar's avatar
Laurent Aimar committed
452 453
}

454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
/*****************************************************************************
 * Intra prediction for predictive lossless mode.
 *****************************************************************************/

/* Note that these functions take a shortcut (mc.copy instead of actual pixel prediction) which assumes
 * that the edge pixels of the reconstructed frame are the same as those of the source frame.  They
 * therefore only work correctly if the neighboring blocks are losslessly coded; in practice, this means
 * lossless mode cannot be mixed with lossy mode within a frame. */
/* This could be resolved by explicitly copying the edge pixels after doing the mc.copy, but that is not
 * needed unless we decide to allow mixing lossless and lossy compression. */

/* Lossless-mode intra prediction for both 8x8 chroma planes.
 * Vertical and horizontal modes are produced by copying the source plane
 * shifted by one row (up) or one column (left) into fdec; every other mode
 * falls back to the regular predict_8x8c function for that mode. */
void x264_predict_lossless_8x8_chroma( x264_t *h, int i_mode )
{
    const int stride = h->fenc->i_stride[1] << h->mb.b_interlaced;
    int src_offset;
    int plane;

    if( i_mode == I_PRED_CHROMA_V )
        src_offset = -stride;   /* row above */
    else if( i_mode == I_PRED_CHROMA_H )
        src_offset = -1;        /* column to the left */
    else
    {
        h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
        h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
        return;
    }

    /* Planes 1 and 2 are handled identically. */
    for( plane = 1; plane <= 2; plane++ )
        h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[plane], FDEC_STRIDE,
                               h->mb.pic.p_fenc_plane[plane] + src_offset, stride, 8 );
}

/* Lossless-mode intra prediction for 4x4 luma block `idx`, written to p_dst.
 * Vertical and horizontal modes copy the source plane shifted by one row or
 * one column; all other modes use the regular predict_4x4 function. */
void x264_predict_lossless_4x4( x264_t *h, uint8_t *p_dst, int idx, int i_mode )
{
    const int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    /* Top-left pixel of the block inside the source luma plane. */
    uint8_t *p_block = h->mb.pic.p_fenc_plane[0] + block_idx_x[idx]*4 + block_idx_y[idx]*4 * stride;

    if( i_mode == I_PRED_4x4_V )
    {
        h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, p_block - stride, stride, 4 );
        return;
    }
    if( i_mode == I_PRED_4x4_H )
    {
        h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, p_block - 1, stride, 4 );
        return;
    }
    h->predict_4x4[i_mode]( p_dst );
}

/* Lossless-mode intra prediction for 8x8 luma block `idx` (bit 0 = column,
 * bit 1 = row), written to p_dst.
 * Vertical and horizontal modes copy the source plane shifted by one row or
 * one column; all other modes use the regular predict_8x8 function, which
 * is the only path that reads `edge`. */
void x264_predict_lossless_8x8( x264_t *h, uint8_t *p_dst, int idx, int i_mode, uint8_t edge[33] )
{
    const int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    /* Top-left pixel of the block inside the source luma plane. */
    uint8_t *p_block = h->mb.pic.p_fenc_plane[0] + (idx&1)*8 + (idx>>1)*8*stride;

    if( i_mode == I_PRED_8x8_V )
    {
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_block - stride, stride, 8 );
        return;
    }
    if( i_mode == I_PRED_8x8_H )
    {
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_block - 1, stride, 8 );
        return;
    }
    h->predict_8x8[i_mode]( p_dst, edge );
}

/* Lossless-mode intra prediction for the whole 16x16 luma macroblock.
 * Vertical mode copies the source shifted up by one row; horizontal mode
 * copies it shifted left by one pixel and therefore goes through the
 * unaligned 16x16 copy.  All other modes use the regular predict_16x16. */
void x264_predict_lossless_16x16( x264_t *h, int i_mode )
{
    const int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    uint8_t *p_dst = h->mb.pic.p_fdec[0];

    if( i_mode == I_PRED_16x16_V )
    {
        h->mc.copy[PIXEL_16x16]( p_dst, FDEC_STRIDE, h->mb.pic.p_fenc_plane[0] - stride, stride, 16 );
        return;
    }
    if( i_mode == I_PRED_16x16_H )
    {
        h->mc.copy_16x16_unaligned( p_dst, FDEC_STRIDE, h->mb.pic.p_fenc_plane[0] - 1, stride, 16 );
        return;
    }
    h->predict_16x16[i_mode]( p_dst );
}

Laurent Aimar's avatar
Laurent Aimar committed
522 523 524 525 526 527
/*****************************************************************************
 * x264_macroblock_encode:
 *****************************************************************************/
/* Encode the current macroblock of h according to h->mb.i_type.
 *
 * Steps, in order:
 *  - under mbaff, force the macroblock one row below the slice's first MB
 *    to be non-skip when that first MB was a skip;
 *  - P_SKIP and B_SKIP are handed to their dedicated helpers and return early;
 *  - intra types (I_16x16 / I_8x8 / I_4x4): predict each luma block (with the
 *    lossless predictors when h->mb.b_lossless is set) and encode it;
 *  - inter types: motion compensate, then code the luma residual through the
 *    lossless, 8x8-transform or 4x4-transform path, with optional decimation;
 *  - predict (intra only) and encode chroma;
 *  - store the combined cbp in h->mb.cbp[] and, unless skip was forced off,
 *    convert P_L0 / B_DIRECT macroblocks without residual to P_SKIP / B_SKIP.
 *
 * Results are written into h (h->mb, h->dct, the fdec planes, the noise
 * reduction counters); nothing is returned. */
void x264_macroblock_encode( x264_t *h )
{
    int i_cbp_dc = 0;
    int i_qp = h->mb.i_qp;
    int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
    int b_force_no_skip = 0;
    int i,idx,nz;
    h->mb.i_cbp_luma = 0;
    h->mb.cache.non_zero_count[x264_scan8[24]] = 0;

    if( h->sh.b_mbaff
        && h->mb.i_mb_xy == h->sh.i_first_mb + h->mb.i_mb_stride
        && IS_SKIP(h->mb.type[h->sh.i_first_mb]) )
    {
        /* The first skip is predicted to be a frame mb pair.
         * We don't yet support the aff part of mbaff, so force it to non-skip
         * so that we can pick the aff flag. */
        b_force_no_skip = 1;
        if( IS_SKIP(h->mb.i_type) )
        {
            if( h->mb.i_type == P_SKIP )
                h->mb.i_type = P_L0;
            else if( h->mb.i_type == B_SKIP )
                h->mb.i_type = B_DIRECT;
        }
    }

    if( h->mb.i_type == P_SKIP )
    {
        /* A bit special */
        x264_macroblock_encode_pskip( h );
        return;
    }
    if( h->mb.i_type == B_SKIP )
    {
        /* don't do bskip motion compensation if it was already done in macroblock_analyse */
        if( !h->mb.b_skip_mc )
            x264_mb_mc( h );
        x264_macroblock_encode_skip( h );
        return;
    }

    if( h->mb.i_type == I_16x16 )
    {
        const int i_mode = h->mb.i_intra16x16_pred_mode;
        h->mb.b_transform_8x8 = 0;

        if( h->mb.b_lossless )
            x264_predict_lossless_16x16( h, i_mode );
        else
            h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );

        /* encode the 16x16 macroblock */
        x264_mb_encode_i16x16( h, i_qp );
    }
    else if( h->mb.i_type == I_8x8 )
    {
        DECLARE_ALIGNED_16( uint8_t edge[33] );
        h->mb.b_transform_8x8 = 1;
        /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 );
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = h->mb.pic.i8x8_nnz_buf[0];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = h->mb.pic.i8x8_nnz_buf[1];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = h->mb.pic.i8x8_nnz_buf[2];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = h->mb.pic.i8x8_nnz_buf[3];
            h->mb.i_cbp_luma = h->mb.pic.i8x8_cbp;
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
                h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) );
        }
        /* with i_skip_intra set only the last block (index 3) is still to be done */
        for( i = h->mb.i_skip_intra ? 3 : 0 ; i < 4; i++ )
        {
            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
            h->predict_8x8_filter( p_dst, edge, h->mb.i_neighbour8[i], x264_pred_i4x4_neighbors[i_mode] );

            if( h->mb.b_lossless )
                x264_predict_lossless_8x8( h, p_dst, i, i_mode, edge );
            else
                h->predict_8x8[i_mode]( p_dst, edge );

            x264_mb_encode_i8x8( h, i, i_qp );
        }
    }
    else if( h->mb.i_type == I_4x4 )
    {
        h->mb.b_transform_8x8 = 0;
        /* If we already encoded 15 of the 16 i4x4 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = h->mb.pic.i4x4_nnz_buf[0];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = h->mb.pic.i4x4_nnz_buf[1];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = h->mb.pic.i4x4_nnz_buf[2];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = h->mb.pic.i4x4_nnz_buf[3];
            h->mb.i_cbp_luma = h->mb.pic.i4x4_cbp;
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
                h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
        }
        /* with i_skip_intra set only the last block (index 15) is still to be done */
        for( i = h->mb.i_skip_intra ? 15 : 0 ; i < 16; i++ )
        {
            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i]];
            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];

            if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                /* emulate missing topright samples */
                *(uint32_t*) &p_dst[4-FDEC_STRIDE] = p_dst[3-FDEC_STRIDE] * 0x01010101U;

            if( h->mb.b_lossless )
                x264_predict_lossless_4x4( h, p_dst, i, i_mode );
            else
                h->predict_4x4[i_mode]( p_dst );
            x264_mb_encode_i4x4( h, i, i_qp );
        }
    }
    else    /* Inter MB */
    {
        int i8x8, i4x4;
        int i_decimate_mb = 0;

        /* Don't repeat motion compensation if it was already done in non-RD transform analysis */
        if( !h->mb.b_skip_mc )
            x264_mb_mc( h );

        if( h->mb.b_lossless )
        {
            /* lossless: the zigzag "sub" routines take fenc and fdec directly
             * and return the nonzero flag; no quantization calls are made */
            if( h->mb.b_transform_8x8 )
                for( i8x8 = 0; i8x8 < 4; i8x8++ )
                {
                    int x = 8*(i8x8&1);
                    int y = 8*(i8x8>>1);
                    nz = h->zigzagf.sub_8x8( h->dct.luma8x8[i8x8],
                                        h->mb.pic.p_fenc[0]+x+y*FENC_STRIDE,
                                        h->mb.pic.p_fdec[0]+x+y*FDEC_STRIDE );
                    STORE_8x8_NNZ(i8x8,nz);
                    h->mb.i_cbp_luma |= nz << i8x8;
                }
            else
                for( i4x4 = 0; i4x4 < 16; i4x4++ )
                {
                    nz = h->zigzagf.sub_4x4( h->dct.luma4x4[i4x4],
                                        h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4x4],
                                        h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4x4] );
                    h->mb.cache.non_zero_count[x264_scan8[i4x4]] = nz;
                    /* four consecutive 4x4 indices share one cbp bit */
                    h->mb.i_cbp_luma |= nz << (i4x4>>2);
                }
        }
        else if( h->mb.b_transform_8x8 )
        {
            DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
            b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
            h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
            h->nr_count[1] += h->mb.b_noise_reduction * 4;

            for( idx = 0; idx < 4; idx++ )
            {
                if( h->mb.b_noise_reduction )
                    h->quantf.denoise_dct( *dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
                nz = x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );

                if( nz )
                {
                    h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8[idx] );
                    if( b_decimate )
                    {
                        /* an 8x8 block only sets its cbp bit when its score reaches 4 */
                        int i_decimate_8x8 = h->quantf.decimate_score64( h->dct.luma8x8[idx] );
                        i_decimate_mb += i_decimate_8x8;
                        if( i_decimate_8x8 >= 4 )
                            h->mb.i_cbp_luma |= 1<<idx;
                    }
                    else
                        h->mb.i_cbp_luma |= 1<<idx;
                }
            }

            /* a macroblock whose total score stays below 6 is dropped entirely */
            if( i_decimate_mb < 6 && b_decimate )
            {
                h->mb.i_cbp_luma = 0;
                *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = 0;
                *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = 0;
                *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = 0;
                *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = 0;
            }
            else
            {
                /* reconstruct only the 8x8 blocks that kept their cbp bit */
                for( idx = 0; idx < 4; idx++ )
                {
                    if( h->mb.i_cbp_luma&(1<<idx) )
                    {
                        h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
                        h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[0][(idx&1)*8 + (idx>>1)*8*FDEC_STRIDE], dct8x8[idx] );
                        STORE_8x8_NNZ(idx,1);
                    }
                    else
                        STORE_8x8_NNZ(idx,0);
                }
            }
        }
        else
        {
            DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
            h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
            h->nr_count[0] += h->mb.b_noise_reduction * 16;

            for( i8x8 = 0; i8x8 < 4; i8x8++ )
            {
                int i_decimate_8x8 = 0;
                int cbp = 0;

                /* encode one 4x4 block */
                for( i4x4 = 0; i4x4 < 4; i4x4++ )
                {
                    idx = i8x8 * 4 + i4x4;

                    if( h->mb.b_noise_reduction )
                        h->quantf.denoise_dct( *dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
                    nz = x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );
                    h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;

                    if( nz )
                    {
                        h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );
                        h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp );
                        /* stop accumulating once this 8x8 has already reached 6 */
                        if( b_decimate && i_decimate_8x8 < 6 )
                            i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[idx] );
                        cbp = 1;
                    }
                }

                /* decimate this 8x8 block */
                i_decimate_mb += i_decimate_8x8;
                if( b_decimate )
                {
                    if( i_decimate_8x8 < 4 )
                        STORE_8x8_NNZ(i8x8,0)
                    else
                        h->mb.i_cbp_luma |= 1<<i8x8;
                }
                else if( cbp )
                {
                    /* without decimation the block can be reconstructed right away */
                    h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
                    h->mb.i_cbp_luma |= 1<<i8x8;
                }
            }

            if( b_decimate )
            {
                /* with decimation, reconstruction waits for the whole-MB decision */
                if( i_decimate_mb < 6 )
                {
                    h->mb.i_cbp_luma = 0;
                    *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = 0;
                    *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = 0;
                    *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = 0;
                    *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = 0;
                }
                else
                {
                    for( i8x8 = 0; i8x8 < 4; i8x8++ )
                        if( h->mb.i_cbp_luma&(1<<i8x8) )
                            h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
                }
            }
        }
    }

    /* encode chroma */
    if( IS_INTRA( h->mb.i_type ) )
    {
        const int i_mode = h->mb.i_chroma_pred_mode;
        if( h->mb.b_lossless )
            x264_predict_lossless_8x8_chroma( h, i_mode );
        else
        {
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
        }
    }

    /* encode the 8x8 blocks */
    x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );

    if( h->param.b_cabac )
    {
        /* pack the three non_zero_count entries at scan8[24..26] into bits 0-2 */
        i_cbp_dc = h->mb.cache.non_zero_count[x264_scan8[24]]
                 | h->mb.cache.non_zero_count[x264_scan8[25]] << 1
                 | h->mb.cache.non_zero_count[x264_scan8[26]] << 2;
    }

    /* store cbp */
    h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;

    /* Check for P_SKIP
     * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
     *      (if multiple mv give same result)*/
    if( !b_force_no_skip )
    {
        if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
            !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) &&
            *(uint32_t*)h->mb.cache.mv[0][x264_scan8[0]] == *(uint32_t*)h->mb.cache.pskip_mv
            && h->mb.cache.ref[0][x264_scan8[0]] == 0 )
        {
            h->mb.i_type = P_SKIP;
        }

        /* Check for B_SKIP */
        if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) )
        {
            h->mb.i_type = B_SKIP;
        }
    }
}

/*****************************************************************************
 * x264_macroblock_probe_skip:
 *  Check if the current MB could be encoded as a [PB]_SKIP (it assumes the
 *  previous QP is used)
 *****************************************************************************/
845
int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
Laurent Aimar's avatar
Laurent Aimar committed
846
{
Fiona Glaser's avatar
Fiona Glaser committed
847
    DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
Loren Merritt's avatar
Loren Merritt committed
848 849
    DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
    DECLARE_ALIGNED_16( int16_t dctscan[16] );
Laurent Aimar's avatar
Laurent Aimar committed
850

851
    int i_qp = h->mb.i_qp;
Laurent Aimar's avatar
Laurent Aimar committed
852
    int mvp[2];
Fiona Glaser's avatar
Fiona Glaser committed
853
    int ch, thresh, ssd;
Laurent Aimar's avatar
Laurent Aimar committed
854 855 856 857

    int i8x8, i4x4;
    int i_decimate_mb;

858 859 860
    if( !b_bidir )
    {
        /* Get the MV */
861 862
        mvp[0] = x264_clip3( h->mb.cache.pskip_mv[0], h->mb.mv_min[0], h->mb.mv_max[0] );
        mvp[1] = x264_clip3( h->mb.cache.pskip_mv[1], h->mb.mv_min[1], h->mb.mv_max[1] );
Laurent Aimar's avatar
Laurent Aimar committed
863

864
        /* Motion compensation */
865 866
        h->mc.mc_luma( h->mb.pic.p_fdec[0],    FDEC_STRIDE,
                       h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
867
                       mvp[0], mvp[1], 16, 16 );
868
    }
Laurent Aimar's avatar
Laurent Aimar committed
869 870 871

    for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
    {
Fiona Glaser's avatar
Fiona Glaser committed
872 873 874 875 876
        int fenc_offset = (i8x8&1) * 8 + (i8x8>>1) * FENC_STRIDE * 8;
        int fdec_offset = (i8x8&1) * 8 + (i8x8>>1) * FDEC_STRIDE * 8;
        /* get luma diff */
        h->dctf.sub8x8_dct( dct4x4, h->mb.pic.p_fenc[0] + fenc_offset,
                                    h->mb.pic.p_fdec[0] + fdec_offset );
Laurent Aimar's avatar
Laurent Aimar committed
877 878 879
        /* encode one 4x4 block */
        for( i4x4 = 0; i4x4 < 4; i4x4++ )
        {
880
            if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ) )
Fiona Glaser's avatar
Fiona Glaser committed
881 882
                continue;
            h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
883
            i_decimate_mb += h->quantf.decimate_score16( dctscan );
Laurent Aimar's avatar
Laurent Aimar committed
884 885 886 887 888 889
            if( i_decimate_mb >= 6 )
                return 0;
        }
    }

    /* encode chroma */
890
    i_qp = h->mb.i_chroma_qp;
891
    thresh = (x264_lambda2_tab[i_qp] + 32) >> 6;
Laurent Aimar's avatar
Laurent Aimar committed
892 893 894 895 896 897

    for( ch = 0; ch < 2; ch++ )
    {
        uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
        uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];

898 899
        if( !b_bidir )
        {
900 901
            h->mc.mc_chroma( h->mb.pic.p_fdec[1+ch],       FDEC_STRIDE,
                             h->mb.pic.p_fref[0][0][4+ch], h->mb.pic.i_stride[1+ch],
902
                             mvp[0], mvp[1], 8, 8 );
903
        }
Laurent Aimar's avatar
Laurent Aimar committed
904

905 906
        /* there is almost never a termination during chroma, but we can't avoid the check entirely */
        /* so instead we check SSD and skip the actual check if the score is low enough. */
Fiona Glaser's avatar
Fiona Glaser committed
907 908
        ssd = h->pixf.ssd[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
        if( ssd < thresh )
909 910
            continue;

911
        h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
Laurent Aimar's avatar
Laurent Aimar committed
912 913

        /* calculate dct DC */
Fiona Glaser's avatar
Fiona Glaser committed
914
        dct2x2dc( dct2x2, dct4x4 );
915
        if( h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4PC][i_qp][0]>>1, h->quant4_bias[CQM_4PC][i_qp][0]<<1 ) )
Laurent Aimar's avatar
Laurent Aimar committed
916 917
            return 0;

Fiona Glaser's avatar
Fiona Glaser committed
918 919 920 921
        /* If there wasn't a termination in DC, we can check against a much higher threshold. */
        if( ssd < thresh*4 )
            continue;

Laurent Aimar's avatar
Laurent Aimar committed
922 923 924
        /* calculate dct coeffs */
        for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
        {
Fiona Glaser's avatar