macroblock.c 56.5 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1
/*****************************************************************************
 * macroblock.c: macroblock encoding
 *****************************************************************************
 * Copyright (C) 2003-2016 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *          Henrik Gramner <henrik@gramner.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/

29
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
30 31
#include "macroblock.h"

Fiona Glaser's avatar
Fiona Glaser committed
32 33
/* These chroma DC functions don't have assembly versions and are only used here. */

Loren Merritt's avatar
Loren Merritt committed
34
#define ZIG(i,y,x) level[i] = dct[x*2+y];
35
static inline void zigzag_scan_2x2_dc( dctcoef level[4], dctcoef dct[4] )
Laurent Aimar's avatar
Laurent Aimar committed
36
{
37 38 39 40
    ZIG(0,0,0)
    ZIG(1,0,1)
    ZIG(2,1,0)
    ZIG(3,1,1)
Laurent Aimar's avatar
Laurent Aimar committed
41
}
42
#undef ZIG
Laurent Aimar's avatar
Laurent Aimar committed
43

Henrik Gramner's avatar
Henrik Gramner committed
44 45 46 47 48 49 50 51 52 53 54 55 56
/* Copy the eight 2x4 chroma DC coefficients from dct[] into level[] in scan
 * order. */
static inline void zigzag_scan_2x4_dc( dctcoef level[8], dctcoef dct[8] )
{
    /* scan_order[k] is the index in dct[] of the k-th coefficient written to level[]. */
    static const int scan_order[8] = { 0, 2, 1, 4, 6, 3, 5, 7 };

    for( int k = 0; k < 8; k++ )
        level[k] = dct[scan_order[k]];
}

#define IDCT_DEQUANT_2X2_START \
Loren Merritt's avatar
Loren Merritt committed
57 58 59 60
    int d0 = dct[0] + dct[1]; \
    int d1 = dct[2] + dct[3]; \
    int d2 = dct[0] - dct[1]; \
    int d3 = dct[2] - dct[3]; \
61
    int dmf = dequant_mf[i_qp%6][0] << i_qp/6;
62

63
static inline void idct_dequant_2x2_dc( dctcoef dct[4], dctcoef dct4x4[4][16], int dequant_mf[6][16], int i_qp )
Fiona Glaser's avatar
Fiona Glaser committed
64
{
Henrik Gramner's avatar
Henrik Gramner committed
65
    IDCT_DEQUANT_2X2_START
66 67 68 69
    dct4x4[0][0] = (d0 + d1) * dmf >> 5;
    dct4x4[1][0] = (d0 - d1) * dmf >> 5;
    dct4x4[2][0] = (d2 + d3) * dmf >> 5;
    dct4x4[3][0] = (d2 - d3) * dmf >> 5;
Fiona Glaser's avatar
Fiona Glaser committed
70 71
}

Henrik Gramner's avatar
Henrik Gramner committed
72
static inline void idct_dequant_2x2_dconly( dctcoef dct[4], int dequant_mf[6][16], int i_qp )
73
{
Henrik Gramner's avatar
Henrik Gramner committed
74 75 76 77 78
    IDCT_DEQUANT_2X2_START
    dct[0] = (d0 + d1) * dmf >> 5;
    dct[1] = (d0 - d1) * dmf >> 5;
    dct[2] = (d2 + d3) * dmf >> 5;
    dct[3] = (d2 - d3) * dmf >> 5;
79
}
Henrik Gramner's avatar
Henrik Gramner committed
80
#undef IDCT_2X2_DEQUANT_START
81

82
/* Forward 2x2 transform of the chroma DC coefficients.
 * Reads the DC term (element 0) of the four 4x4 blocks in dct4x4[], writes
 * the transformed values to d[0..3], and then zeroes the DC slots in dct4x4[]
 * so the 4x4 blocks carry AC coefficients only. */
static inline void dct2x2dc( dctcoef d[4], dctcoef dct4x4[4][16] )
{
    int d0 = dct4x4[0][0] + dct4x4[1][0];
    int d1 = dct4x4[2][0] + dct4x4[3][0];
    int d2 = dct4x4[0][0] - dct4x4[1][0];
    int d3 = dct4x4[2][0] - dct4x4[3][0];
    d[0] = d0 + d1;
    d[2] = d2 + d3;
    d[1] = d0 - d1;
    d[3] = d2 - d3;
    dct4x4[0][0] = 0;
    dct4x4[1][0] = 0;
    dct4x4[2][0] = 0;
    dct4x4[3][0] = 0;
}

Henrik Gramner's avatar
Henrik Gramner committed
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
/* Return 1 if any of the first i_count coefficients of v is non-zero, else 0.
 * The array is tested one machine word at a time (64-bit loads when
 * WORD_SIZE == 8, 32-bit loads otherwise), so each iteration covers several
 * coefficients.
 * NOTE(review): the M64/M32 accesses presumably require v to be suitably
 * aligned and i_count to be a multiple of the per-word coefficient count -
 * confirm against callers. */
static ALWAYS_INLINE int array_non_zero( dctcoef *v, int i_count )
{
    int pos = 0;

    if( WORD_SIZE == 8 )
    {
        const int per_word = 8/sizeof(dctcoef);
        while( pos < i_count )
        {
            if( M64( &v[pos] ) )
                return 1;
            pos += per_word;
        }
        return 0;
    }

    const int per_word = 4/sizeof(dctcoef);
    while( pos < i_count )
    {
        if( M32( &v[pos] ) )
            return 1;
        pos += per_word;
    }
    return 0;
}

115 116 117 118 119 120 121 122 123 124 125
/* All encoding functions must output the correct CBP and NNZ values.
 * The entropy coding functions will check CBP first, then NNZ, before
 * actually reading the DCT coefficients.  NNZ still must be correct even
 * if CBP is zero because of the use of NNZ values for context selection.
 * "NNZ" need only be 0 or 1 rather than the exact coefficient count because
 * that is only needed in CAVLC, and will be calculated by CAVLC's residual
 * coding and stored as necessary. */

/* This means that decimation can be done merely by adjusting the CBP and NNZ
 * rather than memsetting the coefficients. */

Fiona Glaser's avatar
Fiona Glaser committed
126
/* Encode plane p of an intra 16x16 macroblock: predict into fdec, transform
 * and quantize the residual, update the NNZ cache and h->mb.i_cbp_luma, and
 * add the reconstructed residual back into fdec.
 *
 *   h    - encoder context; reads h->mb.* settings and writes h->dct.*,
 *          h->mb.cache.non_zero_count and h->mb.i_cbp_luma
 *   p    - plane index; p != 0 selects the CQM_4IC quant matrices instead of
 *          CQM_4IY
 *   i_qp - quantizer used for this plane */
static void x264_mb_encode_i16x16( x264_t *h, int p, int i_qp )
{
    pixel *p_src = h->mb.pic.p_fenc[p];
    pixel *p_dst = h->mb.pic.p_fdec[p];

    ALIGNED_ARRAY_N( dctcoef, dct4x4,[16],[16] );
    ALIGNED_ARRAY_N( dctcoef, dct_dc4x4,[16] );

    int nz, block_cbp = 0;
    /* Starting at 9 when decimation is disabled makes the "< 6" test below
     * fail, so the AC coefficients are never decimated in that case. */
    int decimate_score = h->mb.b_dct_decimate ? 0 : 9;
    int i_quant_cat = p ? CQM_4IC : CQM_4IY;
    int i_mode = h->mb.i_intra16x16_pred_mode;

    if( h->mb.b_lossless )
        x264_predict_lossless_16x16( h, p, i_mode );
    else
        h->predict_16x16[i_mode]( h->mb.pic.p_fdec[p] );

    if( h->mb.b_lossless )
    {
        /* Lossless: no transform or quantization.  sub_4x4ac stores each
         * 4x4 block's residual into luma4x4[] and its DC term into
         * dct_dc4x4[], returning whether the block is non-zero. */
        for( int i = 0; i < 16; i++ )
        {
            int oe = block_idx_xy_fenc[i];
            int od = block_idx_xy_fdec[i];
            nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[16*p+i], p_src+oe, p_dst+od, &dct_dc4x4[block_idx_yx_1d[i]] );
            h->mb.cache.non_zero_count[x264_scan8[16*p+i]] = nz;
            block_cbp |= nz;
        }
        h->mb.i_cbp_luma |= block_cbp * 0xf;
        h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = array_non_zero( dct_dc4x4, 16 );
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc[p], dct_dc4x4 );
        return;
    }

    CLEAR_16x16_NNZ( p );

    h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );

    /* NOTE(review): the noise-reduction tables at index 0 are used for every
     * plane p here - confirm this is intended when p != 0. */
    if( h->mb.b_noise_reduction )
        for( int idx = 0; idx < 16; idx++ )
            h->quantf.denoise_dct( dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );

    /* Pull the 16 DC terms out into their own block; the 4x4 blocks keep AC only. */
    for( int idx = 0; idx < 16; idx++ )
    {
        dct_dc4x4[block_idx_xy_1d[idx]] = dct4x4[idx][0];
        dct4x4[idx][0] = 0;
    }

    if( h->mb.b_trellis )
    {
        for( int idx = 0; idx < 16; idx++ )
            if( x264_quant_4x4_trellis( h, dct4x4[idx], i_quant_cat, i_qp, ctx_cat_plane[DCT_LUMA_AC][p], 1, !!p, idx ) )
            {
                block_cbp = 0xf;
                h->zigzagf.scan_4x4( h->dct.luma4x4[16*p+idx], dct4x4[idx] );
                h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[i_quant_cat], i_qp );
                if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16*p+idx] );
                h->mb.cache.non_zero_count[x264_scan8[16*p+idx]] = 1;
            }
    }
    else
    {
        for( int i8x8 = 0; i8x8 < 4; i8x8++ )
        {
            /* Quantize four 4x4 blocks at once.
             * NOTE(review): nz is presumably a bitmask of the non-zero blocks,
             * walked by FOREACH_BIT - confirm against the macro definition. */
            nz = h->quantf.quant_4x4x4( &dct4x4[i8x8*4], h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
            if( nz )
            {
                block_cbp = 0xf;
                FOREACH_BIT( idx, i8x8*4, nz )
                {
                    h->zigzagf.scan_4x4( h->dct.luma4x4[16*p+idx], dct4x4[idx] );
                    h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[i_quant_cat], i_qp );
                    if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16*p+idx] );
                    h->mb.cache.non_zero_count[x264_scan8[16*p+idx]] = 1;
                }
            }
        }
    }

    /* Writing the 16 CBFs in an i16x16 block is quite costly, so decimation can save many bits. */
    /* More useful with CAVLC, but still useful with CABAC. */
    if( decimate_score < 6 )
    {
        CLEAR_16x16_NNZ( p );
        block_cbp = 0;
    }
    else
        h->mb.i_cbp_luma |= block_cbp;

    /* Transform and quantize the DC block. */
    h->dctf.dct4x4dc( dct_dc4x4 );
    if( h->mb.b_trellis )
        nz = x264_quant_luma_dc_trellis( h, dct_dc4x4, i_quant_cat, i_qp, ctx_cat_plane[DCT_LUMA_DC][p], 1, LUMA_DC+p );
    else
        nz = h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[i_quant_cat][i_qp][0]>>1, h->quant4_bias[i_quant_cat][i_qp][0]<<1 );

    h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = nz;
    if( nz )
    {
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc[p], dct_dc4x4 );

        /* output samples to fdec */
        h->dctf.idct4x4dc( dct_dc4x4 );
        h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[i_quant_cat], i_qp );  /* XXX not inversed */
        /* Re-insert the DC terms only when the AC blocks will be inverse
         * transformed; otherwise the DC-only path below consumes dct_dc4x4. */
        if( block_cbp )
            for( int i = 0; i < 16; i++ )
                dct4x4[i][0] = dct_dc4x4[block_idx_xy_1d[i]];
    }

    /* put pixels to fdec */
    if( block_cbp )
        h->dctf.add16x16_idct( p_dst, dct4x4 );
    else if( nz )
        h->dctf.add16x16_idct_dc( p_dst, dct_dc4x4 );
}

241 242 243 244
/* Round down coefficients losslessly in DC-only chroma blocks.
 * Unlike luma blocks, this can't be done with a lookup table or
 * other shortcut technique because of the interdependencies
 * between the coefficients due to the chroma DC transform. */
Henrik Gramner's avatar
Henrik Gramner committed
245
/* Run the chroma DC rounding optimization on dct_dc.
 *
 *   dct_dc     - quantized chroma DC coefficients, handed to the optimizer
 *   dequant_mf - dequant table; entry [i_qp%6][0] scaled by 2^(i_qp/6) gives
 *                the DC dequant factor
 *   i_qp       - QP used to index the dequant table
 *   chroma422  - non-zero selects the 2x4 optimizer, zero the 2x2 one
 *
 * Returns 1 without touching dct_dc when the dequant factor exceeds 32*64;
 * otherwise returns the optimizer's result.  Callers treat a zero return as
 * "no DC coefficients left". */
static ALWAYS_INLINE int x264_mb_optimize_chroma_dc( x264_t *h, dctcoef *dct_dc, int dequant_mf[6][16], int i_qp, int chroma422 )
{
    int dmf = dequant_mf[i_qp%6][0] << i_qp/6;

    /* If the QP is too high, there's no benefit to rounding optimization. */
    if( dmf > 32*64 )
        return 1;

    if( chroma422 )
        return h->quantf.optimize_chroma_2x4_dc( dct_dc, dmf );
    else
        return h->quantf.optimize_chroma_2x2_dc( dct_dc, dmf );
}

Henrik Gramner's avatar
Henrik Gramner committed
259
/* Encode both chroma planes of the current macroblock: transform and quantize
 * the residual between fenc and fdec, update the chroma NNZ cache, optionally
 * decimate, and add the reconstructed residual back into fdec.
 *
 *   h         - encoder context
 *   b_inter   - non-zero for inter macroblocks; selects the CQM_4IC + b_inter
 *               quant/dequant tables and enables decimation when
 *               h->mb.b_dct_decimate is also set
 *   i_qp      - chroma QP; DC quantization uses i_qp + 3 when chroma422 is set
 *   chroma422 - non-zero when each plane consists of two stacked 8x8 halves
 *               (8 4x4 blocks, 2x4 DC); zero for a single 8x8 (4 blocks, 2x2 DC)
 *
 * On return h->mb.i_cbp_chroma is 0 (none), 1 (DC only) or 2 (DC+AC). */
static ALWAYS_INLINE void x264_mb_encode_chroma_internal( x264_t *h, int b_inter, int i_qp, int chroma422 )
{
    int nz, nz_dc;
    int b_decimate = b_inter && h->mb.b_dct_decimate;
    int (*dequant_mf)[16] = h->dequant4_mf[CQM_4IC + b_inter];
    ALIGNED_ARRAY_16( dctcoef, dct_dc,[8] );
    h->mb.i_cbp_chroma = 0;
    h->nr_count[2] += h->mb.b_noise_reduction * 4;

    /* Clear the AC NNZ entries of both planes, two cache entries per store. */
    M16( &h->mb.cache.non_zero_count[x264_scan8[16]] ) = 0;
    M16( &h->mb.cache.non_zero_count[x264_scan8[18]] ) = 0;
    M16( &h->mb.cache.non_zero_count[x264_scan8[32]] ) = 0;
    M16( &h->mb.cache.non_zero_count[x264_scan8[34]] ) = 0;
    if( chroma422 )
    {
        M16( &h->mb.cache.non_zero_count[x264_scan8[24]] ) = 0;
        M16( &h->mb.cache.non_zero_count[x264_scan8[26]] ) = 0;
        M16( &h->mb.cache.non_zero_count[x264_scan8[40]] ) = 0;
        M16( &h->mb.cache.non_zero_count[x264_scan8[42]] ) = 0;
    }

    /* Early termination: check variance of chroma residual before encoding.
     * Don't bother trying early termination at low QPs.
     * Values are experimentally derived. */
    if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) && !h->mb.b_noise_reduction )
    {
        int thresh = chroma422 ? (x264_lambda2_tab[i_qp] + 16) >> 5 : (x264_lambda2_tab[i_qp] + 32) >> 6;
        int ssd[2];
        int chromapix = chroma422 ? PIXEL_8x16 : PIXEL_8x8;

        /* ssd[1] is only written when the first plane's score is below the
         * limit; it is only read inside the branch that requires the same. */
        int score  = h->pixf.var2[chromapix]( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE, &ssd[0] );
        if( score < thresh*4 )
            score += h->pixf.var2[chromapix]( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE, &ssd[1] );
        if( score < thresh*4 )
        {
            h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+0]] = 0;
            h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+1]] = 0;

            /* AC is dropped entirely on this path; a plane whose SSD exceeds
             * the threshold still gets its DC coefficients coded. */
            for( int ch = 0; ch < 2; ch++ )
            {
                if( ssd[ch] > thresh )
                {
                    pixel *p_src = h->mb.pic.p_fenc[1+ch];
                    pixel *p_dst = h->mb.pic.p_fdec[1+ch];

                    if( chroma422 )
                        /* Cannot be replaced by two calls to sub8x8_dct_dc since the hadamard transform is different */
                        h->dctf.sub8x16_dct_dc( dct_dc, p_src, p_dst );
                    else
                        h->dctf.sub8x8_dct_dc( dct_dc, p_src, p_dst );

                    if( h->mb.b_trellis )
                        nz_dc = x264_quant_chroma_dc_trellis( h, dct_dc, i_qp+3*chroma422, !b_inter, CHROMA_DC+ch );
                    else
                    {
                        nz_dc = 0;
                        for( int i = 0; i <= chroma422; i++ )
                            nz_dc |= h->quantf.quant_2x2_dc( &dct_dc[4*i], h->quant4_mf[CQM_4IC+b_inter][i_qp+3*chroma422][0] >> 1,
                                                             h->quant4_bias[CQM_4IC+b_inter][i_qp+3*chroma422][0] << 1 );
                    }

                    if( nz_dc )
                    {
                        if( !x264_mb_optimize_chroma_dc( h, dct_dc, dequant_mf, i_qp+3*chroma422, chroma422 ) )
                            continue;
                        h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = 1;
                        if( chroma422 )
                        {
                            zigzag_scan_2x4_dc( h->dct.chroma_dc[ch], dct_dc );
                            h->quantf.idct_dequant_2x4_dconly( dct_dc, dequant_mf, i_qp+3 );
                        }
                        else
                        {
                            zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct_dc );
                            idct_dequant_2x2_dconly( dct_dc, dequant_mf, i_qp );
                        }

                        for( int i = 0; i <= chroma422; i++ )
                            h->dctf.add8x8_idct_dc( p_dst + 8*i*FDEC_STRIDE, &dct_dc[4*i] );
                        h->mb.i_cbp_chroma = 1;
                    }
                }
            }
            return;
        }
    }

    for( int ch = 0; ch < 2; ch++ )
    {
        pixel *p_src = h->mb.pic.p_fenc[1+ch];
        pixel *p_dst = h->mb.pic.p_fdec[1+ch];
        /* Starting at 7 when decimation is off makes the "< 7" tests below fail. */
        int i_decimate_score = b_decimate ? 0 : 7;
        int nz_ac = 0;

        ALIGNED_ARRAY_N( dctcoef, dct4x4,[8],[16] );

        if( h->mb.b_lossless )
        {
            static const uint8_t chroma422_scan[8] = { 0, 2, 1, 5, 3, 6, 4, 7 };

            /* Lossless: store the residual directly, no transform or quantization. */
            for( int i = 0; i < (chroma422?8:4); i++ )
            {
                int oe = 4*(i&1) + 4*(i>>1)*FENC_STRIDE;
                int od = 4*(i&1) + 4*(i>>1)*FDEC_STRIDE;
                nz = h->zigzagf.sub_4x4ac( h->dct.luma4x4[16+i+(chroma422?i&4:0)+ch*16], p_src+oe, p_dst+od,
                                           &h->dct.chroma_dc[ch][chroma422?chroma422_scan[i]:i] );
                h->mb.cache.non_zero_count[x264_scan8[16+i+(chroma422?i&4:0)+ch*16]] = nz;
                h->mb.i_cbp_chroma |= nz;
            }
            h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = array_non_zero( h->dct.chroma_dc[ch], chroma422?8:4 );
            continue;
        }

        for( int i = 0; i <= chroma422; i++ )
            h->dctf.sub8x8_dct( &dct4x4[4*i], p_src + 8*i*FENC_STRIDE, p_dst + 8*i*FDEC_STRIDE );

        if( h->mb.b_noise_reduction )
            for( int i = 0; i < (chroma422?8:4); i++ )
                h->quantf.denoise_dct( dct4x4[i], h->nr_residual_sum[2], h->nr_offset[2], 16 );

        /* Split off the DC coefficients into dct_dc. */
        if( chroma422 )
            h->dctf.dct2x4dc( dct_dc, dct4x4 );
        else
            dct2x2dc( dct_dc, dct4x4 );

        /* calculate dct coeffs */
        for( int i8x8 = 0; i8x8 < (chroma422?2:1); i8x8++ )
        {
            if( h->mb.b_trellis )
            {
                for( int i4x4 = 0; i4x4 < 4; i4x4++ )
                {
                    if( x264_quant_4x4_trellis( h, dct4x4[i8x8*4+i4x4], CQM_4IC+b_inter, i_qp, DCT_CHROMA_AC, !b_inter, 1, 0 ) )
                    {
                        int idx = 16+ch*16+i8x8*8+i4x4;
                        h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[i8x8*4+i4x4] );
                        h->quantf.dequant_4x4( dct4x4[i8x8*4+i4x4], dequant_mf, i_qp );
                        if( i_decimate_score < 7 )
                            i_decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[idx] );
                        h->mb.cache.non_zero_count[x264_scan8[idx]] = 1;
                        nz_ac = 1;
                    }
                }
            }
            else
            {
                /* NOTE(review): nz is presumably a bitmask of the non-zero 4x4
                 * blocks, walked by FOREACH_BIT - confirm against the macro. */
                nz = h->quantf.quant_4x4x4( &dct4x4[i8x8*4], h->quant4_mf[CQM_4IC+b_inter][i_qp],
                                            h->quant4_bias[CQM_4IC+b_inter][i_qp] );
                nz_ac |= nz;

                FOREACH_BIT( i4x4, 0, nz )
                {
                    int idx = 16+ch*16+i8x8*8+i4x4;

                    h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[i8x8*4+i4x4] );
                    h->quantf.dequant_4x4( dct4x4[i8x8*4+i4x4], dequant_mf, i_qp );
                    if( i_decimate_score < 7 )
                        i_decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[idx] );
                    h->mb.cache.non_zero_count[x264_scan8[idx]] = 1;
                }
            }
        }

        if( h->mb.b_trellis )
            nz_dc = x264_quant_chroma_dc_trellis( h, dct_dc, i_qp+3*chroma422, !b_inter, CHROMA_DC+ch );
        else
        {
            nz_dc = 0;
            for( int i = 0; i <= chroma422; i++ )
                nz_dc |= h->quantf.quant_2x2_dc( &dct_dc[4*i], h->quant4_mf[CQM_4IC+b_inter][i_qp+3*chroma422][0] >> 1,
                                                 h->quant4_bias[CQM_4IC+b_inter][i_qp+3*chroma422][0] << 1 );
        }

        h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = nz_dc;

        if( i_decimate_score < 7 || !nz_ac )
        {
            /* Decimate the block */
            M16( &h->mb.cache.non_zero_count[x264_scan8[16+16*ch]] ) = 0;
            M16( &h->mb.cache.non_zero_count[x264_scan8[18+16*ch]] ) = 0;
            if( chroma422 )
            {
                M16( &h->mb.cache.non_zero_count[x264_scan8[24+16*ch]] ) = 0;
                M16( &h->mb.cache.non_zero_count[x264_scan8[26+16*ch]] ) = 0;
            }

            if( !nz_dc ) /* Whole block is empty */
                continue;
            if( !x264_mb_optimize_chroma_dc( h, dct_dc, dequant_mf, i_qp+3*chroma422, chroma422 ) )
            {
                h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+ch]] = 0;
                continue;
            }
            /* DC-only */
            if( chroma422 )
            {
                zigzag_scan_2x4_dc( h->dct.chroma_dc[ch], dct_dc );
                h->quantf.idct_dequant_2x4_dconly( dct_dc, dequant_mf, i_qp+3 );
            }
            else
            {
                zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct_dc );
                idct_dequant_2x2_dconly( dct_dc, dequant_mf, i_qp );
            }

            for( int i = 0; i <= chroma422; i++ )
                h->dctf.add8x8_idct_dc( p_dst + 8*i*FDEC_STRIDE, &dct_dc[4*i] );
        }
        else
        {
            /* AC survives: flag it here; the final sum below turns this into 2. */
            h->mb.i_cbp_chroma = 1;

            if( nz_dc )
            {
                if( chroma422 )
                {
                    zigzag_scan_2x4_dc( h->dct.chroma_dc[ch], dct_dc );
                    h->quantf.idct_dequant_2x4_dc( dct_dc, dct4x4, dequant_mf, i_qp+3 );
                }
                else
                {
                    zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct_dc );
                    idct_dequant_2x2_dc( dct_dc, dct4x4, dequant_mf, i_qp );
                }
            }

            for( int i = 0; i <= chroma422; i++ )
                h->dctf.add8x8_idct( p_dst + 8*i*FDEC_STRIDE, &dct4x4[4*i] );
        }
    }

    /* 0 = none, 1 = DC only, 2 = DC+AC */
    h->mb.i_cbp_chroma += (h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+0]] |
                           h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+1]] | h->mb.i_cbp_chroma);
}

Henrik Gramner's avatar
Henrik Gramner committed
495 496 497 498 499 500 501 502
/* Public entry point for chroma encoding of the current macroblock.
 * Dispatches to x264_mb_encode_chroma_internal with chroma422 = 0 when
 * CHROMA_FORMAT is CHROMA_420 and chroma422 = 1 otherwise.  The flag is
 * passed as a literal in each call, presumably so the always-inlined body can
 * be specialized per format. */
void x264_mb_encode_chroma( x264_t *h, int b_inter, int i_qp )
{
    if( CHROMA_FORMAT != CHROMA_420 )
    {
        x264_mb_encode_chroma_internal( h, b_inter, i_qp, 1 );
        return;
    }

    x264_mb_encode_chroma_internal( h, b_inter, i_qp, 0 );
}

503 504
/* Mark the current macroblock as carrying no residual: zero the luma and
 * chroma entries of the NNZ cache (four entries per 32-bit store) and clear
 * the luma, chroma and stored per-macroblock CBP values. */
static void x264_macroblock_encode_skip( x264_t *h )
{
    M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = 0;
    M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = 0;
    M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = 0;
    M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = 0;
    M32( &h->mb.cache.non_zero_count[x264_scan8[16+ 0]] ) = 0;
    M32( &h->mb.cache.non_zero_count[x264_scan8[16+ 2]] ) = 0;
    M32( &h->mb.cache.non_zero_count[x264_scan8[32+ 0]] ) = 0;
    M32( &h->mb.cache.non_zero_count[x264_scan8[32+ 2]] ) = 0;
    /* Formats at or above CHROMA_422 have additional rows to clear. */
    if( CHROMA_FORMAT >= CHROMA_422 )
    {
        M32( &h->mb.cache.non_zero_count[x264_scan8[16+ 8]] ) = 0;
        M32( &h->mb.cache.non_zero_count[x264_scan8[16+10]] ) = 0;
        M32( &h->mb.cache.non_zero_count[x264_scan8[32+ 8]] ) = 0;
        M32( &h->mb.cache.non_zero_count[x264_scan8[32+10]] ) = 0;
    }
    h->mb.i_cbp_luma = 0;
    h->mb.i_cbp_chroma = 0;
    h->mb.cbp[h->mb.i_mb_xy] = 0;
}

525 526 527 528
/*****************************************************************************
 * Intra prediction for predictive lossless mode.
 *****************************************************************************/

Henrik Gramner's avatar
Henrik Gramner committed
529
/* Build the chroma intra prediction for lossless mode in fdec planes 1 and 2.
 *
 * For the vertical and horizontal modes the prediction is copied from the
 * source (fenc) planes shifted by one row or one column respectively, and the
 * first row / first column is then refilled from the neighbouring fdec
 * pixels.  All other modes use the regular h->predict_chroma[] predictor.
 *
 *   i_mode - chroma intra prediction mode (I_PRED_CHROMA_*) */
void x264_predict_lossless_chroma( x264_t *h, int i_mode )
{
    int height = 16 >> CHROMA_V_SHIFT;
    if( i_mode == I_PRED_CHROMA_V )
    {
        h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1]-FENC_STRIDE, FENC_STRIDE, height );
        h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2]-FENC_STRIDE, FENC_STRIDE, height );
        /* Top row: take it from the fdec row above the block. */
        memcpy( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[1]-FDEC_STRIDE, 8*sizeof(pixel) );
        memcpy( h->mb.pic.p_fdec[2], h->mb.pic.p_fdec[2]-FDEC_STRIDE, 8*sizeof(pixel) );
    }
    else if( i_mode == I_PRED_CHROMA_H )
    {
        h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1]-1, FENC_STRIDE, height );
        h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2]-1, FENC_STRIDE, height );
        /* Left column: take it from the fdec column left of the block.
         * NOTE(review): x264_copy_column8 presumably copies 8 rows centred on
         * the given row (hence the 4*FDEC_STRIDE offset) - confirm. */
        x264_copy_column8( h->mb.pic.p_fdec[1]+4*FDEC_STRIDE, h->mb.pic.p_fdec[1]+4*FDEC_STRIDE-1 );
        x264_copy_column8( h->mb.pic.p_fdec[2]+4*FDEC_STRIDE, h->mb.pic.p_fdec[2]+4*FDEC_STRIDE-1 );
        if( CHROMA_FORMAT == CHROMA_422 )
        {
            x264_copy_column8( h->mb.pic.p_fdec[1]+12*FDEC_STRIDE, h->mb.pic.p_fdec[1]+12*FDEC_STRIDE-1 );
            x264_copy_column8( h->mb.pic.p_fdec[2]+12*FDEC_STRIDE, h->mb.pic.p_fdec[2]+12*FDEC_STRIDE-1 );
        }
    }
    else
    {
        h->predict_chroma[i_mode]( h->mb.pic.p_fdec[1] );
        h->predict_chroma[i_mode]( h->mb.pic.p_fdec[2] );
    }
}

Fiona Glaser's avatar
Fiona Glaser committed
558
/* Build the lossless-mode intra 4x4 prediction for block idx of plane p.
 *
 * Vertical and horizontal modes copy the source block shifted up by one row
 * or left by one column out of the full source plane; every other mode uses
 * the regular h->predict_4x4[] predictor.
 *
 *   p_dst  - destination 4x4 block in fdec (row pitch FDEC_STRIDE)
 *   p      - plane index
 *   idx    - 4x4 block index within the macroblock
 *   i_mode - intra 4x4 prediction mode (I_PRED_4x4_*) */
void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int p, int idx, int i_mode )
{
    /* Source plane stride, doubled (shifted by MB_INTERLACED) for interlaced MBs. */
    int stride = h->fenc->i_stride[p] << MB_INTERLACED;
    pixel *p_src = h->mb.pic.p_fenc_plane[p] + block_idx_x[idx]*4 + block_idx_y[idx]*4 * stride;

    if( i_mode == I_PRED_4x4_V )
        h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, p_src-stride, stride, 4 );
    else if( i_mode == I_PRED_4x4_H )
        h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, p_src-1, stride, 4 );
    else
        h->predict_4x4[i_mode]( p_dst );
}

571
/* Build the lossless-mode intra 8x8 prediction for block idx of plane p.
 *
 * Vertical and horizontal modes copy the source block shifted up by one row
 * or left by one column out of the full source plane; every other mode uses
 * the regular h->predict_8x8[] predictor with the supplied edge buffer.
 *
 *   p_dst  - destination 8x8 block in fdec (row pitch FDEC_STRIDE)
 *   p      - plane index
 *   idx    - 8x8 block index (bit 0 = column, bit 1 = row)
 *   i_mode - intra 8x8 prediction mode (I_PRED_8x8_*)
 *   edge   - edge pixels, only passed on to the regular predictor */
void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[36] )
{
    /* Source plane stride, doubled (shifted by MB_INTERLACED) for interlaced MBs. */
    int stride = h->fenc->i_stride[p] << MB_INTERLACED;
    pixel *p_src = h->mb.pic.p_fenc_plane[p] + (idx&1)*8 + (idx>>1)*8*stride;

    if( i_mode == I_PRED_8x8_V )
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src-stride, stride, 8 );
    else if( i_mode == I_PRED_8x8_H )
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src-1, stride, 8 );
    else
        h->predict_8x8[i_mode]( p_dst, edge );
}

Fiona Glaser's avatar
Fiona Glaser committed
584
/* Build the lossless-mode intra 16x16 prediction for plane p in fdec.
 *
 * Vertical and horizontal modes copy the source macroblock shifted up by one
 * row or left by one column out of the full source plane; every other mode
 * uses the regular h->predict_16x16[] predictor.
 *
 *   p      - plane index
 *   i_mode - intra 16x16 prediction mode (I_PRED_16x16_*) */
void x264_predict_lossless_16x16( x264_t *h, int p, int i_mode )
{
    /* Source plane stride, doubled (shifted by MB_INTERLACED) for interlaced MBs. */
    int stride = h->fenc->i_stride[p] << MB_INTERLACED;
    if( i_mode == I_PRED_16x16_V )
        h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[p], FDEC_STRIDE, h->mb.pic.p_fenc_plane[p]-stride, stride, 16 );
    else if( i_mode == I_PRED_16x16_H )
        /* The source pointer is offset by one pixel, so the unaligned copy is used. */
        h->mc.copy_16x16_unaligned( h->mb.pic.p_fdec[p], FDEC_STRIDE, h->mb.pic.p_fenc_plane[p]-1, stride, 16 );
    else
        h->predict_16x16[i_mode]( h->mb.pic.p_fdec[p] );
}

Laurent Aimar's avatar
Laurent Aimar committed
595 596 597
/*****************************************************************************
 * x264_macroblock_encode:
 *****************************************************************************/
Fiona Glaser's avatar
Fiona Glaser committed
598
/* Encode the current macroblock (prediction, residual transform/quantisation,
 * reconstruction into p_fdec, and coded-block-pattern bookkeeping).
 *
 *   h           - encoder context; the macroblock state lives in h->mb
 *   plane_count - number of planes run through the luma code path
 *                 (x264_macroblock_encode passes 3 for CHROMA444, else 1)
 *   chroma      - non-zero to run the separate chroma code path
 *                 (x264_macroblock_encode passes 0 for CHROMA444, else 1)
 *
 * Outline:
 *   1. Reset the luma cbp and the per-plane DC non-zero flags.
 *   2. I_PCM: copy source pixels to the reconstruction buffers and return.
 *   3. If skipping is not allowed, turn P_SKIP/B_SKIP into P_L0/B_DIRECT.
 *   4. P_SKIP / B_SKIP: motion-compensate (unless already done) and return.
 *   5. Intra types: delegate to the per-type encoders.
 *      Inter types: motion-compensate, then code the residual (lossless,
 *      8x8 transform, or 4x4 transform) with optional decimation.
 *   6. Chroma (when `chroma` is set), then store the cbp.
 *   7. Unless skip was forced off, turn a residual-free P_L0 16x16 / B_DIRECT
 *      macroblock into P_SKIP / B_SKIP.
 *
 * In every per-plane loop, plane 0 uses h->mb.i_qp and later planes use
 * h->mb.i_chroma_qp (the loop increment rewrites i_qp). */
static ALWAYS_INLINE void x264_macroblock_encode_internal( x264_t *h, int plane_count, int chroma )
{
    int i_qp = h->mb.i_qp;
    int b_decimate = h->mb.b_dct_decimate;
    int b_force_no_skip = 0;
    int nz;
    h->mb.i_cbp_luma = 0;
    for( int p = 0; p < plane_count; p++ )
        h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = 0;

    if( h->mb.i_type == I_PCM )
    {
        /* if PCM is chosen, we need to store reconstructed frame data */
        for( int p = 0; p < plane_count; p++ )
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[p], FDEC_STRIDE, h->mb.pic.p_fenc[p], FENC_STRIDE, 16 );
        if( chroma )
        {
            int height = 16 >> CHROMA_V_SHIFT;
            h->mc.copy[PIXEL_8x8]  ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, height );
            h->mc.copy[PIXEL_8x8]  ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, height );
        }
        return;
    }

    if( !h->mb.b_allow_skip )
    {
        /* Skip is disallowed: demote skip types to their non-skip equivalents
         * and suppress the skip re-detection at the end of the function. */
        b_force_no_skip = 1;
        if( IS_SKIP(h->mb.i_type) )
        {
            if( h->mb.i_type == P_SKIP )
                h->mb.i_type = P_L0;
            else if( h->mb.i_type == B_SKIP )
                h->mb.i_type = B_DIRECT;
        }
    }

    if( h->mb.i_type == P_SKIP )
    {
        /* don't do pskip motion compensation if it was already done in macroblock_analyse */
        if( !h->mb.b_skip_mc )
        {
            int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
                                  h->mb.mv_min[0], h->mb.mv_max[0] );
            int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
                                  h->mb.mv_min[1], h->mb.mv_max[1] );

            for( int p = 0; p < plane_count; p++ )
                h->mc.mc_luma( h->mb.pic.p_fdec[p], FDEC_STRIDE,
                               &h->mb.pic.p_fref[0][0][p*4], h->mb.pic.i_stride[p],
                               mvx, mvy, 16, 16, &h->sh.weight[0][p] );

            if( chroma )
            {
                int v_shift = CHROMA_V_SHIFT;
                int height = 16 >> v_shift;

                /* Special case for mv0, which is (of course) very common in P-skip mode. */
                if( mvx | mvy )
                    h->mc.mc_chroma( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2], FDEC_STRIDE,
                                     h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
                                     mvx, 2*mvy>>v_shift, 8, height );
                else
                    h->mc.load_deinterleave_chroma_fdec( h->mb.pic.p_fdec[1], h->mb.pic.p_fref[0][0][4],
                                                         h->mb.pic.i_stride[1], height );

                /* Apply the chroma weight functions in place, when present. */
                if( h->sh.weight[0][1].weightfn )
                    h->sh.weight[0][1].weightfn[8>>2]( h->mb.pic.p_fdec[1], FDEC_STRIDE,
                                                       h->mb.pic.p_fdec[1], FDEC_STRIDE,
                                                       &h->sh.weight[0][1], height );
                if( h->sh.weight[0][2].weightfn )
                    h->sh.weight[0][2].weightfn[8>>2]( h->mb.pic.p_fdec[2], FDEC_STRIDE,
                                                       h->mb.pic.p_fdec[2], FDEC_STRIDE,
                                                       &h->sh.weight[0][2], height );
            }
        }

        x264_macroblock_encode_skip( h );
        return;
    }
    if( h->mb.i_type == B_SKIP )
    {
        /* don't do bskip motion compensation if it was already done in macroblock_analyse */
        if( !h->mb.b_skip_mc )
            x264_mb_mc( h );
        x264_macroblock_encode_skip( h );
        return;
    }

    if( h->mb.i_type == I_16x16 )
    {
        h->mb.b_transform_8x8 = 0;

        for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
            x264_mb_encode_i16x16( h, p, i_qp );
    }
    else if( h->mb.i_type == I_8x8 )
    {
        h->mb.b_transform_8x8 = 1;
        /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 );
            M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = h->mb.pic.i8x8_nnz_buf[0];
            M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = h->mb.pic.i8x8_nnz_buf[1];
            M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = h->mb.pic.i8x8_nnz_buf[2];
            M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = h->mb.pic.i8x8_nnz_buf[3];
            h->mb.i_cbp_luma = h->mb.pic.i8x8_cbp;
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
                h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) );
        }
        for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
        {
            /* On plane 0 only the last block remains when the cached result was restored. */
            for( int i = (p == 0 && h->mb.i_skip_intra) ? 3 : 0 ; i < 4; i++ )
            {
                int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
                x264_mb_encode_i8x8( h, p, i, i_qp, i_mode, NULL, 1 );
            }
        }
    }
    else if( h->mb.i_type == I_4x4 )
    {
        h->mb.b_transform_8x8 = 0;
        /* If we already encoded 15 of the 16 i4x4 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
            M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = h->mb.pic.i4x4_nnz_buf[0];
            M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = h->mb.pic.i4x4_nnz_buf[1];
            M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = h->mb.pic.i4x4_nnz_buf[2];
            M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = h->mb.pic.i4x4_nnz_buf[3];
            h->mb.i_cbp_luma = h->mb.pic.i4x4_cbp;
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
                h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
        }
        for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
        {
            /* On plane 0 only the last block remains when the cached result was restored. */
            for( int i = (p == 0 && h->mb.i_skip_intra) ? 15 : 0 ; i < 16; i++ )
            {
                pixel *p_dst = &h->mb.pic.p_fdec[p][block_idx_xy_fdec[i]];
                int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];

                if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                    /* emulate missing topright samples */
                    MPIXEL_X4( &p_dst[4-FDEC_STRIDE] ) = PIXEL_SPLAT_X4( p_dst[3-FDEC_STRIDE] );

                x264_mb_encode_i4x4( h, p, i, i_qp, i_mode, 1 );
            }
        }
    }
    else    /* Inter MB */
    {
        /* Decimation score accumulated over the blocks processed below. */
        int i_decimate_mb = 0;

        /* Don't repeat motion compensation if it was already done in non-RD transform analysis */
        if( !h->mb.b_skip_mc )
            x264_mb_mc( h );

        if( h->mb.b_lossless )
        {
            /* Lossless: the residual goes straight through the zigzag
             * subtract helpers, with no transform or quantisation here. */
            if( h->mb.b_transform_8x8 )
                for( int p = 0; p < plane_count; p++ )
                    for( int i8x8 = 0; i8x8 < 4; i8x8++ )
                    {
                        int x = i8x8&1;
                        int y = i8x8>>1;
                        nz = h->zigzagf.sub_8x8( h->dct.luma8x8[p*4+i8x8], h->mb.pic.p_fenc[p] + 8*x + 8*y*FENC_STRIDE,
                                                                           h->mb.pic.p_fdec[p] + 8*x + 8*y*FDEC_STRIDE );
                        STORE_8x8_NNZ( p, i8x8, nz );
                        h->mb.i_cbp_luma |= nz << i8x8;
                    }
            else
                for( int p = 0; p < plane_count; p++ )
                    for( int i4x4 = 0; i4x4 < 16; i4x4++ )
                    {
                        nz = h->zigzagf.sub_4x4( h->dct.luma4x4[p*16+i4x4],
                                                 h->mb.pic.p_fenc[p]+block_idx_xy_fenc[i4x4],
                                                 h->mb.pic.p_fdec[p]+block_idx_xy_fdec[i4x4] );
                        h->mb.cache.non_zero_count[x264_scan8[p*16+i4x4]] = nz;
                        /* Each group of four 4x4 blocks maps to one cbp bit. */
                        h->mb.i_cbp_luma |= nz << (i4x4>>2);
                    }
        }
        else if( h->mb.b_transform_8x8 )
        {
            ALIGNED_ARRAY_N( dctcoef, dct8x8,[4],[64] );
            b_decimate &= !h->mb.b_trellis || !h->param.b_cabac; // 8x8 trellis is inherently optimal decimation for CABAC

            for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
            {
                int quant_cat = p ? CQM_8PC : CQM_8PY;
                CLEAR_16x16_NNZ( p );
                h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[p], h->mb.pic.p_fdec[p] );
                h->nr_count[1+!!p*2] += h->mb.b_noise_reduction * 4;

                int plane_cbp = 0;
                for( int idx = 0; idx < 4; idx++ )
                {
                    nz = x264_quant_8x8( h, dct8x8[idx], i_qp, ctx_cat_plane[DCT_LUMA_8x8][p], 0, p, idx );

                    if( nz )
                    {
                        h->zigzagf.scan_8x8( h->dct.luma8x8[p*4+idx], dct8x8[idx] );
                        if( b_decimate )
                        {
                            /* Only blocks scoring at least 4 stay in the cbp. */
                            int i_decimate_8x8 = h->quantf.decimate_score64( h->dct.luma8x8[p*4+idx] );
                            i_decimate_mb += i_decimate_8x8;
                            if( i_decimate_8x8 >= 4 )
                                plane_cbp |= 1<<idx;
                        }
                        else
                            plane_cbp |= 1<<idx;
                    }
                }

                /* Reconstruct only when the macroblock score reaches 6 (or decimation is off). */
                if( i_decimate_mb >= 6 || !b_decimate )
                {
                    h->mb.i_cbp_luma |= plane_cbp;
                    FOREACH_BIT( idx, 0, plane_cbp )
                    {
                        h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[quant_cat], i_qp );
                        h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[p][8*(idx&1) + 8*(idx>>1)*FDEC_STRIDE], dct8x8[idx] );
                        STORE_8x8_NNZ( p, idx, 1 );
                    }
                }
            }
        }
        else
        {
            ALIGNED_ARRAY_N( dctcoef, dct4x4,[16],[16] );
            for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
            {
                int quant_cat = p ? CQM_4PC : CQM_4PY;
                CLEAR_16x16_NNZ( p );
                h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[p], h->mb.pic.p_fdec[p] );

                if( h->mb.b_noise_reduction )
                {
                    h->nr_count[0+!!p*2] += 16;
                    for( int idx = 0; idx < 16; idx++ )
                        h->quantf.denoise_dct( dct4x4[idx], h->nr_residual_sum[0+!!p*2], h->nr_offset[0+!!p*2], 16 );
                }

                int plane_cbp = 0;
                for( int i8x8 = 0; i8x8 < 4; i8x8++ )
                {
                    /* Starting at 6 when decimation is off skips the score
                     * accumulation and keeps every non-empty 8x8 group. */
                    int i_decimate_8x8 = b_decimate ? 0 : 6;
                    int nnz8x8 = 0;
                    if( h->mb.b_trellis )
                    {
                        for( int i4x4 = 0; i4x4 < 4; i4x4++ )
                        {
                            int idx = i8x8*4+i4x4;
                            if( x264_quant_4x4_trellis( h, dct4x4[idx], quant_cat, i_qp, ctx_cat_plane[DCT_LUMA_4x4][p], 0, !!p, p*16+idx ) )
                            {
                                h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+idx], dct4x4[idx] );
                                h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[quant_cat], i_qp );
                                if( i_decimate_8x8 < 6 )
                                    i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[p*16+idx] );
                                h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = 1;
                                nnz8x8 = 1;
                            }
                        }
                    }
                    else
                    {
                        /* quant_4x4x4 handles the four 4x4 blocks of this 8x8 group at once;
                         * FOREACH_BIT then walks the set bits of its result. */
                        nnz8x8 = nz = h->quantf.quant_4x4x4( &dct4x4[i8x8*4], h->quant4_mf[quant_cat][i_qp], h->quant4_bias[quant_cat][i_qp] );
                        if( nz )
                        {
                            FOREACH_BIT( idx, i8x8*4, nz )
                            {
                                h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+idx], dct4x4[idx] );
                                h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[quant_cat], i_qp );
                                if( i_decimate_8x8 < 6 )
                                    i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[p*16+idx] );
                                h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = 1;
                            }
                        }
                    }
                    if( nnz8x8 )
                    {
                        i_decimate_mb += i_decimate_8x8;
                        /* Groups scoring below 4 are dropped from the cbp. */
                        if( i_decimate_8x8 < 4 )
                            STORE_8x8_NNZ( p, i8x8, 0 );
                        else
                            plane_cbp |= 1<<i8x8;
                    }
                }

                if( i_decimate_mb < 6 )
                {
                    /* Score too low: discard the whole plane's residual. */
                    plane_cbp = 0;
                    CLEAR_16x16_NNZ( p );
                }
                else
                {
                    h->mb.i_cbp_luma |= plane_cbp;
                    FOREACH_BIT( i8x8, 0, plane_cbp )
                    {
                        h->dctf.add8x8_idct( &h->mb.pic.p_fdec[p][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
                    }
                }
            }
        }
    }

    /* encode chroma */
    if( chroma )
    {
        if( IS_INTRA( h->mb.i_type ) )
        {
            int i_mode = h->mb.i_chroma_pred_mode;
            if( h->mb.b_lossless )
                x264_predict_lossless_chroma( h, i_mode );
            else
            {
                h->predict_chroma[i_mode]( h->mb.pic.p_fdec[1] );
                h->predict_chroma[i_mode]( h->mb.pic.p_fdec[2] );
            }
        }

        /* encode the 8x8 blocks */
        x264_mb_encode_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );
    }
    else
        h->mb.i_cbp_chroma = 0;

    /* store cbp */
    int cbp = h->mb.i_cbp_chroma << 4 | h->mb.i_cbp_luma;
    if( h->param.b_cabac )
        /* With CABAC the DC non-zero flags are packed into bits 8..10. */
        cbp |= h->mb.cache.non_zero_count[x264_scan8[LUMA_DC    ]] << 8
            |  h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+0]] << 9
            |  h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+1]] << 10;
    h->mb.cbp[h->mb.i_mb_xy] = cbp;

    /* Check for P_SKIP
     * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
     *      (if multiple mv give same result)*/
    if( !b_force_no_skip )
    {
        if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
            !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) &&
            M32( h->mb.cache.mv[0][x264_scan8[0]] ) == M32( h->mb.cache.pskip_mv )
            && h->mb.cache.ref[0][x264_scan8[0]] == 0 )
        {
            h->mb.i_type = P_SKIP;
        }

        /* Check for B_SKIP */
        if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) )
        {
            h->mb.i_type = B_SKIP;
        }
    }
}

Fiona Glaser's avatar
Fiona Glaser committed
954 955 956 957 958 959 960 961
/* Public entry point for encoding the current macroblock.
 * Dispatches to x264_macroblock_encode_internal with literal arguments:
 * three planes and no separate chroma pass when CHROMA444 is set, otherwise
 * one plane plus the chroma pass. */
void x264_macroblock_encode( x264_t *h )
{
    if( !CHROMA444 )
    {
        x264_macroblock_encode_internal( h, 1, 1 );
        return;
    }

    x264_macroblock_encode_internal( h, 3, 0 );
}

Laurent Aimar's avatar
Laurent Aimar committed
962
/*****************************************************************************
963
 * x264_macroblock_probe_skip:
964
 *  Check if the current MB could be encoded as a [PB]_SKIP
Laurent Aimar's avatar
Laurent Aimar committed
965
 *****************************************************************************/
Fiona Glaser's avatar
Fiona Glaser committed
966
static ALWAYS_INLINE int x264_macroblock_probe_skip_internal( x264_t *h, int b_bidir, int plane_count, int chroma )
Laurent Aimar's avatar
Laurent Aimar committed
967
{
968
    ALIGNED_ARRAY_N( dctcoef, dct4x4,[8],[16] );
969
    ALIGNED_ARRAY_16( dctcoef, dctscan,[16] );
970
    ALIGNED_4( int16_t mvp[2] );
971
    int i_qp = h->mb.i_qp;
Laurent Aimar's avatar
Laurent Aimar committed
972

973
    for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
974
    {