/*****************************************************************************
 * macroblock.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2003-2008 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/

25
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
26 27
#include "macroblock.h"

28
/* Flatten a 2x2 DC block into a 4-entry array.
 *
 * level: output, receives the four coefficients.
 * dct:   input 2x2 block.
 *
 * The first index of dct varies fastest in the output order:
 * level = { dct[0][0], dct[1][0], dct[0][1], dct[1][1] }. */
static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[2][2] )
{
    level[0] = dct[0][0];
    level[1] = dct[1][0];
    level[2] = dct[0][1];
    level[3] = dct[1][1];
}
Laurent Aimar's avatar
Laurent Aimar committed
37

38
/* Quantize one 4x4 luma coefficient block in place.
 *
 * h:             encoder context (supplies the quant tables and function pointers).
 * dct:           coefficients to quantize; overwritten.
 * i_qp:          quantizer index used to select the table row.
 * i_ctxBlockCat: block category, forwarded to the trellis quantizer only.
 * b_intra:       non-zero selects the CQM_4IY tables, zero selects CQM_4PY.
 * idx:           block index, forwarded to the trellis quantizer only.
 *
 * Uses the trellis quantizer when h->mb.b_trellis is set, otherwise the
 * plain quantf.quant_4x4 routine. */
static ALWAYS_INLINE void x264_quant_4x4( x264_t *h, int16_t dct[4][4], int i_qp, int i_ctxBlockCat, int b_intra, int idx )
{
    const int cqm = b_intra ? CQM_4IY : CQM_4PY;

    if( !h->mb.b_trellis )
    {
        h->quantf.quant_4x4( dct, h->quant4_mf[cqm][i_qp], h->quant4_bias[cqm][i_qp] );
        return;
    }
    x264_quant_4x4_trellis( h, dct, cqm, i_qp, i_ctxBlockCat, b_intra, idx );
}

47
/* Quantize one 8x8 luma coefficient block in place.
 *
 * h:       encoder context (supplies the quant tables and function pointers).
 * dct:     coefficients to quantize; overwritten.
 * i_qp:    quantizer index used to select the table row.
 * b_intra: non-zero selects the CQM_8IY tables, zero selects CQM_8PY.
 * idx:     block index, forwarded to the trellis quantizer only.
 *
 * Uses the trellis quantizer when h->mb.b_trellis is set, otherwise the
 * plain quantf.quant_8x8 routine. */
static ALWAYS_INLINE void x264_quant_8x8( x264_t *h, int16_t dct[8][8], int i_qp, int b_intra, int idx )
{
    const int cqm = b_intra ? CQM_8IY : CQM_8PY;

    if( !h->mb.b_trellis )
    {
        h->quantf.quant_8x8( dct, h->quant8_mf[cqm][i_qp], h->quant8_bias[cqm][i_qp] );
        return;
    }
    x264_quant_8x8_trellis( h, dct, cqm, i_qp, b_intra, idx );
}

/* Encode one intra 4x4 luma block.
 *
 * h:    encoder context; the prediction for this block must already be in fdec.
 * idx:  index of the 4x4 block (selects the fenc/fdec offsets and luma4x4 slot).
 * i_qp: quantizer index.
 *
 * Fills h->dct.luma4x4[idx]. In the lossy path, fdec is additionally updated
 * via dequant + add4x4_idct when any coefficient survives quantization. */
void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
{
    uint8_t *fenc = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
    uint8_t *fdec = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
    DECLARE_ALIGNED_16( int16_t coef[4][4] );

    /* Lossless: no transform or quantization is applied. */
    if( h->mb.b_lossless )
    {
        h->zigzagf.sub_4x4( h->dct.luma4x4[idx], fenc, fdec );
        return;
    }

    h->dctf.sub4x4_dct( coef, fenc, fdec );
    x264_quant_4x4( h, coef, i_qp, DCT_LUMA_4x4, 1, idx );

    /* Everything quantized to zero: just clear the stored coefficients and
     * leave fdec untouched. */
    if( !array_non_zero( coef ) )
    {
        memset( h->dct.luma4x4[idx], 0, sizeof(h->dct.luma4x4[idx]) );
        return;
    }

    h->zigzagf.scan_4x4( h->dct.luma4x4[idx], coef );
    h->quantf.dequant_4x4( coef, h->dequant4_mf[CQM_4IY], i_qp );

    /* output samples to fdec */
    h->dctf.add4x4_idct( fdec, coef );
}

84
/* Encode one intra 8x8 luma block.
 *
 * h:    encoder context; the prediction for this block must already be in fdec.
 * idx:  index of the 8x8 block within the macroblock (bit 0 = column, bit 1 = row).
 * i_qp: quantizer index.
 *
 * Fills h->dct.luma8x8[idx]. In the lossy path, fdec is always updated
 * through dequant + add8x8_idct8. */
void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
{
    const int col = (idx&1) * 8;
    const int row = (idx>>1) * 8;
    uint8_t *fenc = &h->mb.pic.p_fenc[0][col + row*FENC_STRIDE];
    uint8_t *fdec = &h->mb.pic.p_fdec[0][col + row*FDEC_STRIDE];
    DECLARE_ALIGNED_16( int16_t coef[8][8] );

    /* Lossless: no transform or quantization is applied. */
    if( h->mb.b_lossless )
    {
        h->zigzagf.sub_8x8( h->dct.luma8x8[idx], fenc, fdec );
        return;
    }

    h->dctf.sub8x8_dct8( coef, fenc, fdec );
    x264_quant_8x8( h, coef, i_qp, 1, idx );

    h->zigzagf.scan_8x8( h->dct.luma8x8[idx], coef );
    h->quantf.dequant_8x8( coef, h->dequant8_mf[CQM_8IY], i_qp );
    h->dctf.add8x8_idct8( fdec, coef );
}

107
/* Encode the luma of an intra 16x16 macroblock.
 *
 * h:    encoder context; the 16x16 prediction must already be in fdec.
 * i_qp: quantizer index.
 *
 * The DC coefficient of each of the sixteen 4x4 blocks is pulled out into a
 * separate 4x4 DC block which is stored in h->dct.luma16x16_dc; the remaining
 * coefficients go to h->dct.luma4x4[0..15] with their first entry cleared.
 * In the lossy path fdec is rebuilt from the dequantized data. */
static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
{
    uint8_t *fenc = h->mb.pic.p_fenc[0];
    uint8_t *fdec = h->mb.pic.p_fdec[0];

    DECLARE_ALIGNED_16( int16_t ac[16][4][4] );
    DECLARE_ALIGNED_16( int16_t dc[4][4] );

    int blk;

    if( h->mb.b_lossless )
    {
        for( blk = 0; blk < 16; blk++ )
        {
            h->zigzagf.sub_4x4( h->dct.luma4x4[blk],
                                fenc + block_idx_xy_fenc[blk],
                                fdec + block_idx_xy_fdec[blk] );
            /* Note: this path indexes the DC block with block_idx_yx_1d,
             * whereas the lossy path below uses block_idx_xy_1d. */
            dc[0][block_idx_yx_1d[blk]] = h->dct.luma4x4[blk][0];
            h->dct.luma4x4[blk][0] = 0;
        }
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dc );
        return;
    }

    h->dctf.sub16x16_dct( ac, fenc, fdec );
    for( blk = 0; blk < 16; blk++ )
    {
        /* move the DC coefficient into the separate DC block */
        dc[0][block_idx_xy_1d[blk]] = ac[blk][0][0];
        ac[blk][0][0] = 0;

        /* quant/scan/dequant of what remains */
        x264_quant_4x4( h, ac[blk], i_qp, DCT_LUMA_AC, 1, blk );

        h->zigzagf.scan_4x4( h->dct.luma4x4[blk], ac[blk] );
        h->quantf.dequant_4x4( ac[blk], h->dequant4_mf[CQM_4IY], i_qp );
    }

    /* second-level transform and quantization of the DC block */
    h->dctf.dct4x4dc( dc );
    if( h->mb.b_trellis )
        x264_quant_dc_trellis( h, (int16_t*)dc, CQM_4IY, i_qp, DCT_LUMA_DC, 1);
    else
        h->quantf.quant_4x4_dc( dc, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
    h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dc );

    /* output samples to fdec */
    h->dctf.idct4x4dc( dc );
    x264_mb_dequant_4x4_dc( dc, h->dequant4_mf[CQM_4IY], i_qp );  /* XXX not inversed */

    /* put the reconstructed DC values back into their 4x4 blocks */
    for( blk = 0; blk < 16; blk++ )
        ac[blk][0][0] = dc[0][block_idx_xy_1d[blk]];

    /* put pixels to fdec */
    h->dctf.add16x16_idct( fdec, ac );
}

166
/* Encode both 8x8 chroma planes of the current macroblock.
 *
 * h:       encoder context; the chroma prediction must already be in fdec[1..2].
 * b_inter: non-zero for inter macroblocks; selects the quant matrix
 *          (CQM_4IC + b_inter) and enables decimation.
 * i_qp:    chroma quantizer index.
 *
 * Per plane, the four 4x4 blocks are stored in h->dct.luma4x4[16+ch*4 .. +3]
 * with their first entry cleared, and the four DC values in
 * h->dct.chroma_dc[ch]. On exit h->mb.i_cbp_chroma is 0, 1 (dc only) or
 * 2 (dc+ac) and the non_zero_count cache entries 16..23 are updated. */
void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
{
    int i, ch;
    const int cqm = CQM_4IC + b_inter;
    int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);

    for( ch = 0; ch < 2; ch++ )
    {
        uint8_t *fenc = h->mb.pic.p_fenc[1+ch];
        uint8_t *fdec = h->mb.pic.p_fdec[1+ch];
        const int base = 16 + ch*4;     /* first luma4x4[] slot used by this plane */
        int score = 0;

        DECLARE_ALIGNED_16( int16_t dc[2][2]  );
        DECLARE_ALIGNED_16( int16_t ac[4][4][4] );

        if( h->mb.b_lossless )
        {
            for( i = 0; i < 4; i++ )
            {
                int src_off = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
                int dst_off = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
                h->zigzagf.sub_4x4( h->dct.luma4x4[base+i], fenc+src_off, fdec+dst_off );
                h->dct.chroma_dc[ch][i] = h->dct.luma4x4[base+i][0];
                h->dct.luma4x4[base+i][0] = 0;
            }
            continue;
        }

        h->dctf.sub8x8_dct( ac, fenc, fdec );

        /* per 4x4 block: split off DC, quantize and scan the rest */
        for( i = 0; i < 4; i++ )
        {
            dc[i>>1][i&1] = ac[i][0][0];
            ac[i][0][0] = 0;

            if( h->mb.b_trellis )
                x264_quant_4x4_trellis( h, ac[i], cqm, i_qp, DCT_CHROMA_AC, !b_inter, 0 );
            else
                h->quantf.quant_4x4( ac[i], h->quant4_mf[cqm][i_qp], h->quant4_bias[cqm][i_qp] );
            h->zigzagf.scan_4x4( h->dct.luma4x4[base+i], ac[i] );

            if( b_decimate )
                score += h->quantf.decimate_score15( h->dct.luma4x4[base+i] );
        }

        /* DC block: transform, quantize, scan */
        h->dctf.dct2x2dc( dc );
        if( h->mb.b_trellis )
            x264_quant_dc_trellis( h, (int16_t*)dc, cqm, i_qp, DCT_CHROMA_DC, !b_inter );
        else
            h->quantf.quant_2x2_dc( dc, h->quant4_mf[cqm][i_qp][0]>>1, h->quant4_bias[cqm][i_qp][0]<<1 );
        zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dc );

        /* output samples to fdec */
        h->dctf.idct2x2dc( dc );
        x264_mb_dequant_2x2_dc( dc, h->dequant4_mf[cqm], i_qp );  /* XXX not inversed */

        if( b_decimate && score < 7 )
        {
            /* Near null chroma 8x8 block so make it null (bits saving) */
            memset( &h->dct.luma4x4[base], 0, 4 * sizeof( *h->dct.luma4x4 ) );
            /* with DC also all zero there is nothing to add to the prediction */
            if( !array_non_zero( dc ) )
                continue;
            memset( ac, 0, sizeof( ac ) );
        }
        else
        {
            for( i = 0; i < 4; i++ )
                h->quantf.dequant_4x4( ac[i], h->dequant4_mf[cqm], i_qp );
        }

        /* re-insert the reconstructed DC values and rebuild the plane */
        for( i = 0; i < 4; i++ )
            ac[i][0][0] = dc[i>>1][i&1];
        h->dctf.add8x8_idct( fdec, ac );
    }

    /* coded block pattern */
    h->mb.i_cbp_chroma = 0;
    for( i = 0; i < 8; i++ )
    {
        int nz = array_non_zero( h->dct.luma4x4[16+i] );
        h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
        h->mb.i_cbp_chroma |= nz;
    }
    if( h->mb.i_cbp_chroma )
        h->mb.i_cbp_chroma = 2;    /* dc+ac (we can't do only ac) */
    else if( array_non_zero( h->dct.chroma_dc ) )
        h->mb.i_cbp_chroma = 1;    /* dc only */
}

256 257 258 259
/* Mark the current macroblock as having no coded residual: clears the luma
 * and chroma coded block patterns, the whole non_zero_count cache and the
 * stored cbp entry for this macroblock. */
static void x264_macroblock_encode_skip( x264_t *h )
{
    memset( h->mb.cache.non_zero_count, 0, X264_SCAN8_SIZE );

    h->mb.i_cbp_chroma = 0x00;
    h->mb.i_cbp_luma = 0x00;

    /* store cbp */
    h->mb.cbp[h->mb.i_mb_xy] = 0;
}

Laurent Aimar's avatar
Laurent Aimar committed
265 266 267 268
/*****************************************************************************
 * x264_macroblock_encode_pskip:
 *  Encode an already marked skip block
 *****************************************************************************/
Loic Le Loarer's avatar
Loic Le Loarer committed
269
/* Reconstruct a P_SKIP macroblock: motion-compensate luma and both chroma
 * planes from reference 0 of list 0 using the cached motion vector clipped to
 * [mv_min, mv_max], then clear all residual state. The motion compensation is
 * skipped when h->mb.b_skip_mc says it has been done already. */
static void x264_macroblock_encode_pskip( x264_t *h )
{
    const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
                                h->mb.mv_min[0], h->mb.mv_max[0] );
    const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
                                h->mb.mv_min[1], h->mb.mv_max[1] );
    int plane;

    /* don't do pskip motion compensation if it was already done in macroblock_analyse */
    if( !h->mb.b_skip_mc )
    {
        h->mc.mc_luma( h->mb.pic.p_fdec[0],    FDEC_STRIDE,
                       h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
                       mvx, mvy, 16, 16 );

        /* chroma planes 1 and 2 read from p_fref[0][0][4] and [5] respectively */
        for( plane = 1; plane <= 2; plane++ )
            h->mc.mc_chroma( h->mb.pic.p_fdec[plane],         FDEC_STRIDE,
                             h->mb.pic.p_fref[0][0][3+plane], h->mb.pic.i_stride[plane],
                             mvx, mvy, 8, 8 );
    }

    x264_macroblock_encode_skip( h );
}

295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362
/*****************************************************************************
 * Intra prediction for predictive lossless mode.
 *****************************************************************************/

/* Note that these functions take a shortcut (mc.copy instead of actual pixel prediction) which assumes
 * that the edge pixels of the reconstructed frame are the same as those of the source frame.  This means
 * they will only work correctly if the neighboring blocks are losslessly coded.  In practice, this means
 * lossless mode cannot be mixed with lossy mode within a frame. */
/* This can be resolved by explicitly copying the edge pixels after doing the mc.copy, but this doesn't
 * need to be done unless we decide to allow mixing lossless and lossy compression. */

/* Chroma intra prediction for lossless mode.
 *
 * For the vertical and horizontal modes the prediction is produced by copying
 * the source plane shifted by one row (V) or one column (H) into fdec; every
 * other mode falls back to the regular predict_8x8c function. Both chroma
 * planes are handled. */
void x264_predict_lossless_8x8_chroma( x264_t *h, int i_mode )
{
    int stride = h->fenc->i_stride[1] << h->mb.b_interlaced;
    int shift;      /* distance back from the block origin to the copied neighbour */
    int plane;

    if( i_mode == I_PRED_CHROMA_V )
        shift = stride;
    else if( i_mode == I_PRED_CHROMA_H )
        shift = 1;
    else
    {
        h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
        h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
        return;
    }

    for( plane = 1; plane <= 2; plane++ )
        h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[plane], FDEC_STRIDE,
                               h->mb.pic.p_fenc_plane[plane]-shift, stride, 8 );
}

/* Luma 4x4 intra prediction for lossless mode.
 *
 * p_dst:  destination of the prediction inside fdec.
 * idx:    4x4 block index, used to locate the block in the source plane.
 * i_mode: prediction mode; V and H are produced by copying the source plane
 *         shifted by one row / one column, all others use predict_4x4. */
void x264_predict_lossless_4x4( x264_t *h, uint8_t *p_dst, int idx, int i_mode )
{
    int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    uint8_t *block = h->mb.pic.p_fenc_plane[0] + block_idx_x[idx]*4 + block_idx_y[idx]*4 * stride;

    if( i_mode == I_PRED_4x4_V )
    {
        h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, block-stride, stride, 4 );
        return;
    }
    if( i_mode == I_PRED_4x4_H )
    {
        h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, block-1, stride, 4 );
        return;
    }
    h->predict_4x4[i_mode]( p_dst );
}

/* Luma 8x8 intra prediction for lossless mode.
 *
 * p_dst:  destination of the prediction inside fdec.
 * idx:    8x8 block index (bit 0 = column, bit 1 = row).
 * i_mode: prediction mode; V and H are produced by copying the source plane
 *         shifted by one row / one column, all others use predict_8x8.
 * edge:   edge buffer, only passed through to predict_8x8. */
void x264_predict_lossless_8x8( x264_t *h, uint8_t *p_dst, int idx, int i_mode, uint8_t edge[33] )
{
    int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    uint8_t *block = h->mb.pic.p_fenc_plane[0] + (idx&1)*8 + (idx>>1)*8*stride;

    if( i_mode == I_PRED_8x8_V )
    {
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, block-stride, stride, 8 );
        return;
    }
    if( i_mode == I_PRED_8x8_H )
    {
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, block-1, stride, 8 );
        return;
    }
    h->predict_8x8[i_mode]( p_dst, edge );
}

/* Luma 16x16 intra prediction for lossless mode.
 *
 * V copies the source plane shifted up by one row; H copies it shifted left
 * by one column through the unaligned copy routine; every other mode uses
 * the regular predict_16x16 function. */
void x264_predict_lossless_16x16( x264_t *h, int i_mode )
{
    int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;

    if( i_mode == I_PRED_16x16_V )
    {
        h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-stride, stride, 16 );
        return;
    }
    if( i_mode == I_PRED_16x16_H )
    {
        h->mc.copy_16x16_unaligned( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-1, stride, 16 );
        return;
    }
    h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
}

Laurent Aimar's avatar
Laurent Aimar committed
363 364 365 366 367 368
/*****************************************************************************
 * x264_macroblock_encode:
 *****************************************************************************/
/* Encode the current macroblock according to h->mb.i_type.
 *
 * Steps, in order:
 *  - in MBAFF slices, force the macroblock one row below the first one out of
 *    skip mode when that first macroblock was a skip;
 *  - P_SKIP / B_SKIP: motion compensate (unless already done) and clear all
 *    residual state, then return;
 *  - luma: intra 16x16 / 8x8 / 4x4 prediction + residual coding, or inter
 *    motion compensation + residual coding with optional decimation;
 *  - chroma: intra prediction if needed, then x264_mb_encode_8x8_chroma;
 *  - compute i_cbp_luma, the non_zero_count cache and the stored cbp;
 *  - finally turn a P_L0 16x16 / B_DIRECT macroblock without residual into
 *    P_SKIP / B_SKIP unless skipping was forced off above.
 *
 * nnz8x8[] tracks, per 8x8 luma quadrant, whether any coefficient is kept;
 * it starts as all ones and is only lowered by the paths that evaluate it. */
void x264_macroblock_encode( x264_t *h )
{
    int i_cbp_dc = 0;
    int i_qp = h->mb.i_qp;
    int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
    int b_force_no_skip = 0;
    int i,j,idx;
    uint8_t nnz8x8[4] = {1,1,1,1};

    if( h->sh.b_mbaff
        && h->mb.i_mb_xy == h->sh.i_first_mb + h->mb.i_mb_stride
        && IS_SKIP(h->mb.type[h->sh.i_first_mb]) )
    {
        /* The first skip is predicted to be a frame mb pair.
         * We don't yet support the aff part of mbaff, so force it to non-skip
         * so that we can pick the aff flag. */
        b_force_no_skip = 1;
        if( IS_SKIP(h->mb.i_type) )
        {
            if( h->mb.i_type == P_SKIP )
                h->mb.i_type = P_L0;
            else if( h->mb.i_type == B_SKIP )
                h->mb.i_type = B_DIRECT;
        }
    }

    if( h->mb.i_type == P_SKIP )
    {
        /* A bit special */
        x264_macroblock_encode_pskip( h );
        return;
    }
    if( h->mb.i_type == B_SKIP )
    {
        /* don't do bskip motion compensation if it was already done in macroblock_analyse */
        if( !h->mb.b_skip_mc )
            x264_mb_mc( h );
        x264_macroblock_encode_skip( h );
        return;
    }

    if( h->mb.i_type == I_16x16 )
    {
        const int i_mode = h->mb.i_intra16x16_pred_mode;
        h->mb.b_transform_8x8 = 0;

        if( h->mb.b_lossless )
            x264_predict_lossless_16x16( h, i_mode );
        else
            h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );

        /* encode the 16x16 macroblock */
        x264_mb_encode_i16x16( h, i_qp );
    }
    else if( h->mb.i_type == I_8x8 )
    {
        DECLARE_ALIGNED_16( uint8_t edge[33] );
        h->mb.b_transform_8x8 = 1;
        /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 );
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
                h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) );
        }
        for( i = h->mb.i_skip_intra ? 3 : 0 ; i < 4; i++ )
        {
            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
            x264_predict_8x8_filter( p_dst, edge, h->mb.i_neighbour8[i], x264_pred_i4x4_neighbors[i_mode] );

            if( h->mb.b_lossless )
                x264_predict_lossless_8x8( h, p_dst, i, i_mode, edge );
            else
                h->predict_8x8[i_mode]( p_dst, edge );

            x264_mb_encode_i8x8( h, i, i_qp );
        }
        for( i = 0; i < 4; i++ )
            nnz8x8[i] = array_non_zero( h->dct.luma8x8[i] );
    }
    else if( h->mb.i_type == I_4x4 )
    {
        h->mb.b_transform_8x8 = 0;
        /* If we already encoded 15 of the 16 i4x4 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
                h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
        }
        for( i = h->mb.i_skip_intra ? 15 : 0 ; i < 16; i++ )
        {
            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i]];
            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];

            if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                /* emulate missing topright samples */
                *(uint32_t*) &p_dst[4-FDEC_STRIDE] = p_dst[3-FDEC_STRIDE] * 0x01010101U;

            if( h->mb.b_lossless )
                x264_predict_lossless_4x4( h, p_dst, i, i_mode );
            else
                h->predict_4x4[i_mode]( p_dst );
            x264_mb_encode_i4x4( h, i, i_qp );
        }
    }
    else    /* Inter MB */
    {
        int i8x8, i4x4;
        int i_decimate_mb = 0;

        /* Don't repeat motion compensation if it was already done in non-RD transform analysis */
        if( !h->mb.b_skip_mc )
            x264_mb_mc( h );

        if( h->mb.b_lossless )
        {
            if( h->mb.b_transform_8x8 )
                for( i8x8 = 0; i8x8 < 4; i8x8++ )
                {
                    int x = 8*(i8x8&1);
                    int y = 8*(i8x8>>1);
                    h->zigzagf.sub_8x8( h->dct.luma8x8[i8x8],
                                        h->mb.pic.p_fenc[0]+x+y*FENC_STRIDE,
                                        h->mb.pic.p_fdec[0]+x+y*FDEC_STRIDE );
                    nnz8x8[i8x8] = array_non_zero( h->dct.luma8x8[i8x8] );
                }
            else
                for( i4x4 = 0; i4x4 < 16; i4x4++ )
                {
                    h->zigzagf.sub_4x4( h->dct.luma4x4[i4x4],
                                        h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4x4],
                                        h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4x4] );
                }
        }
        else if( h->mb.b_transform_8x8 )
        {
            DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
            b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
            h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
            h->nr_count[1] += h->mb.b_noise_reduction * 4;

            for( idx = 0; idx < 4; idx++ )
            {
                if( h->mb.b_noise_reduction )
                    h->quantf.denoise_dct( *dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
                x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );

                h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8[idx] );

                if( b_decimate )
                {
                    int i_decimate_8x8 = h->quantf.decimate_score64( h->dct.luma8x8[idx] );
                    i_decimate_mb += i_decimate_8x8;
                    if( i_decimate_8x8 < 4 )
                        nnz8x8[idx] = 0;
                }
                else
                    nnz8x8[idx] = array_non_zero( dct8x8[idx] );
            }

            if( i_decimate_mb < 6 && b_decimate )
                /* Whole macroblock is near-null: drop every quadrant.
                 * memset instead of a uint32_t store, because nnz8x8 is a
                 * plain uint8_t array with no alignment guarantee. */
                memset( nnz8x8, 0, sizeof(nnz8x8) );
            else
            {
                for( idx = 0; idx < 4; idx++ )
                    if( nnz8x8[idx] )
                    {
                        h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
                        h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[0][(idx&1)*8 + (idx>>1)*8*FDEC_STRIDE], dct8x8[idx] );
                    }
            }
        }
        else
        {
            DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
            h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
            h->nr_count[0] += h->mb.b_noise_reduction * 16;

            for( i8x8 = 0; i8x8 < 4; i8x8++ )
            {
                int i_decimate_8x8;

                /* encode one 4x4 block */
                i_decimate_8x8 = 0;
                for( i4x4 = 0; i4x4 < 4; i4x4++ )
                {
                    idx = i8x8 * 4 + i4x4;

                    if( h->mb.b_noise_reduction )
                        h->quantf.denoise_dct( *dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
                    x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );

                    h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );

                    /* stop scoring once the quadrant is already above 6 */
                    if( b_decimate && i_decimate_8x8 <= 6 )
                        i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[idx] );
                }

                /* decimate this 8x8 block */
                i_decimate_mb += i_decimate_8x8;
                if( i_decimate_8x8 < 4 && b_decimate )
                    nnz8x8[i8x8] = 0;
            }

            if( i_decimate_mb < 6 && b_decimate )
                /* Whole macroblock is near-null: drop every quadrant
                 * (memset for the same reason as in the 8x8 path). */
                memset( nnz8x8, 0, sizeof(nnz8x8) );
            else
            {
                for( i8x8 = 0; i8x8 < 4; i8x8++ )
                    if( nnz8x8[i8x8] )
                    {
                        for( i = 0; i < 4; i++ )
                            h->quantf.dequant_4x4( dct4x4[i8x8*4+i], h->dequant4_mf[CQM_4PY], i_qp );
                        h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
                    }
            }
        }
    }

    /* encode chroma */
    if( IS_INTRA( h->mb.i_type ) )
    {
        const int i_mode = h->mb.i_chroma_pred_mode;
        if( h->mb.b_lossless )
            x264_predict_lossless_8x8_chroma( h, i_mode );
        else
        {
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
        }
    }

    /* encode the 8x8 blocks */
    x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );

    /* coded block pattern and non_zero_count */
    h->mb.i_cbp_luma = 0x00;
    if( h->mb.i_type == I_16x16 )
    {
        for( i = 0; i < 16; i++ )
        {
            int nz = array_non_zero( h->dct.luma4x4[i] );
            h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
            h->mb.i_cbp_luma |= nz;
        }
        /* all four quadrant bits are set together, or none */
        h->mb.i_cbp_luma *= 0xf;
    }
    else
    {
        for( i = 0; i < 4; i++)
        {
            /* NOTE(review): the uint16_t stores below write two adjacent
             * non_zero_count entries at once; this relies on the layout of
             * x264_scan8 and on the cache's alignment, neither of which is
             * visible here - confirm before changing. */
            if(!nnz8x8[i])
            {
                *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[0+i*4]] = 0;
                *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[2+i*4]] = 0;
            }
            else if( h->mb.b_transform_8x8 )
            {
                *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[0+4*i]] = nnz8x8[i] * 0x0101;
                *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[2+4*i]] = nnz8x8[i] * 0x0101;
                h->mb.i_cbp_luma |= nnz8x8[i] << i;
            }
            else
            {
                int nz, cbp = 0;
                for( j = 0; j < 4; j++ )
                {
                    nz = array_non_zero( h->dct.luma4x4[j+4*i] );
                    h->mb.cache.non_zero_count[x264_scan8[j+4*i]] = nz;
                    cbp |= nz;
                }
                h->mb.i_cbp_luma |= cbp << i;
            }
        }
    }

    if( h->param.b_cabac )
    {
        /* bit 0: luma 16x16 DC, bit 1: first chroma DC, bit 2: second chroma DC */
        i_cbp_dc = ( h->mb.i_type == I_16x16 && array_non_zero( h->dct.luma16x16_dc ) )
                 | array_non_zero( h->dct.chroma_dc[0] ) << 1
                 | array_non_zero( h->dct.chroma_dc[1] ) << 2;
    }

    /* store cbp */
    h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;

    /* Check for P_SKIP
     * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
     *      (if multiple mv give same result)*/
    if( !b_force_no_skip )
    {
        if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
            !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) &&
            *(uint32_t*)h->mb.cache.mv[0][x264_scan8[0]] == *(uint32_t*)h->mb.cache.pskip_mv
            && h->mb.cache.ref[0][x264_scan8[0]] == 0 )
        {
            h->mb.i_type = P_SKIP;
        }

        /* Check for B_SKIP */
        if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) )
        {
            h->mb.i_type = B_SKIP;
        }
    }
}

/*****************************************************************************
 * x264_macroblock_probe_skip:
 *  Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
 *  the previous QP)
 *****************************************************************************/
682
int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
Laurent Aimar's avatar
Laurent Aimar committed
683
{
Fiona Glaser's avatar
Fiona Glaser committed
684
    DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
Loren Merritt's avatar
Loren Merritt committed
685 686
    DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
    DECLARE_ALIGNED_16( int16_t dctscan[16] );
Laurent Aimar's avatar
Laurent Aimar committed
687

688
    int i_qp = h->mb.i_qp;
Laurent Aimar's avatar
Laurent Aimar committed
689
    int mvp[2];
690
    int ch, thresh;
Laurent Aimar's avatar
Laurent Aimar committed
691 692 693 694

    int i8x8, i4x4;
    int i_decimate_mb;

695 696 697
    if( !b_bidir )
    {
        /* Get the MV */
698 699
        mvp[0] = x264_clip3( h->mb.cache.pskip_mv[0], h->mb.mv_min[0], h->mb.mv_max[0] );
        mvp[1] = x264_clip3( h->mb.cache.pskip_mv[1], h->mb.mv_min[1], h->mb.mv_max[1] );
Laurent Aimar's avatar
Laurent Aimar committed
700

701
        /* Motion compensation */
702 703
        h->mc.mc_luma( h->mb.pic.p_fdec[0],    FDEC_STRIDE,
                       h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
704
                       mvp[0], mvp[1], 16, 16 );
705
    }
Laurent Aimar's avatar
Laurent Aimar committed
706 707 708

    for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
    {
Fiona Glaser's avatar
Fiona Glaser committed
709 710 711 712 713
        int fenc_offset = (i8x8&1) * 8 + (i8x8>>1) * FENC_STRIDE * 8;
        int fdec_offset = (i8x8&1) * 8 + (i8x8>>1) * FDEC_STRIDE * 8;
        /* get luma diff */
        h->dctf.sub8x8_dct( dct4x4, h->mb.pic.p_fenc[0] + fenc_offset,
                                    h->mb.pic.p_fdec[0] + fdec_offset );
Laurent Aimar's avatar
Laurent Aimar committed
714 715 716
        /* encode one 4x4 block */
        for( i4x4 = 0; i4x4 < 4; i4x4++ )
        {
Fiona Glaser's avatar
Fiona Glaser committed
717 718 719 720
            h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
            if( !array_non_zero(dct4x4[i4x4]) )
                continue;
            h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
721
            i_decimate_mb += h->quantf.decimate_score16( dctscan );
Laurent Aimar's avatar
Laurent Aimar committed
722 723 724 725 726 727
            if( i_decimate_mb >= 6 )
                return 0;
        }
    }

    /* encode chroma */
728
    i_qp = h->mb.i_chroma_qp;
729
    thresh = (x264_lambda2_tab[i_qp] + 32) >> 6;
Laurent Aimar's avatar
Laurent Aimar committed
730 731 732 733 734 735

    for( ch = 0; ch < 2; ch++ )
    {
        uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
        uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];

736 737
        if( !b_bidir )
        {
738 739
            h->mc.mc_chroma( h->mb.pic.p_fdec[1+ch],       FDEC_STRIDE,
                             h->mb.pic.p_fref[0][0][4+ch], h->mb.pic.i_stride[1+ch],
740
                             mvp[0], mvp[1], 8, 8 );
741
        }
Laurent Aimar's avatar
Laurent Aimar committed
742

743 744 745 746 747
        /* there is almost never a termination during chroma, but we can't avoid the check entirely */
        /* so instead we check SSD and skip the actual check if the score is low enough. */
        if( h->pixf.ssd[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) < thresh )
            continue;

748
        h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
Laurent Aimar's avatar
Laurent Aimar committed
749 750 751 752 753 754 755

        /* calculate dct DC */
        dct2x2[0][0] = dct4x4[0][0][0];
        dct2x2[0][1] = dct4x4[1][0][0];
        dct2x2[1][0] = dct4x4[2][0][0];
        dct2x2[1][1] = dct4x4[3][0][0];
        h->dctf.dct2x2dc( dct2x2 );
Loren Merritt's avatar
Loren Merritt committed
756
        h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4PC][i_qp][0]>>1, h->quant4_bias[CQM_4PC][i_qp][0]<<1 );
Fiona Glaser's avatar
Fiona Glaser committed
757
        if( array_non_zero(dct2x2) )
Laurent Aimar's avatar
Laurent Aimar committed
758 759 760 761 762
            return 0;

        /* calculate dct coeffs */
        for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
        {
763
            dct4x4[i4x4][0][0] = 0;
Loren Merritt's avatar
Loren Merritt committed
764
            h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] );
Fiona Glaser's avatar
Fiona Glaser committed
765 766
            if( !array_non_zero(dct4x4[i4x4]) )
                continue;
767
            h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
768
            i_decimate_mb += h->quantf.decimate_score15( dctscan );
Laurent Aimar's avatar
Laurent Aimar committed
769 770 771 772 773
            if( i_decimate_mb >= 7 )
                return 0;
        }
    }

774
    h->mb.b_skip_mc = 1;
Laurent Aimar's avatar
Laurent Aimar committed
775 776
    return 1;
}
777 778 779 780 781 782 783 784 785

/****************************************************************************
 * DCT-domain noise reduction / adaptive deadzone
 * from libavcodec
 *
 * x264_noise_reduction_update:
 *  Recomputes h->nr_offset[] for both coefficient categories (cat 0: 16
 *  coefficients, cat 1: 64 coefficients) from the accumulated residual sums
 *  and block counts. When a category's count exceeds its limit, the sums
 *  and the count are halved first.
 ****************************************************************************/

void x264_noise_reduction_update( x264_t *h )
{
    int cat, k;
    for( cat = 0; cat < 2; cat++ )
    {
        const uint16_t *weight;
        int n_coefs;
        int b_halve;

        if( cat )
        {
            n_coefs = 64;
            weight  = x264_dct8_weight2_tab;
            b_halve = h->nr_count[cat] > (1<<16);
        }
        else
        {
            n_coefs = 16;
            weight  = x264_dct4_weight2_tab;
            b_halve = h->nr_count[cat] > (1<<18);
        }

        /* halve the accumulated statistics once the count passes its limit */
        if( b_halve )
        {
            for( k = 0; k < n_coefs; k++ )
                h->nr_residual_sum[cat][k] >>= 1;
            h->nr_count[cat] >>= 1;
        }

        for( k = 0; k < n_coefs; k++ )
        {
            /* both terms are evaluated in 64 bits; sum/2 rounds the division
             * and the +1 keeps the denominator non-zero */
            uint64_t num = (uint64_t)h->param.analyse.i_noise_reduction * h->nr_count[cat]
                         + h->nr_residual_sum[cat][k]/2;
            uint64_t den = (uint64_t)h->nr_residual_sum[cat][k] * weight[k]/256 + 1;
            h->nr_offset[cat][k] = num / den;
        }
    }
}

806 807 808 809 810 811 812 813 814
/*****************************************************************************
 * RD only; 4 calls to this do not make up for one macroblock_encode.
 * Does not transform chroma DC.
 *****************************************************************************/
void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
{
    int i_qp = h->mb.i_qp;
    uint8_t *p_fenc = h->mb.pic.p_fenc[0] + (i8&1)*8 + (i8>>1)*8*FENC_STRIDE;
    uint8_t *p_fdec = h->mb.pic.p_fdec[0] + (i8&1)*8 + (i8>>1)*8*FDEC_STRIDE;
815
    int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
816
    int nnz8x8 = 0;
817 818 819 820
    int ch;

    x264_mb_mc_8x8( h, i8 );

821
    if( h->mb.b_lossless )
822
    {
823
        int i4;
824 825
        if( h->mb.b_transform_8x8 )
        {
826
            h->zigzagf.sub_8x8( h->dct.luma8x8[i8], p_fenc, p_fdec );
827 828 829
            nnz8x8 = array_non_zero( h->dct.luma8x8[i8] );
        }
        else
830
        {
831 832 833 834 835 836 837
            for( i4 = i8*4; i4 < i8*4+4; i4++ )
            {
                h->zigzagf.sub_4x4( h->dct.luma4x4[i4],
                                    h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4],
                                    h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4] );
                nnz8x8 |= array_non_zero( h->dct.luma4x4[i4] );
            }
838 839 840 841 842 843 844
        }
        for( ch = 0; ch < 2; ch++ )
        {
            p_fenc = h->mb.pic.p_fenc[1+ch] + (i8&1)*4 + (i8>>1)*4*FENC_STRIDE;
            p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
            h->zigzagf.sub_4x4( h->dct.luma4x4[16+i8+ch*4], p_fenc, p_fdec );
            h->dct.luma4x4[16+i8+ch*4][0] = 0;
845 846 847 848
        }
    }
    else
    {
849
        if( h->mb.b_transform_8x8 )
850
        {