macroblock.c 34.2 KB
Newer Older
Laurent Aimar's avatar
Laurent Aimar committed
1 2 3
/*****************************************************************************
 * macroblock.c: h264 encoder library
 *****************************************************************************
4
 * Copyright (C) 2003-2008 x264 project
Laurent Aimar's avatar
Laurent Aimar committed
5 6
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
7 8
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
Laurent Aimar's avatar
Laurent Aimar committed
9 10 11 12 13 14 15 16 17 18 19 20 21
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
Laurent Aimar's avatar
Laurent Aimar committed
23 24
 *****************************************************************************/

25
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
26 27
#include "macroblock.h"

28
/* Flatten a 2x2 DC block into its 4-entry scan order:
 *   level[] = { dct[0][0], dct[1][0], dct[0][1], dct[1][1] }
 * i.e. entry k is taken from dct[k&1][k>>1]. */
static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[2][2] )
{
    int k;

    for( k = 0; k < 4; k++ )
        level[k] = dct[k & 1][k >> 1];
}
Laurent Aimar's avatar
Laurent Aimar committed
37 38 39 40 41 42 43 44 45

/* (ref: JVT-B118)
 * x264_mb_decimate_score: rate how cheaply a block of scanned coefficients
 * could be dropped (replaced by all zeros). A low score means "drop it".
 *
 *  dct:   scanned coefficients, i_max entries.
 *  i_max: number of entries; 64 selects the 8x8 cost table, any other value
 *         selects the 16-entry 4x4 table.
 *
 * Returns 9 as soon as a coefficient outside [-1,1] is found. Otherwise
 * returns the sum, over every +-1 coefficient, of a table cost indexed by the
 * number of zeros directly below it in scan order.
 *
 * Thresholds applied by the callers in this file (inter macroblocks only):
 *  luma:   a 8x8 block is nulled if its score < 4,
 *          the complete mb is nulled if its score < 6
 *  chroma: a complete plane is nulled if its score < 7
 */
static int x264_mb_decimate_score( int16_t *dct, int i_max )
{
    static const int i_ds_table4[16] = {
        3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
    static const int i_ds_table8[64] = {
        3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
        1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };

    const int *run_cost = i_ds_table4;
    int total = 0;
    int pos;

    if( i_max == 64 )
        run_cost = i_ds_table8;

    /* Skip the zeros at the high end of the scan. */
    for( pos = i_max - 1; pos >= 0; pos-- )
        if( dct[pos] != 0 )
            break;

    /* Walk downwards: one non-zero coefficient, then the zero run below it. */
    while( pos >= 0 )
    {
        const int coef = dct[pos--];
        int zeros = 0;

        /* Anything larger than +-1 is too expensive to throw away. */
        if( coef < -1 || coef > 1 )
            return 9;

        for( ; pos >= 0 && dct[pos] == 0; pos-- )
            zeros++;

        total += run_cost[zeros];
    }

    return total;
}

82
/* Quantize one 4x4 luma block in place.
 * The quant matrix category is CQM_4IY for intra and CQM_4PY otherwise.
 * When h->mb.b_trellis is set the trellis quantizer is used; otherwise the
 * plain quant_4x4 function is called with the mf/bias tables for i_qp. */
static ALWAYS_INLINE void x264_quant_4x4( x264_t *h, int16_t dct[4][4], int i_qp, int i_ctxBlockCat, int b_intra, int idx )
{
    const int cqm = b_intra ? CQM_4IY : CQM_4PY;

    if( !h->mb.b_trellis )
    {
        h->quantf.quant_4x4( dct, h->quant4_mf[cqm][i_qp], h->quant4_bias[cqm][i_qp] );
        return;
    }

    x264_quant_4x4_trellis( h, dct, cqm, i_qp, i_ctxBlockCat, b_intra, idx );
}

91
/* Quantize one 8x8 luma block in place.
 * The quant matrix category is CQM_8IY for intra and CQM_8PY otherwise.
 * When h->mb.b_trellis is set the trellis quantizer is used; otherwise the
 * plain quant_8x8 function is called with the mf/bias tables for i_qp. */
static ALWAYS_INLINE void x264_quant_8x8( x264_t *h, int16_t dct[8][8], int i_qp, int b_intra, int idx )
{
    const int cqm = b_intra ? CQM_8IY : CQM_8PY;

    if( !h->mb.b_trellis )
    {
        h->quantf.quant_8x8( dct, h->quant8_mf[cqm][i_qp], h->quant8_bias[cqm][i_qp] );
        return;
    }

    x264_quant_8x8_trellis( h, dct, cqm, i_qp, b_intra, idx );
}

/* Encode intra 4x4 luma block number idx of the current macroblock.
 * Expects the prediction to be in fdec already; writes the scanned
 * coefficients to h->dct.luma4x4[idx] and, in the lossy path, adds the
 * reconstructed residual back onto fdec. */
void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
{
    uint8_t *fenc = h->mb.pic.p_fenc[0] + block_idx_xy_fenc[idx];
    uint8_t *fdec = h->mb.pic.p_fdec[0] + block_idx_xy_fdec[idx];
    DECLARE_ALIGNED_16( int16_t coef[4][4] );

    /* Lossless: the residual goes straight into the coefficient array. */
    if( h->mb.b_lossless )
    {
        h->zigzagf.sub_4x4( h->dct.luma4x4[idx], fenc, fdec );
        return;
    }

    h->dctf.sub4x4_dct( coef, fenc, fdec );
    x264_quant_4x4( h, coef, i_qp, DCT_LUMA_4x4, 1, idx );

    /* Everything quantized away: clear the output and keep the prediction. */
    if( !array_non_zero( coef ) )
    {
        memset( h->dct.luma4x4[idx], 0, sizeof(h->dct.luma4x4[idx]));
        return;
    }

    h->zigzagf.scan_4x4( h->dct.luma4x4[idx], coef );
    h->quantf.dequant_4x4( coef, h->dequant4_mf[CQM_4IY], i_qp );

    /* output samples to fdec */
    h->dctf.add4x4_idct( fdec, coef );
}

128
/* Encode intra 8x8 luma block number idx (0..3, raster order inside the
 * macroblock). Expects the prediction to be in fdec already; writes the
 * scanned coefficients to h->dct.luma8x8[idx] and, in the lossy path, adds
 * the reconstructed residual back onto fdec. */
void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
{
    const int col = (idx & 1) * 8;
    const int row = (idx >> 1) * 8;
    uint8_t *fenc = h->mb.pic.p_fenc[0] + col + row*FENC_STRIDE;
    uint8_t *fdec = h->mb.pic.p_fdec[0] + col + row*FDEC_STRIDE;
    DECLARE_ALIGNED_16( int16_t coef[8][8] );

    /* Lossless: the residual goes straight into the coefficient array. */
    if( h->mb.b_lossless )
    {
        h->zigzagf.sub_8x8( h->dct.luma8x8[idx], fenc, fdec );
        return;
    }

    /* transform and quantize */
    h->dctf.sub8x8_dct8( coef, fenc, fdec );
    x264_quant_8x8( h, coef, i_qp, 1, idx );

    /* store the scanned levels, then reconstruct into fdec */
    h->zigzagf.scan_8x8( h->dct.luma8x8[idx], coef );
    h->quantf.dequant_8x8( coef, h->dequant8_mf[CQM_8IY], i_qp );
    h->dctf.add8x8_idct8( fdec, coef );
}

151
/* Encode the luma plane of an intra 16x16 macroblock.
 * Expects the prediction to be in fdec already. The first coefficient of each
 * of the sixteen 4x4 blocks is pulled out into a separate 4x4 DC block
 * (h->dct.luma16x16_dc); the remaining coefficients go to h->dct.luma4x4[]. */
static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
{
    uint8_t *fenc = h->mb.pic.p_fenc[0];
    uint8_t *fdec = h->mb.pic.p_fdec[0];

    DECLARE_ALIGNED_16( int16_t ac[16][4][4] );
    DECLARE_ALIGNED_16( int16_t dc[4][4] );

    int blk;

    if( h->mb.b_lossless )
    {
        for( blk = 0; blk < 16; blk++ )
        {
            h->zigzagf.sub_4x4( h->dct.luma4x4[blk],
                                fenc + block_idx_xy_fenc[blk],
                                fdec + block_idx_xy_fdec[blk] );
            /* move the first coefficient into the DC block
             * (this path indexes with block_idx_yx_1d, the lossy one with block_idx_xy_1d) */
            dc[0][block_idx_yx_1d[blk]] = h->dct.luma4x4[blk][0];
            h->dct.luma4x4[blk][0] = 0;
        }
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dc );
        return;
    }

    h->dctf.sub16x16_dct( ac, fenc, fdec );
    for( blk = 0; blk < 16; blk++ )
    {
        /* pull the DC coefficient out of the block */
        dc[0][block_idx_xy_1d[blk]] = ac[blk][0][0];
        ac[blk][0][0] = 0;

        /* quant, scan, dequant what is left */
        x264_quant_4x4( h, ac[blk], i_qp, DCT_LUMA_AC, 1, blk );
        h->zigzagf.scan_4x4( h->dct.luma4x4[blk], ac[blk] );
        h->quantf.dequant_4x4( ac[blk], h->dequant4_mf[CQM_4IY], i_qp );
    }

    /* DC block: transform, quantize and store */
    h->dctf.dct4x4dc( dc );
    h->quantf.quant_4x4_dc( dc, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
    h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dc );

    /* DC block: reconstruct */
    h->dctf.idct4x4dc( dc );
    x264_mb_dequant_4x4_dc( dc, h->dequant4_mf[CQM_4IY], i_qp );  /* XXX not inversed (note kept from the original code) */

    /* put the reconstructed DC values back into their blocks */
    for( blk = 0; blk < 16; blk++ )
        ac[blk][0][0] = dc[0][block_idx_xy_1d[blk]];

    /* put pixels to fdec */
    h->dctf.add16x16_idct( fdec, ac );
}

207
/* Encode both 8x8 chroma planes of the current macroblock.
 *  b_inter: non-zero for inter macroblocks; it is added to CQM_4IC to select
 *           the quant matrix category and it enables decimation.
 *  i_qp:    chroma quantizer.
 * Expects the prediction to be in fdec already. Fills h->dct.luma4x4[16..23]
 * (AC levels), h->dct.chroma_dc[], the chroma entries of the non_zero_count
 * cache and h->mb.i_cbp_chroma. */
void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
{
    int i, ch;
    int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
    const int cqm = CQM_4IC + b_inter;

    for( ch = 0; ch < 2; ch++ )
    {
        uint8_t *fenc = h->mb.pic.p_fenc[1+ch];
        uint8_t *fdec = h->mb.pic.p_fdec[1+ch];
        const int first = 16 + ch*4;   /* index of this plane's first block in h->dct.luma4x4 */
        int score = 0;

        DECLARE_ALIGNED_16( int16_t dc[2][2]  );
        DECLARE_ALIGNED_16( int16_t ac[4][4][4] );

        if( h->mb.b_lossless )
        {
            for( i = 0; i < 4; i++ )
            {
                int col = block_idx_x[i]*4;
                int row = block_idx_y[i]*4;
                h->zigzagf.sub_4x4( h->dct.luma4x4[first+i],
                                    fenc + col + row*FENC_STRIDE,
                                    fdec + col + row*FDEC_STRIDE );
                /* move the first coefficient into the chroma DC array */
                h->dct.chroma_dc[ch][i] = h->dct.luma4x4[first+i][0];
                h->dct.luma4x4[first+i][0] = 0;
            }
            continue;
        }

        h->dctf.sub8x8_dct( ac, fenc, fdec );
        for( i = 0; i < 4; i++ )
        {
            /* pull the DC coefficient out of the block */
            dc[i>>1][i&1] = ac[i][0][0];
            ac[i][0][0] = 0;

            /* no trellis; it doesn't seem to help chroma noticeably */
            h->quantf.quant_4x4( ac[i], h->quant4_mf[cqm][i_qp], h->quant4_bias[cqm][i_qp] );
            h->zigzagf.scan_4x4( h->dct.luma4x4[first+i], ac[i] );

            /* score the 15 entries after the (zeroed) DC slot */
            if( b_decimate )
                score += x264_mb_decimate_score( h->dct.luma4x4[first+i]+1, 15 );
        }

        /* DC block: transform, quantize and store */
        h->dctf.dct2x2dc( dc );
        h->quantf.quant_2x2_dc( dc, h->quant4_mf[cqm][i_qp][0]>>1, h->quant4_bias[cqm][i_qp][0]<<1 );
        zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dc );

        /* DC block: reconstruct */
        h->dctf.idct2x2dc( dc );
        x264_mb_dequant_2x2_dc( dc, h->dequant4_mf[cqm], i_qp );  /* XXX not inversed (note kept from the original code) */

        if( b_decimate && score < 7 )
        {
            /* Near null chroma 8x8 block so make it null (bits saving) */
            memset( &h->dct.luma4x4[first], 0, 4 * sizeof( *h->dct.luma4x4 ) );
            /* no DC either: nothing to add, fdec keeps the prediction */
            if( !array_non_zero( dc ) )
                continue;
            memset( ac, 0, sizeof( ac ) );
        }
        else
        {
            for( i = 0; i < 4; i++ )
                h->quantf.dequant_4x4( ac[i], h->dequant4_mf[cqm], i_qp );
        }

        /* put the reconstructed DC values back and output samples to fdec */
        for( i = 0; i < 4; i++ )
            ac[i][0][0] = dc[i>>1][i&1];
        h->dctf.add8x8_idct( fdec, ac );
    }

    /* coded block pattern */
    h->mb.i_cbp_chroma = 0;
    for( i = 16; i < 24; i++ )
    {
        int nz = array_non_zero( h->dct.luma4x4[i] );
        h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
        h->mb.i_cbp_chroma |= nz;
    }

    if( h->mb.i_cbp_chroma )
    {
        /* dc+ac (we can't do only ac) */
        h->mb.i_cbp_chroma = 2;
    }
    else if( array_non_zero( h->dct.chroma_dc ) )
    {
        /* dc only */
        h->mb.i_cbp_chroma = 1;
    }
}

292 293 294 295
/* Mark the current macroblock as carrying no coefficients: clears the
 * non_zero_count cache, both coded block patterns and the stored cbp. */
static void x264_macroblock_encode_skip( x264_t *h )
{
    memset( h->mb.cache.non_zero_count, 0, X264_SCAN8_SIZE );

    h->mb.i_cbp_chroma = 0x00;
    h->mb.i_cbp_luma = 0x00;

    /* store cbp */
    h->mb.cbp[h->mb.i_mb_xy] = 0;
}

Laurent Aimar's avatar
Laurent Aimar committed
301 302 303 304
/*****************************************************************************
 * x264_macroblock_encode_pskip:
 *  Encode an already marked skip block
 *****************************************************************************/
Loic Le Loarer's avatar
Loic Le Loarer committed
305
static void x264_macroblock_encode_pskip( x264_t *h )
Laurent Aimar's avatar
Laurent Aimar committed
306
{
307 308 309 310
    const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
                                h->mb.mv_min[0], h->mb.mv_max[0] );
    const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
                                h->mb.mv_min[1], h->mb.mv_max[1] );
Laurent Aimar's avatar
Laurent Aimar committed
311

312
    /* don't do pskip motion compensation if it was already done in macroblock_analyse */
313
    if( !h->mb.b_skip_mc )
314 315 316 317
    {
        h->mc.mc_luma( h->mb.pic.p_fdec[0],    FDEC_STRIDE,
                       h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
                       mvx, mvy, 16, 16 );
Laurent Aimar's avatar
Laurent Aimar committed
318

319 320 321
        h->mc.mc_chroma( h->mb.pic.p_fdec[1],       FDEC_STRIDE,
                         h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
                         mvx, mvy, 8, 8 );
Laurent Aimar's avatar
Laurent Aimar committed
322

323 324 325 326
        h->mc.mc_chroma( h->mb.pic.p_fdec[2],       FDEC_STRIDE,
                         h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
                         mvx, mvy, 8, 8 );
    }
Laurent Aimar's avatar
Laurent Aimar committed
327

328
    x264_macroblock_encode_skip( h );
Laurent Aimar's avatar
Laurent Aimar committed
329 330
}

331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
/*****************************************************************************
 * Intra prediction for predictive lossless mode.
 *****************************************************************************/

/* Note that these functions take a shortcut (mc.copy instead of actual pixel prediction) which assumes
 * that the edge pixels of the reconstructed frame are the same as that of the source frame.  This means
 * they will only work correctly if the neighboring blocks are losslessly coded.  In practice, this means
 * lossless mode cannot be mixed with lossy mode within a frame. */
/* This can be resolved by explicitly copying the edge pixels after doing the mc.copy, but this doesn't
 * need to be done unless we decide to allow mixing lossless and lossy compression. */

/* Lossless chroma prediction for both planes.
 * Vertical and horizontal modes copy 8x8 pixels from the source plane,
 * starting one row above (V) or one pixel to the left (H) of the block.
 * Every other mode uses the regular predictor on fdec. */
void x264_predict_lossless_8x8_chroma( x264_t *h, int i_mode )
{
    int stride = h->fenc->i_stride[1] << h->mb.b_interlaced;
    int plane, back;

    if( i_mode != I_PRED_CHROMA_V && i_mode != I_PRED_CHROMA_H )
    {
        for( plane = 1; plane <= 2; plane++ )
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[plane] );
        return;
    }

    /* how far to step back in the source plane */
    back = ( i_mode == I_PRED_CHROMA_V ) ? stride : 1;
    for( plane = 1; plane <= 2; plane++ )
        h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[plane], FDEC_STRIDE,
                               h->mb.pic.p_fenc_plane[plane]-back, stride, 8 );
}

/* Lossless prediction of luma 4x4 block idx into p_dst.
 * Vertical and horizontal modes copy 4x4 pixels from the source plane,
 * starting one row above (V) or one pixel to the left (H) of the block.
 * Every other mode uses the regular predictor on p_dst. */
void x264_predict_lossless_4x4( x264_t *h, uint8_t *p_dst, int idx, int i_mode )
{
    int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    uint8_t *block = h->mb.pic.p_fenc_plane[0] + block_idx_x[idx]*4 + block_idx_y[idx]*4 * stride;

    if( i_mode == I_PRED_4x4_V )
    {
        h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, block-stride, stride, 4 );
        return;
    }
    if( i_mode == I_PRED_4x4_H )
    {
        h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, block-1, stride, 4 );
        return;
    }

    h->predict_4x4[i_mode]( p_dst );
}

/* Lossless prediction of luma 8x8 block idx (0..3, raster order) into p_dst.
 * Vertical and horizontal modes copy 8x8 pixels from the source plane,
 * starting one row above (V) or one pixel to the left (H) of the block.
 * Every other mode uses the regular predictor with the supplied edge buffer. */
void x264_predict_lossless_8x8( x264_t *h, uint8_t *p_dst, int idx, int i_mode, uint8_t edge[33] )
{
    int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    uint8_t *block = h->mb.pic.p_fenc_plane[0] + (idx&1)*8 + (idx>>1)*8*stride;

    if( i_mode == I_PRED_8x8_V )
    {
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, block-stride, stride, 8 );
        return;
    }
    if( i_mode == I_PRED_8x8_H )
    {
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, block-1, stride, 8 );
        return;
    }

    h->predict_8x8[i_mode]( p_dst, edge );
}

/* Lossless prediction of the whole 16x16 luma block into fdec.
 * Vertical mode copies from one row above in the source plane.
 * Horizontal mode copies from one pixel to the left, which is why it goes
 * through the unaligned copy. Every other mode uses the regular predictor. */
void x264_predict_lossless_16x16( x264_t *h, int i_mode )
{
    int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    uint8_t *fdec = h->mb.pic.p_fdec[0];

    if( i_mode == I_PRED_16x16_V )
    {
        h->mc.copy[PIXEL_16x16]( fdec, FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-stride, stride, 16 );
        return;
    }
    if( i_mode == I_PRED_16x16_H )
    {
        h->mc.copy_16x16_unaligned( fdec, FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-1, stride, 16 );
        return;
    }

    h->predict_16x16[i_mode]( fdec );
}

Laurent Aimar's avatar
Laurent Aimar committed
399 400 401 402 403 404
/*****************************************************************************
 * x264_macroblock_encode:
 *  Encode the current macroblock according to h->mb.i_type:
 *   - P_SKIP / B_SKIP: motion compensation only, no coefficients;
 *   - I_16x16 / I_8x8 / I_4x4: intra prediction, then transform coding;
 *   - anything else: inter motion compensation, then transform coding with
 *     optional coefficient decimation.
 *  Chroma is then encoded, the coded block patterns and the non_zero_count
 *  cache are filled in, and a P_L0 16x16 / B_DIRECT macroblock that ended up
 *  with no coefficients is converted to P_SKIP / B_SKIP.
 *
 *  Multi-byte fills and compares go through memset/memcmp rather than casts
 *  to wider integer types, so they carry no alignment or aliasing assumptions.
 *****************************************************************************/
void x264_macroblock_encode( x264_t *h )
{
    int i_cbp_dc = 0;
    int i_qp = h->mb.i_qp;
    int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
    int b_force_no_skip = 0;
    int i,j,idx;
    /* one flag per 8x8 luma block: may it contain non-zero coefficients? */
    uint8_t nnz8x8[4] = {1,1,1,1};

    if( h->sh.b_mbaff
        && h->mb.i_mb_xy == h->sh.i_first_mb + h->mb.i_mb_stride
        && IS_SKIP(h->mb.type[h->sh.i_first_mb]) )
    {
        /* The first skip is predicted to be a frame mb pair.
         * We don't yet support the aff part of mbaff, so force it to non-skip
         * so that we can pick the aff flag. */
        b_force_no_skip = 1;
        if( IS_SKIP(h->mb.i_type) )
        {
            if( h->mb.i_type == P_SKIP )
                h->mb.i_type = P_L0;
            else if( h->mb.i_type == B_SKIP )
                h->mb.i_type = B_DIRECT;
        }
    }

    if( h->mb.i_type == P_SKIP )
    {
        /* A bit special */
        x264_macroblock_encode_pskip( h );
        return;
    }
    if( h->mb.i_type == B_SKIP )
    {
        /* don't do bskip motion compensation if it was already done in macroblock_analyse */
        if( !h->mb.b_skip_mc )
            x264_mb_mc( h );
        x264_macroblock_encode_skip( h );
        return;
    }

    if( h->mb.i_type == I_16x16 )
    {
        const int i_mode = h->mb.i_intra16x16_pred_mode;
        h->mb.b_transform_8x8 = 0;

        if( h->mb.b_lossless )
            x264_predict_lossless_16x16( h, i_mode );
        else
            h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );

        /* encode the 16x16 macroblock */
        x264_mb_encode_i16x16( h, i_qp );
    }
    else if( h->mb.i_type == I_8x8 )
    {
        DECLARE_ALIGNED_16( uint8_t edge[33] );
        h->mb.b_transform_8x8 = 1;
        /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 );
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
                h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) );
        }
        for( i = h->mb.i_skip_intra ? 3 : 0 ; i < 4; i++ )
        {
            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
            x264_predict_8x8_filter( p_dst, edge, h->mb.i_neighbour8[i], x264_pred_i4x4_neighbors[i_mode] );

            if( h->mb.b_lossless )
                x264_predict_lossless_8x8( h, p_dst, i, i_mode, edge );
            else
                h->predict_8x8[i_mode]( p_dst, edge );

            x264_mb_encode_i8x8( h, i, i_qp );
        }
        for( i = 0; i < 4; i++ )
            nnz8x8[i] = array_non_zero( h->dct.luma8x8[i] );
    }
    else if( h->mb.i_type == I_4x4 )
    {
        h->mb.b_transform_8x8 = 0;
        /* If we already encoded 15 of the 16 i4x4 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
                h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
        }
        for( i = h->mb.i_skip_intra ? 15 : 0 ; i < 16; i++ )
        {
            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i]];
            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];

            if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                /* emulate missing topright samples: replicate the last top
                 * pixel into the four pixels to its right */
                memset( &p_dst[4-FDEC_STRIDE], p_dst[3-FDEC_STRIDE], sizeof(uint32_t) );

            if( h->mb.b_lossless )
                x264_predict_lossless_4x4( h, p_dst, i, i_mode );
            else
                h->predict_4x4[i_mode]( p_dst );
            x264_mb_encode_i4x4( h, i, i_qp );
        }
    }
    else    /* Inter MB */
    {
        int i8x8, i4x4;
        int i_decimate_mb = 0;

        /* Don't repeat motion compensation if it was already done in non-RD transform analysis */
        if( !h->mb.b_skip_mc )
            x264_mb_mc( h );

        if( h->mb.b_lossless )
        {
            if( h->mb.b_transform_8x8 )
                for( i8x8 = 0; i8x8 < 4; i8x8++ )
                {
                    int x = 8*(i8x8&1);
                    int y = 8*(i8x8>>1);
                    h->zigzagf.sub_8x8( h->dct.luma8x8[i8x8],
                                        h->mb.pic.p_fenc[0]+x+y*FENC_STRIDE,
                                        h->mb.pic.p_fdec[0]+x+y*FDEC_STRIDE );
                    nnz8x8[i8x8] = array_non_zero( h->dct.luma8x8[i8x8] );
                }
            else
                for( i4x4 = 0; i4x4 < 16; i4x4++ )
                {
                    h->zigzagf.sub_4x4( h->dct.luma4x4[i4x4],
                                        h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4x4],
                                        h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4x4] );
                }
        }
        else if( h->mb.b_transform_8x8 )
        {
            DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
            b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
            h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
            h->nr_count[1] += h->mb.b_noise_reduction * 4;

            for( idx = 0; idx < 4; idx++ )
            {
                if( h->mb.b_noise_reduction )
                    h->quantf.denoise_dct( *dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
                x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );

                h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8[idx] );

                if( b_decimate )
                {
                    int i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
                    i_decimate_mb += i_decimate_8x8;
                    if( i_decimate_8x8 < 4 )
                        nnz8x8[idx] = 0;
                }
                else
                    nnz8x8[idx] = array_non_zero( dct8x8[idx] );
            }

            if( i_decimate_mb < 6 && b_decimate )
                /* the whole macroblock is cheap enough to drop */
                memset( nnz8x8, 0, sizeof(nnz8x8) );
            else
            {
                for( idx = 0; idx < 4; idx++ )
                    if( nnz8x8[idx] )
                    {
                        h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
                        h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[0][(idx&1)*8 + (idx>>1)*8*FDEC_STRIDE], dct8x8[idx] );
                    }
            }
        }
        else
        {
            DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
            h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
            h->nr_count[0] += h->mb.b_noise_reduction * 16;

            for( i8x8 = 0; i8x8 < 4; i8x8++ )
            {
                int i_decimate_8x8;

                /* encode one 4x4 block */
                i_decimate_8x8 = 0;
                for( i4x4 = 0; i4x4 < 4; i4x4++ )
                {
                    idx = i8x8 * 4 + i4x4;

                    if( h->mb.b_noise_reduction )
                        h->quantf.denoise_dct( *dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
                    x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );

                    h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );

                    /* stop scoring once this 8x8 block's score exceeds 6 */
                    if( b_decimate && i_decimate_8x8 <= 6 )
                        i_decimate_8x8 += x264_mb_decimate_score( h->dct.luma4x4[idx], 16 );
                }

                /* decimate this 8x8 block */
                i_decimate_mb += i_decimate_8x8;
                if( i_decimate_8x8 < 4 && b_decimate )
                    nnz8x8[i8x8] = 0;
            }

            if( i_decimate_mb < 6 && b_decimate )
                /* the whole macroblock is cheap enough to drop */
                memset( nnz8x8, 0, sizeof(nnz8x8) );
            else
            {
                for( i8x8 = 0; i8x8 < 4; i8x8++ )
                    if( nnz8x8[i8x8] )
                    {
                        for( i = 0; i < 4; i++ )
                            h->quantf.dequant_4x4( dct4x4[i8x8*4+i], h->dequant4_mf[CQM_4PY], i_qp );
                        h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
                    }
            }
        }
    }

    /* encode chroma */
    if( IS_INTRA( h->mb.i_type ) )
    {
        const int i_mode = h->mb.i_chroma_pred_mode;
        if( h->mb.b_lossless )
            x264_predict_lossless_8x8_chroma( h, i_mode );
        else
        {
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
        }
    }

    /* encode the 8x8 blocks */
    x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );

    /* coded block pattern and non_zero_count */
    h->mb.i_cbp_luma = 0x00;
    if( h->mb.i_type == I_16x16 )
    {
        for( i = 0; i < 16; i++ )
        {
            int nz = array_non_zero( h->dct.luma4x4[i] );
            h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
            h->mb.i_cbp_luma |= nz;
        }
        /* all four cbp bits are set together for I_16x16 */
        h->mb.i_cbp_luma *= 0xf;
    }
    else
    {
        for( i = 0; i < 4; i++)
        {
            /* Each fill below covers the two bytes starting at the given
             * cache entry, exactly as a 16-bit store would. */
            if(!nnz8x8[i])
            {
                memset( &h->mb.cache.non_zero_count[x264_scan8[0+i*4]], 0, sizeof(uint16_t) );
                memset( &h->mb.cache.non_zero_count[x264_scan8[2+i*4]], 0, sizeof(uint16_t) );
            }
            else if( h->mb.b_transform_8x8 )
            {
                memset( &h->mb.cache.non_zero_count[x264_scan8[0+4*i]], nnz8x8[i], sizeof(uint16_t) );
                memset( &h->mb.cache.non_zero_count[x264_scan8[2+4*i]], nnz8x8[i], sizeof(uint16_t) );
                h->mb.i_cbp_luma |= nnz8x8[i] << i;
            }
            else
            {
                int nz, cbp = 0;
                for( j = 0; j < 4; j++ )
                {
                    nz = array_non_zero( h->dct.luma4x4[j+4*i] );
                    h->mb.cache.non_zero_count[x264_scan8[j+4*i]] = nz;
                    cbp |= nz;
                }
                h->mb.i_cbp_luma |= cbp << i;
            }
        }
    }

    if( h->param.b_cabac )
    {
        /* bit 0: luma 16x16 DC, bit 1: first chroma DC, bit 2: second chroma DC */
        i_cbp_dc = ( h->mb.i_type == I_16x16 && array_non_zero( h->dct.luma16x16_dc ) )
                 | array_non_zero( h->dct.chroma_dc[0] ) << 1
                 | array_non_zero( h->dct.chroma_dc[1] ) << 2;
    }

    /* store cbp */
    h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;

    /* Check for P_SKIP
     * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
     *      (if multiple mv give same result)*/
    if( !b_force_no_skip )
    {
        /* the mv test compares the same four bytes a 32-bit load would read */
        if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
            !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) &&
            !memcmp( h->mb.cache.mv[0][x264_scan8[0]], h->mb.cache.pskip_mv, sizeof(uint32_t) )
            && h->mb.cache.ref[0][x264_scan8[0]] == 0 )
        {
            h->mb.i_type = P_SKIP;
        }

        /* Check for B_SKIP */
        if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) )
        {
            h->mb.i_type = B_SKIP;
        }
    }
}

/*****************************************************************************
 * x264_macroblock_probe_skip:
 *  Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
 *  the previous QP)
 *****************************************************************************/
718
int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
{
    /* Scratch buffers: four 4x4 blocks of one 8x8 area, the 2x2 chroma DC
     * block, and one zigzag-scanned 4x4 block.
     * NOTE(review): array_non_zero() is always handed these arrays directly
     * (never a pointer alias) in case it is a sizeof-based macro -- confirm. */
    DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
    DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
    DECLARE_ALIGNED_16( int16_t dctscan[16] );

    int mvp[2];             /* only written and read when !b_bidir */
    int qp, ssd_limit;
    int score;
    int blk8, blk4, ch;

    if( !b_bidir )
    {
        /* Build the prediction from the predicted skip MV, clipped to the
         * allowed MV range, and render it into the luma fdec buffer.
         * NOTE(review): for b_bidir the prediction is presumably already
         * present in p_fdec -- verify against the callers. */
        mvp[0] = x264_clip3( h->mb.cache.pskip_mv[0], h->mb.mv_min[0], h->mb.mv_max[0] );
        mvp[1] = x264_clip3( h->mb.cache.pskip_mv[1], h->mb.mv_min[1], h->mb.mv_max[1] );

        h->mc.mc_luma( h->mb.pic.p_fdec[0],    FDEC_STRIDE,
                       h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
                       mvp[0], mvp[1], 16, 16 );
    }

    /* ---- luma: the decimate score is accumulated over the whole MB ---- */
    qp = h->mb.i_qp;
    score = 0;
    for( blk8 = 0; blk8 < 4; blk8++ )
    {
        const int enc_off = (blk8&1)*8 + (blk8>>1)*8*FENC_STRIDE;
        const int dec_off = (blk8&1)*8 + (blk8>>1)*8*FDEC_STRIDE;

        /* residual DCT of this 8x8 quadrant (as four 4x4 blocks) */
        h->dctf.sub8x8_dct( dct4x4, h->mb.pic.p_fenc[0] + enc_off,
                                    h->mb.pic.p_fdec[0] + dec_off );

        for( blk4 = 0; blk4 < 4; blk4++ )
        {
            h->quantf.quant_4x4( dct4x4[blk4], h->quant4_mf[CQM_4PY][qp], h->quant4_bias[CQM_4PY][qp] );

            /* all-zero blocks contribute nothing to the score */
            if( array_non_zero(dct4x4[blk4]) )
            {
                h->zigzagf.scan_4x4( dctscan, dct4x4[blk4] );
                score += x264_mb_decimate_score( dctscan, 16 );
                if( score >= 6 )
                    return 0;
            }
        }
    }

    /* ---- chroma: each plane is checked on its own ---- */
    qp = h->mb.i_chroma_qp;
    ssd_limit = (x264_lambda2_tab[qp] + 32) >> 6;

    for( ch = 0; ch < 2; ch++ )
    {
        uint8_t *orig  = h->mb.pic.p_fenc[1+ch];
        uint8_t *recon = h->mb.pic.p_fdec[1+ch];

        if( !b_bidir )
        {
            h->mc.mc_chroma( recon,                        FDEC_STRIDE,
                             h->mb.pic.p_fref[0][0][4+ch], h->mb.pic.i_stride[1+ch],
                             mvp[0], mvp[1], 8, 8 );
        }

        /* Chroma almost never causes a termination, but the check cannot be
         * dropped entirely; use the SSD as a cheap early-out and only run the
         * transform/quant test when the SSD reaches the threshold. */
        if( h->pixf.ssd[PIXEL_8x8]( recon, FDEC_STRIDE, orig, FENC_STRIDE ) < ssd_limit )
            continue;

        h->dctf.sub8x8_dct( dct4x4, orig, recon );

        /* gather the DC coefficient of each 4x4 block into the 2x2 DC block */
        for( blk4 = 0; blk4 < 4; blk4++ )
            dct2x2[blk4>>1][blk4&1] = dct4x4[blk4][0][0];

        h->dctf.dct2x2dc( dct2x2 );
        h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4PC][qp][0]>>1, h->quant4_bias[CQM_4PC][qp][0]<<1 );

        /* any surviving chroma DC coefficient rules out a skip */
        if( array_non_zero(dct2x2) )
            return 0;

        /* AC coefficients: the score restarts at zero for each plane and the
         * DC position (first scanned coefficient) is excluded */
        score = 0;
        for( blk4 = 0; blk4 < 4; blk4++ )
        {
            h->quantf.quant_4x4( dct4x4[blk4], h->quant4_mf[CQM_4PC][qp], h->quant4_bias[CQM_4PC][qp] );

            if( array_non_zero(dct4x4[blk4]) )
            {
                h->zigzagf.scan_4x4( dctscan, dct4x4[blk4] );
                score += x264_mb_decimate_score( dctscan+1, 15 );
                if( score >= 7 )
                    return 0;
            }
        }
    }

    /* Skippable: flag it in b_skip_mc.
     * NOTE(review): presumably lets later code skip redoing the motion
     * compensation done above -- confirm where b_skip_mc is consumed. */
    h->mb.b_skip_mc = 1;
    return 1;
}
812 813 814 815 816 817 818 819 820

/****************************************************************************
 * DCT-domain noise reduction / adaptive deadzone
 * from libavcodec
 ****************************************************************************/

void x264_noise_reduction_update( x264_t *h )
{
    int cat;

    /* Two coefficient categories are maintained: cat 0 uses the 16-entry
     * dct4 weight table, cat 1 the 64-entry dct8 weight table. */
    for( cat = 0; cat < 2; cat++ )
    {
        const uint16_t *weight;
        int size;
        int count_limit;
        int k;

        if( cat )
        {
            size        = 64;
            weight      = x264_dct8_weight2_tab;
            count_limit = 1<<16;
        }
        else
        {
            size        = 16;
            weight      = x264_dct4_weight2_tab;
            count_limit = 1<<18;
        }

        /* Once enough samples have been gathered, halve both the residual
         * sums and the sample count together. */
        if( h->nr_count[cat] > count_limit )
        {
            for( k = 0; k < size; k++ )
                h->nr_residual_sum[cat][k] >>= 1;
            h->nr_count[cat] >>= 1;
        }

        /* offset = strength * count / (weighted residual sum), rounded;
         * the +1 in the divisor avoids a division by zero. */
        for( k = 0; k < size; k++ )
        {
            const uint64_t numer = (uint64_t)h->param.analyse.i_noise_reduction * h->nr_count[cat]
                                 + h->nr_residual_sum[cat][k]/2;
            const uint64_t denom = (uint64_t)h->nr_residual_sum[cat][k] * weight[k]/256 + 1;

            h->nr_offset[cat][k] = numer / denom;
        }
    }
}

841 842 843 844 845 846 847 848 849
/*****************************************************************************
 * RD only; 4 calls to this do not make up for one macroblock_encode.
 * doesn't transform chroma dc.
 *****************************************************************************/
void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
{
    int i_qp = h->mb.i_qp;
    uint8_t *p_fenc = h->mb.pic.p_fenc[0] + (i8&1)*8 + (i8>>1)*8*FENC_STRIDE;
    uint8_t *p_fdec = h->mb.pic.p_fdec[0] + (i8&1)*8 + (i8>>1)*8*FDEC_STRIDE;
850
    int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
851
    int nnz8x8 = 0;
852 853 854 855
    int ch;

    x264_mb_mc_8x8( h, i8 );

856
    if( h->mb.b_lossless )
857
    {
858
        int i4;
859 860 861 862 863 864
        if( h->mb.b_transform_8x8 )
        {
            h->zigzagf.sub_4x4( h->dct.luma4x4[i8], p_fenc, p_fdec );
            nnz8x8 = array_non_zero( h->dct.luma8x8[i8] );
        }
        else
865
        {
Fiona Glaser's avatar