/*****************************************************************************
 * macroblock.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2003-2008 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/

25
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
26 27
#include "macroblock.h"

Fiona Glaser's avatar
Fiona Glaser committed
28 29
/* These chroma DC functions don't have assembly versions and are only used here. */

30
/* Flatten the 2x2 DC block into 4 levels.
 * level[i] receives dct[x][y] for (y,x) = (0,0), (0,1), (1,0), (1,1),
 * i.e. the block is read column by column with respect to dct[][]. */
static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[2][2] )
{
    level[0] = dct[0][0];
    level[1] = dct[1][0];
    level[2] = dct[0][1];
    level[3] = dct[1][1];
}
Laurent Aimar's avatar
Laurent Aimar committed
39

Fiona Glaser's avatar
Fiona Glaser committed
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
/* Inverse 2x2 transform of the DC block combined with dequantization.
 *
 * dct        : 2x2 DC coefficients (read only).
 * dct4x4     : the four results are stored into coefficient [0][0] of
 *              blocks 0..3; nothing else in dct4x4 is written.
 * dequant_mf : dequant table; only entry [i_qp%6][0][0] is used.
 * i_qp       : quantizer; i_qp/6 selects the scaling shift.
 *
 * The scale factor is shifted left by (i_qp/6 - 5) when that amount is
 * positive; otherwise the product is shifted right by (5 - i_qp/6). */
static inline void idct_dequant_2x2_dc( int16_t dct[2][2], int16_t dct4x4[4][4][4], int dequant_mf[6][4][4], int i_qp )
{
    /* first butterfly stage: sums and differences within each row of dct */
    const int sum_r0  = dct[0][0] + dct[0][1];
    const int sum_r1  = dct[1][0] + dct[1][1];
    const int diff_r0 = dct[0][0] - dct[0][1];
    const int diff_r1 = dct[1][0] - dct[1][1];

    int scale = dequant_mf[i_qp%6][0][0];
    int rshift = 5 - i_qp/6;

    if( rshift < 0 )
    {
        /* large QP: fold the whole shift into the scale factor */
        scale <<= -rshift;
        rshift = 0;
    }

    /* second butterfly stage, scaled */
    dct4x4[0][0][0] = (sum_r0  + sum_r1 ) * scale >> rshift;
    dct4x4[1][0][0] = (sum_r0  - sum_r1 ) * scale >> rshift;
    dct4x4[2][0][0] = (diff_r0 + diff_r1) * scale >> rshift;
    dct4x4[3][0][0] = (diff_r0 - diff_r1) * scale >> rshift;
}

/* Forward 2x2 transform of the DC coefficients of four 4x4 blocks.
 *
 * Reads coefficient [0][0] of dct4x4[0..3], stores the transformed values
 * in d, and then clears those four DC coefficients in dct4x4. */
static inline void dct2x2dc( int16_t d[2][2], int16_t dct4x4[4][4][4] )
{
    int blk;

    /* first stage: pairwise sums and differences of the DC terms */
    const int sum01  = dct4x4[0][0][0] + dct4x4[1][0][0];
    const int sum23  = dct4x4[2][0][0] + dct4x4[3][0][0];
    const int diff01 = dct4x4[0][0][0] - dct4x4[1][0][0];
    const int diff23 = dct4x4[2][0][0] - dct4x4[3][0][0];

    /* second stage */
    d[0][0] = sum01  + sum23;
    d[1][0] = diff01 + diff23;
    d[0][1] = sum01  - sum23;
    d[1][1] = diff01 - diff23;

    /* the DC terms now live in d; remove them from the 4x4 blocks */
    for( blk = 0; blk < 4; blk++ )
        dct4x4[blk][0][0] = 0;
}

75
/* Quantize one 4x4 coefficient block.
 *
 * The quant category is CQM_4IY when b_intra is set, CQM_4PY otherwise.
 * When h->mb.b_trellis is set the block goes through
 * x264_quant_4x4_trellis(); otherwise through h->quantf.quant_4x4() with
 * the multiplier/bias tables for the selected category and i_qp.
 * i_ctxBlockCat and idx are only forwarded to the trellis path. */
static ALWAYS_INLINE void x264_quant_4x4( x264_t *h, int16_t dct[4][4], int i_qp, int i_ctxBlockCat, int b_intra, int idx )
{
    int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
    if( h->mb.b_trellis )
        x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, i_ctxBlockCat, b_intra, idx );
    else
        h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
}

84
/* Quantize one 8x8 coefficient block.
 *
 * The quant category is CQM_8IY when b_intra is set, CQM_8PY otherwise.
 * When h->mb.b_trellis is set the block goes through
 * x264_quant_8x8_trellis(); otherwise through h->quantf.quant_8x8() with
 * the multiplier/bias tables for the selected category and i_qp.
 * idx is only forwarded to the trellis path. */
static ALWAYS_INLINE void x264_quant_8x8( x264_t *h, int16_t dct[8][8], int i_qp, int b_intra, int idx )
{
    int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
    if( h->mb.b_trellis )
        x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra, idx );
    else
        h->quantf.quant_8x8( dct, h->quant8_mf[i_quant_cat][i_qp], h->quant8_bias[i_quant_cat][i_qp] );
}

/* Encode intra 4x4 luma block number idx of the current macroblock.
 *
 * The prediction is expected to be in fdec already: the residual is taken
 * as fenc - fdec. In lossless mode the residual is stored directly into
 * h->dct.luma4x4[idx]. Otherwise it is transformed and quantized; if any
 * coefficient survives, the levels are scanned into h->dct.luma4x4[idx]
 * and the block is dequantized and added back to fdec, else the stored
 * levels are cleared and fdec keeps the prediction. */
void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
{
    uint8_t *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
    uint8_t *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
    DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );

    if( h->mb.b_lossless )
    {
        h->zigzagf.sub_4x4( h->dct.luma4x4[idx], p_src, p_dst );
        return;
    }

    h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );

    x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 1, idx );

    if( array_non_zero( dct4x4 ) )
    {
        h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 );
        h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qp );

        /* output samples to fdec */
        h->dctf.add4x4_idct( p_dst, dct4x4 );
    }
    else
        memset( h->dct.luma4x4[idx], 0, sizeof(h->dct.luma4x4[idx]));
}

121
/* Encode intra 8x8 luma block number idx (0..3, raster order within the
 * macroblock: bit 0 of idx selects the column, bit 1 the row).
 *
 * The residual is taken as fenc - fdec. In lossless mode it is stored
 * directly into h->dct.luma8x8[idx]. Otherwise it is transformed,
 * quantized, scanned into h->dct.luma8x8[idx], then dequantized and added
 * back to fdec. */
void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
{
    int x = 8 * (idx&1);
    int y = 8 * (idx>>1);
    uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
    uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
    DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );

    if( h->mb.b_lossless )
    {
        h->zigzagf.sub_8x8( h->dct.luma8x8[idx], p_src, p_dst );
        return;
    }

    h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );

    x264_quant_8x8( h, dct8x8, i_qp, 1, idx );

    h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 );
    h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp );
    h->dctf.add8x8_idct8( p_dst, dct8x8 );
}

144
/* Encode the luma of an intra 16x16 macroblock.
 *
 * The residual is taken as fenc - fdec. The DC coefficient of each of the
 * sixteen 4x4 blocks is moved into a separate 4x4 DC block, which is
 * stored in h->dct.luma16x16_dc; the remaining coefficients are stored in
 * h->dct.luma4x4[0..15] with their DC slot zeroed.
 *
 * In lossless mode the residual is stored without transform or
 * quantization and fdec is left untouched. Otherwise both AC and DC are
 * quantized, dequantized and the reconstruction is added back to fdec. */
static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
{
    uint8_t  *p_src = h->mb.pic.p_fenc[0];
    uint8_t  *p_dst = h->mb.pic.p_fdec[0];

    DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
    DECLARE_ALIGNED_16( int16_t dct_dc4x4[4][4] );

    int i;

    if( h->mb.b_lossless )
    {
        for( i = 0; i < 16; i++ )
        {
            int oe = block_idx_xy_fenc[i];
            int od = block_idx_xy_fdec[i];
            /* flat position (0..15) of this block's DC inside dct_dc4x4 */
            int dc = block_idx_yx_1d[i];
            h->zigzagf.sub_4x4( h->dct.luma4x4[i], p_src+oe, p_dst+od );
            /* index with [row][col] rather than [0][dc] so that the inner
             * 4-element array is never indexed out of bounds */
            dct_dc4x4[dc>>2][dc&3] = h->dct.luma4x4[i][0];
            h->dct.luma4x4[i][0] = 0;
        }
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );
        return;
    }

    h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );
    for( i = 0; i < 16; i++ )
    {
        /* flat position (0..15) of this block's DC inside dct_dc4x4 */
        int dc = block_idx_xy_1d[i];

        /* copy dc coeff */
        dct_dc4x4[dc>>2][dc&3] = dct4x4[i][0][0];
        dct4x4[i][0][0] = 0;

        /* quant/scan/dequant */
        x264_quant_4x4( h, dct4x4[i], i_qp, DCT_LUMA_AC, 1, i );

        h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] );
        h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp );
    }

    h->dctf.dct4x4dc( dct_dc4x4 );
    if( h->mb.b_trellis )
        x264_quant_dc_trellis( h, (int16_t*)dct_dc4x4, CQM_4IY, i_qp, DCT_LUMA_DC, 1);
    else
        h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
    h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );

    /* output samples to fdec */
    h->dctf.idct4x4dc( dct_dc4x4 );
    h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp );  /* XXX not inversed */

    /* calculate dct coeffs */
    for( i = 0; i < 16; i++ )
    {
        int dc = block_idx_xy_1d[i];

        /* copy dc coeff */
        dct4x4[i][0][0] = dct_dc4x4[dc>>2][dc&3];
    }
    /* put pixels to fdec */
    h->dctf.add16x16_idct( p_dst, dct4x4 );
}

203
/* Encode both 8x8 chroma planes of the current macroblock and derive
 * h->mb.i_cbp_chroma.
 *
 * b_inter : non-zero for inter macroblocks; selects the CQM_4IC+b_inter
 *           quant category and enables decimation (for B slices, or when
 *           h->param.analyse.b_dct_decimate is set).
 * i_qp    : quantizer used for quant and dequant of both planes.
 *
 * For each plane the residual (fenc - fdec) is split into a 2x2 DC block,
 * stored in h->dct.chroma_dc[ch], and four AC blocks, stored in
 * h->dct.luma4x4[16+ch*4 .. 19+ch*4]. In lossless mode the residual is
 * stored as is and fdec is left untouched.
 *
 * i_cbp_chroma ends up as 2 if any AC block is non-zero, 1 if only DC is
 * non-zero, 0 otherwise; non_zero_count is updated for the 8 AC blocks. */
void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
{
    int i, ch;
    int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);

    for( ch = 0; ch < 2; ch++ )
    {
        uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
        uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
        int i_decimate_score = 0;

        DECLARE_ALIGNED_16( int16_t dct2x2[2][2]  );
        DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );

        if( h->mb.b_lossless )
        {
            for( i = 0; i < 4; i++ )
            {
                int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
                int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
                h->zigzagf.sub_4x4( h->dct.luma4x4[16+i+ch*4], p_src+oe, p_dst+od );
                h->dct.chroma_dc[ch][i] = h->dct.luma4x4[16+i+ch*4][0];
                h->dct.luma4x4[16+i+ch*4][0] = 0;
            }
            continue;
        }

        h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
        /* extract the four DC terms into dct2x2 (clears them in dct4x4) */
        dct2x2dc( dct2x2, dct4x4 );
        /* calculate dct coeffs */
        for( i = 0; i < 4; i++ )
        {
            if( h->mb.b_trellis )
                x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IC+b_inter, i_qp, DCT_CHROMA_AC, !b_inter, 0 );
            else
                h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
            h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );

            if( b_decimate )
                i_decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16+i+ch*4] );
        }

        if( h->mb.b_trellis )
            x264_quant_dc_trellis( h, (int16_t*)dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter );
        else
            h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );

        if( b_decimate && i_decimate_score < 7 )
        {
            /* Near null chroma 8x8 block so make it null (bits saving) */
            memset( &h->dct.luma4x4[16+ch*4], 0, 4 * sizeof( *h->dct.luma4x4 ) );
            if( !array_non_zero( dct2x2 ) )
            {
                /* nothing left to code: fdec keeps the prediction */
                memset( h->dct.chroma_dc[ch], 0, sizeof( h->dct.chroma_dc[ch] ) );
                continue;
            }
            /* only DC survives: reconstruct from DC alone */
            memset( dct4x4, 0, sizeof( dct4x4 ) );
        }
        else
        {
            for( i = 0; i < 4; i++ )
                h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qp );
        }

        zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
        idct_dequant_2x2_dc( dct2x2, dct4x4, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
        h->dctf.add8x8_idct( p_dst, dct4x4 );
    }

    /* coded block pattern */
    h->mb.i_cbp_chroma = 0;
    for( i = 0; i < 8; i++ )
    {
        int nz = array_non_zero( h->dct.luma4x4[16+i] );
        h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
        h->mb.i_cbp_chroma |= nz;
    }
    if( h->mb.i_cbp_chroma )
        h->mb.i_cbp_chroma = 2;    /* dc+ac (we can't do only ac) */
    else if( array_non_zero( h->dct.chroma_dc ) )
        h->mb.i_cbp_chroma = 1;    /* dc only */
}

286 287 288 289
/* Mark the current macroblock as carrying no residual: clear the luma and
 * chroma coded block patterns, zero the cached non-zero counts and store a
 * zero cbp for this macroblock. */
static void x264_macroblock_encode_skip( x264_t *h )
{
    h->mb.i_cbp_luma = 0x00;
    h->mb.i_cbp_chroma = 0x00;
    memset( h->mb.cache.non_zero_count, 0, X264_SCAN8_SIZE );
    /* store cbp */
    h->mb.cbp[h->mb.i_mb_xy] = 0;
}

Laurent Aimar's avatar
Laurent Aimar committed
295 296 297 298
/*****************************************************************************
 * x264_macroblock_encode_pskip:
 *  Encode an already marked skip block
 *****************************************************************************/
Loic Le Loarer's avatar
Loic Le Loarer committed
299
static void x264_macroblock_encode_pskip( x264_t *h )
Laurent Aimar's avatar
Laurent Aimar committed
300
{
301 302 303 304
    const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
                                h->mb.mv_min[0], h->mb.mv_max[0] );
    const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
                                h->mb.mv_min[1], h->mb.mv_max[1] );
Laurent Aimar's avatar
Laurent Aimar committed
305

306
    /* don't do pskip motion compensation if it was already done in macroblock_analyse */
307
    if( !h->mb.b_skip_mc )
308 309 310 311
    {
        h->mc.mc_luma( h->mb.pic.p_fdec[0],    FDEC_STRIDE,
                       h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
                       mvx, mvy, 16, 16 );
Laurent Aimar's avatar
Laurent Aimar committed
312

313 314 315
        h->mc.mc_chroma( h->mb.pic.p_fdec[1],       FDEC_STRIDE,
                         h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
                         mvx, mvy, 8, 8 );
Laurent Aimar's avatar
Laurent Aimar committed
316

317 318 319 320
        h->mc.mc_chroma( h->mb.pic.p_fdec[2],       FDEC_STRIDE,
                         h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
                         mvx, mvy, 8, 8 );
    }
Laurent Aimar's avatar
Laurent Aimar committed
321

322
    x264_macroblock_encode_skip( h );
Laurent Aimar's avatar
Laurent Aimar committed
323 324
}

325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392
/*****************************************************************************
 * Intra prediction for predictive lossless mode.
 *****************************************************************************/

/* Note that these functions take a shortcut (mc.copy instead of actual pixel prediction) which assumes
 * that the edge pixels of the reconstructed frame are the same as that of the source frame.  This means
 * they will only work correctly if the neighboring blocks are losslessly coded.  In practice, this means
 * lossless mode cannot be mixed with lossy mode within a frame. */
/* This can be resolved by explicitly copying the edge pixels after doing the mc.copy, but this doesn't
 * need to be done unless we decide to allow mixing lossless and lossy compression. */

/* Lossless-mode intra prediction for both 8x8 chroma planes.
 *
 * For the vertical and horizontal modes the prediction is produced by
 * copying an 8x8 area of the source plane displaced by one row up
 * (vertical) or one pixel left (horizontal) into fdec. Every other mode
 * falls back to the regular h->predict_8x8c[] predictor. */
void x264_predict_lossless_8x8_chroma( x264_t *h, int i_mode )
{
    const int src_stride = h->fenc->i_stride[1] << h->mb.b_interlaced;
    int back;   /* how far before the block start the copy begins */
    int plane;

    if( i_mode != I_PRED_CHROMA_V && i_mode != I_PRED_CHROMA_H )
    {
        for( plane = 1; plane <= 2; plane++ )
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[plane] );
        return;
    }

    /* one full row for vertical, a single pixel for horizontal */
    back = ( i_mode == I_PRED_CHROMA_V ) ? src_stride : 1;

    for( plane = 1; plane <= 2; plane++ )
        h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[plane], FDEC_STRIDE,
                               h->mb.pic.p_fenc_plane[plane] - back, src_stride, 8 );
}

/* Lossless-mode intra prediction for 4x4 luma block idx, written to p_dst.
 *
 * Vertical and horizontal modes copy a 4x4 area of the source plane
 * displaced by one row up or one pixel left; every other mode uses the
 * regular h->predict_4x4[] predictor on p_dst. */
void x264_predict_lossless_4x4( x264_t *h, uint8_t *p_dst, int idx, int i_mode )
{
    const int src_stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    const int blk_x = block_idx_x[idx]*4;
    const int blk_y = block_idx_y[idx]*4;
    uint8_t *blk_src = h->mb.pic.p_fenc_plane[0] + blk_x + blk_y * src_stride;

    switch( i_mode )
    {
        case I_PRED_4x4_V:
            h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, blk_src - src_stride, src_stride, 4 );
            break;
        case I_PRED_4x4_H:
            h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, blk_src - 1, src_stride, 4 );
            break;
        default:
            h->predict_4x4[i_mode]( p_dst );
            break;
    }
}

/* Lossless-mode intra prediction for 8x8 luma block idx, written to p_dst.
 *
 * Vertical and horizontal modes copy an 8x8 area of the source plane
 * displaced by one row up or one pixel left; every other mode uses the
 * regular h->predict_8x8[] predictor with the supplied edge buffer. */
void x264_predict_lossless_8x8( x264_t *h, uint8_t *p_dst, int idx, int i_mode, uint8_t edge[33] )
{
    const int src_stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    const int col = (idx&1)*8;
    const int row = (idx>>1)*8;
    uint8_t *blk_src = h->mb.pic.p_fenc_plane[0] + col + row*src_stride;

    if( i_mode == I_PRED_8x8_V )
    {
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, blk_src - src_stride, src_stride, 8 );
        return;
    }
    if( i_mode == I_PRED_8x8_H )
    {
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, blk_src - 1, src_stride, 8 );
        return;
    }
    h->predict_8x8[i_mode]( p_dst, edge );
}

/* Lossless-mode intra prediction for the 16x16 luma block, written to fdec.
 *
 * The vertical mode copies the source plane displaced one row up; the
 * horizontal mode copies it displaced one pixel left using the unaligned
 * 16x16 copy. Every other mode uses the regular h->predict_16x16[]
 * predictor. */
void x264_predict_lossless_16x16( x264_t *h, int i_mode )
{
    const int src_stride = h->fenc->i_stride[0] << h->mb.b_interlaced;

    switch( i_mode )
    {
        case I_PRED_16x16_V:
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE,
                                     h->mb.pic.p_fenc_plane[0] - src_stride, src_stride, 16 );
            break;
        case I_PRED_16x16_H:
            h->mc.copy_16x16_unaligned( h->mb.pic.p_fdec[0], FDEC_STRIDE,
                                        h->mb.pic.p_fenc_plane[0] - 1, src_stride, 16 );
            break;
        default:
            h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
            break;
    }
}

Laurent Aimar's avatar
Laurent Aimar committed
393 394 395 396 397 398
/*****************************************************************************
 * x264_macroblock_encode:
 *  Encode the current macroblock according to h->mb.i_type.
 *
 *  - P_SKIP / B_SKIP: motion compensate (unless already done) and mark the
 *    macroblock as having no residual, then return.
 *  - I_16x16 / I_8x8 / I_4x4: predict and encode luma block by block.
 *  - anything else (inter): motion compensate, then transform, quantize and
 *    optionally decimate the luma residual.
 *  Chroma is then encoded, the coded block patterns and non_zero_count
 *  cache are filled in, and a P_L0 16x16 / B_DIRECT macroblock that ended
 *  up without residual is converted to P_SKIP / B_SKIP.
 *****************************************************************************/
void x264_macroblock_encode( x264_t *h )
{
    int i_cbp_dc = 0;
    int i_qp = h->mb.i_qp;
    int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
    int b_force_no_skip = 0;
    int i,j,idx;
    /* per-8x8 "has coefficients" flags; default to coded */
    uint8_t nnz8x8[4] = {1,1,1,1};

    if( h->sh.b_mbaff
        && h->mb.i_mb_xy == h->sh.i_first_mb + h->mb.i_mb_stride
        && IS_SKIP(h->mb.type[h->sh.i_first_mb]) )
    {
        /* The first skip is predicted to be a frame mb pair.
         * We don't yet support the aff part of mbaff, so force it to non-skip
         * so that we can pick the aff flag. */
        b_force_no_skip = 1;
        if( IS_SKIP(h->mb.i_type) )
        {
            if( h->mb.i_type == P_SKIP )
                h->mb.i_type = P_L0;
            else if( h->mb.i_type == B_SKIP )
                h->mb.i_type = B_DIRECT;
        }
    }

    if( h->mb.i_type == P_SKIP )
    {
        /* A bit special */
        x264_macroblock_encode_pskip( h );
        return;
    }
    if( h->mb.i_type == B_SKIP )
    {
        /* don't do bskip motion compensation if it was already done in macroblock_analyse */
        if( !h->mb.b_skip_mc )
            x264_mb_mc( h );
        x264_macroblock_encode_skip( h );
        return;
    }

    if( h->mb.i_type == I_16x16 )
    {
        const int i_mode = h->mb.i_intra16x16_pred_mode;
        h->mb.b_transform_8x8 = 0;

        if( h->mb.b_lossless )
            x264_predict_lossless_16x16( h, i_mode );
        else
            h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );

        /* encode the 16x16 macroblock */
        x264_mb_encode_i16x16( h, i_qp );
    }
    else if( h->mb.i_type == I_8x8 )
    {
        DECLARE_ALIGNED_16( uint8_t edge[33] );
        h->mb.b_transform_8x8 = 1;
        /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 );
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
                h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) );
        }
        for( i = h->mb.i_skip_intra ? 3 : 0 ; i < 4; i++ )
        {
            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
            x264_predict_8x8_filter( p_dst, edge, h->mb.i_neighbour8[i], x264_pred_i4x4_neighbors[i_mode] );

            if( h->mb.b_lossless )
                x264_predict_lossless_8x8( h, p_dst, i, i_mode, edge );
            else
                h->predict_8x8[i_mode]( p_dst, edge );

            x264_mb_encode_i8x8( h, i, i_qp );
        }
        for( i = 0; i < 4; i++ )
            nnz8x8[i] = array_non_zero( h->dct.luma8x8[i] );
    }
    else if( h->mb.i_type == I_4x4 )
    {
        h->mb.b_transform_8x8 = 0;
        /* If we already encoded 15 of the 16 i4x4 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
                h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
        }
        for( i = h->mb.i_skip_intra ? 15 : 0 ; i < 16; i++ )
        {
            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i]];
            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];

            if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                /* emulate missing topright samples */
                *(uint32_t*) &p_dst[4-FDEC_STRIDE] = p_dst[3-FDEC_STRIDE] * 0x01010101U;

            if( h->mb.b_lossless )
                x264_predict_lossless_4x4( h, p_dst, i, i_mode );
            else
                h->predict_4x4[i_mode]( p_dst );
            x264_mb_encode_i4x4( h, i, i_qp );
        }
    }
    else    /* Inter MB */
    {
        int i8x8, i4x4;
        int i_decimate_mb = 0;

        /* Don't repeat motion compensation if it was already done in non-RD transform analysis */
        if( !h->mb.b_skip_mc )
            x264_mb_mc( h );

        if( h->mb.b_lossless )
        {
            if( h->mb.b_transform_8x8 )
                for( i8x8 = 0; i8x8 < 4; i8x8++ )
                {
                    int x = 8*(i8x8&1);
                    int y = 8*(i8x8>>1);
                    h->zigzagf.sub_8x8( h->dct.luma8x8[i8x8],
                                        h->mb.pic.p_fenc[0]+x+y*FENC_STRIDE,
                                        h->mb.pic.p_fdec[0]+x+y*FDEC_STRIDE );
                    nnz8x8[i8x8] = array_non_zero( h->dct.luma8x8[i8x8] );
                }
            else
                for( i4x4 = 0; i4x4 < 16; i4x4++ )
                {
                    h->zigzagf.sub_4x4( h->dct.luma4x4[i4x4],
                                        h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4x4],
                                        h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4x4] );
                }
        }
        else if( h->mb.b_transform_8x8 )
        {
            DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
            b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
            h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
            h->nr_count[1] += h->mb.b_noise_reduction * 4;

            for( idx = 0; idx < 4; idx++ )
            {
                if( h->mb.b_noise_reduction )
                    h->quantf.denoise_dct( *dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
                x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );

                h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8[idx] );

                if( b_decimate )
                {
                    int i_decimate_8x8 = h->quantf.decimate_score64( h->dct.luma8x8[idx] );
                    i_decimate_mb += i_decimate_8x8;
                    if( i_decimate_8x8 < 4 )
                        nnz8x8[idx] = 0;
                }
                else
                    nnz8x8[idx] = array_non_zero( dct8x8[idx] );
            }

            if( i_decimate_mb < 6 && b_decimate )
                /* whole macroblock decimated; memset instead of a uint32_t
                 * store because nnz8x8 is a plain uint8_t array */
                memset( nnz8x8, 0, sizeof(nnz8x8) );
            else
            {
                for( idx = 0; idx < 4; idx++ )
                    if( nnz8x8[idx] )
                    {
                        h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
                        h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[0][(idx&1)*8 + (idx>>1)*8*FDEC_STRIDE], dct8x8[idx] );
                    }
            }
        }
        else
        {
            DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
            h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
            h->nr_count[0] += h->mb.b_noise_reduction * 16;

            for( i8x8 = 0; i8x8 < 4; i8x8++ )
            {
                int i_decimate_8x8;

                /* encode one 4x4 block */
                i_decimate_8x8 = 0;
                for( i4x4 = 0; i4x4 < 4; i4x4++ )
                {
                    idx = i8x8 * 4 + i4x4;

                    if( h->mb.b_noise_reduction )
                        h->quantf.denoise_dct( *dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
                    x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );

                    h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );

                    if( b_decimate && i_decimate_8x8 <= 6 )
                        i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[idx] );
                }

                /* decimate this 8x8 block */
                i_decimate_mb += i_decimate_8x8;
                if( i_decimate_8x8 < 4 && b_decimate )
                    nnz8x8[i8x8] = 0;
            }

            if( i_decimate_mb < 6 && b_decimate )
                /* whole macroblock decimated; memset instead of a uint32_t
                 * store because nnz8x8 is a plain uint8_t array */
                memset( nnz8x8, 0, sizeof(nnz8x8) );
            else
            {
                for( i8x8 = 0; i8x8 < 4; i8x8++ )
                    if( nnz8x8[i8x8] )
                    {
                        for( i = 0; i < 4; i++ )
                            h->quantf.dequant_4x4( dct4x4[i8x8*4+i], h->dequant4_mf[CQM_4PY], i_qp );
                        h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
                    }
            }
        }
    }

    /* encode chroma */
    if( IS_INTRA( h->mb.i_type ) )
    {
        const int i_mode = h->mb.i_chroma_pred_mode;
        if( h->mb.b_lossless )
            x264_predict_lossless_8x8_chroma( h, i_mode );
        else
        {
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
        }
    }

    /* encode the 8x8 blocks */
    x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );

    /* coded block pattern and non_zero_count */
    h->mb.i_cbp_luma = 0x00;
    if( h->mb.i_type == I_16x16 )
    {
        for( i = 0; i < 16; i++ )
        {
            int nz = array_non_zero( h->dct.luma4x4[i] );
            h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
            h->mb.i_cbp_luma |= nz;
        }
        /* I_16x16 codes luma AC as all-or-nothing: cbp is 0 or 0xf */
        h->mb.i_cbp_luma *= 0xf;
    }
    else
    {
        for( i = 0; i < 4; i++)
        {
            if(!nnz8x8[i])
            {
                *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[0+i*4]] = 0;
                *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[2+i*4]] = 0;
            }
            else if( h->mb.b_transform_8x8 )
            {
                *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[0+4*i]] = nnz8x8[i] * 0x0101;
                *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[2+4*i]] = nnz8x8[i] * 0x0101;
                h->mb.i_cbp_luma |= nnz8x8[i] << i;
            }
            else
            {
                int nz, cbp = 0;
                for( j = 0; j < 4; j++ )
                {
                    nz = array_non_zero( h->dct.luma4x4[j+4*i] );
                    h->mb.cache.non_zero_count[x264_scan8[j+4*i]] = nz;
                    cbp |= nz;
                }
                h->mb.i_cbp_luma |= cbp << i;
            }
        }
    }

    if( h->param.b_cabac )
    {
        i_cbp_dc = ( h->mb.i_type == I_16x16 && array_non_zero( h->dct.luma16x16_dc ) )
                 | array_non_zero( h->dct.chroma_dc[0] ) << 1
                 | array_non_zero( h->dct.chroma_dc[1] ) << 2;
    }

    /* store cbp */
    h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;

    /* Check for P_SKIP
     * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
     *      (if multiple mv give same result)*/
    if( !b_force_no_skip )
    {
        if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
            !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) &&
            *(uint32_t*)h->mb.cache.mv[0][x264_scan8[0]] == *(uint32_t*)h->mb.cache.pskip_mv
            && h->mb.cache.ref[0][x264_scan8[0]] == 0 )
        {
            h->mb.i_type = P_SKIP;
        }

        /* Check for B_SKIP */
        if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) )
        {
            h->mb.i_type = B_SKIP;
        }
    }
}

/*****************************************************************************
 * x264_macroblock_probe_skip:
 *  Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
 *  the previous QP)
 *****************************************************************************/
712
int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
{
    /* Returns 1 (and sets h->mb.b_skip_mc) when the residual of the current
     * macroblock quantizes to something small enough to be dropped, 0 otherwise.
     *
     * b_bidir == 0: the prediction is built here from reference 0 of list 0
     *               using the cached pskip_mv.
     * b_bidir != 0: no motion compensation is done here; whatever is already
     *               in p_fdec is used as the prediction. */
    DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
    DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
    DECLARE_ALIGNED_16( int16_t dctscan[16] );

    int qp = h->mb.i_qp;
    /* Skip motion vector; only assigned and consumed when !b_bidir. */
    int mv_x = 0, mv_y = 0;
    int ssd_thresh;
    int score = 0;
    int blk8, blk4, plane;

    if( !b_bidir )
    {
        /* Skip MV, passed through x264_clip3 with the mv_min/mv_max bounds */
        mv_x = x264_clip3( h->mb.cache.pskip_mv[0], h->mb.mv_min[0], h->mb.mv_max[0] );
        mv_y = x264_clip3( h->mb.cache.pskip_mv[1], h->mb.mv_min[1], h->mb.mv_max[1] );

        /* Luma prediction for the whole 16x16 block, written to fdec */
        h->mc.mc_luma( h->mb.pic.p_fdec[0],    FDEC_STRIDE,
                       h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
                       mv_x, mv_y, 16, 16 );
    }

    /* Luma: one 8x8 quadrant at a time. The decimate score is accumulated
     * over the whole macroblock; reaching 6 means it cannot be skipped. */
    for( blk8 = 0; blk8 < 4; blk8++ )
    {
        int col = (blk8&1) * 8;
        int row = blk8>>1;
        uint8_t *p_enc = h->mb.pic.p_fenc[0] + ( row * FENC_STRIDE * 8 + col );
        uint8_t *p_rec = h->mb.pic.p_fdec[0] + ( row * FDEC_STRIDE * 8 + col );

        /* residual transform of the four 4x4 blocks in this quadrant */
        h->dctf.sub8x8_dct( dct4x4, p_enc, p_rec );

        for( blk4 = 0; blk4 < 4; blk4++ )
        {
            h->quantf.quant_4x4( dct4x4[blk4], h->quant4_mf[CQM_4PY][qp], h->quant4_bias[CQM_4PY][qp] );
            /* blocks that quantize to all-zero contribute nothing */
            if( array_non_zero(dct4x4[blk4]) )
            {
                h->zigzagf.scan_4x4( dctscan, dct4x4[blk4] );
                score += h->quantf.decimate_score16( dctscan );
                if( score >= 6 )
                    return 0;
            }
        }
    }

    /* Chroma: uses the chroma QP and an SSD threshold derived from lambda2. */
    qp = h->mb.i_chroma_qp;
    ssd_thresh = (x264_lambda2_tab[qp] + 32) >> 6;

    for( plane = 1; plane <= 2; plane++ )
    {
        uint8_t *p_enc = h->mb.pic.p_fenc[plane];
        uint8_t *p_rec = h->mb.pic.p_fdec[plane];

        if( !b_bidir )
        {
            h->mc.mc_chroma( p_rec,                          FDEC_STRIDE,
                             h->mb.pic.p_fref[0][0][3+plane], h->mb.pic.i_stride[plane],
                             mv_x, mv_y, 8, 8 );
        }

        /* Chroma almost never causes the probe to fail, but the check cannot
         * be dropped entirely: use the SSD as a cheap early-out and only run
         * the transform/quant test when the SSD reaches the threshold. */
        if( h->pixf.ssd[PIXEL_8x8]( p_rec, FDEC_STRIDE, p_enc, FENC_STRIDE ) < ssd_thresh )
            continue;

        h->dctf.sub8x8_dct( dct4x4, p_enc, p_rec );

        /* DC: any surviving 2x2 DC coefficient rules out a skip */
        dct2x2dc( dct2x2, dct4x4 );
        h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4PC][qp][0]>>1, h->quant4_bias[CQM_4PC][qp][0]<<1 );
        if( array_non_zero(dct2x2) )
            return 0;

        /* AC: the score restarts for each chroma plane; limit is 7 */
        score = 0;
        for( blk4 = 0; blk4 < 4; blk4++ )
        {
            h->quantf.quant_4x4( dct4x4[blk4], h->quant4_mf[CQM_4PC][qp], h->quant4_bias[CQM_4PC][qp] );
            if( array_non_zero(dct4x4[blk4]) )
            {
                h->zigzagf.scan_4x4( dctscan, dct4x4[blk4] );
                score += h->quantf.decimate_score15( dctscan );
                if( score >= 7 )
                    return 0;
            }
        }
    }

    /* Skippable: record that the prediction has already been produced. */
    h->mb.b_skip_mc = 1;
    return 1;
}
802 803 804 805 806 807 808 809 810

/****************************************************************************
 * DCT-domain noise reduction / adaptive deadzone
 * from libavcodec
 ****************************************************************************/

void x264_noise_reduction_update( x264_t *h )
{
    /* Recomputes h->nr_offset[] from the accumulated residual statistics
     * (h->nr_residual_sum[] / h->nr_count[]) for both transform categories:
     *   cat 0: 16 coefficients, weighted by x264_dct4_weight2_tab
     *   cat 1: 64 coefficients, weighted by x264_dct8_weight2_tab */
    int cat;

    for( cat = 0; cat < 2; cat++ )
    {
        const uint16_t *weight;
        uint64_t strength;
        int size, count_limit;
        int i;

        if( cat )
        {
            size        = 64;
            weight      = x264_dct8_weight2_tab;
            count_limit = 1<<16;
        }
        else
        {
            size        = 16;
            weight      = x264_dct4_weight2_tab;
            count_limit = 1<<18;
        }

        /* Once the sample count passes the limit, halve both the sums and the
         * count (presumably to keep the accumulators bounded). */
        if( h->nr_count[cat] > count_limit )
        {
            for( i = 0; i < size; i++ )
                h->nr_residual_sum[cat][i] >>= 1;
            h->nr_count[cat] >>= 1;
        }

        /* Same for every coefficient of this category; uses the count after
         * any halving above. */
        strength = (uint64_t)h->param.analyse.i_noise_reduction * h->nr_count[cat];

        for( i = 0; i < size; i++ )
        {
            /* offset = (strength + sum/2) / (sum*weight/256 + 1);
             * the +1 keeps the divisor non-zero when the sum is 0. */
            uint64_t numer = strength + h->nr_residual_sum[cat][i]/2;
            uint64_t denom = (uint64_t)h->nr_residual_sum[cat][i] * weight[i]/256 + 1;
            h->nr_offset[cat][i] = numer / denom;
        }
    }
}

831 832 833 834 835 836 837 838 839
/*****************************************************************************
 * RD only; 4 calls to this do not make up for one macroblock_encode.
 * doesn't transform chroma dc.
 *****************************************************************************/
void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
{
    int i_qp = h->mb.i_qp;
    uint8_t *p_fenc = h->mb.pic.p_fenc[0] + (i8&1)*8 + (i8>>1)*8*FENC_STRIDE;
    uint8_t *p_fdec = h->mb.pic.p_fdec[0] + (i8&1)*8 + (i8>>1)*8*FDEC_STRIDE;
840
    int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
841
    int nnz8x8 = 0;
842 843 844 845
    int ch;

    x264_mb_mc_8x8( h, i8 );

846
    if( h->mb.b_lossless )
847
    {
848
        int i4;
849 850
        if( h->mb.b_transform_8x8 )
        {
851
            h->zigzagf.sub_8x8( h->dct.luma8x8[i8], p_fenc, p_fdec );
852 853 854
            nnz8x8 = array_non_zero( h->dct.luma8x8[i8] );
        }
        else
855
        {
856 857 858 859 860 861 862
            for( i4 = i8*4; i4 < i8*4+4; i4++ )
            {</