/*****************************************************************************
 * macroblock.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2003-2008 x264 project
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Fiona Glaser <fiona@x264.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/

25
#include "common/common.h"
Laurent Aimar's avatar
Laurent Aimar committed
26 27
#include "macroblock.h"

Fiona Glaser's avatar
Fiona Glaser committed
28 29
/* These chroma DC functions don't have assembly versions and are only used here. */

30
/* Scan a 2x2 chroma DC block into a flat list of 4 coefficients.
 *
 * level: output, receives the four coefficients in scan order.
 * dct:   input 2x2 block; it is not modified.
 *
 * The scan order is dct[0][0], dct[1][0], dct[0][1], dct[1][1], i.e. the
 * first index varies fastest. */
static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[2][2] )
{
    level[0] = dct[0][0];
    level[1] = dct[1][0];
    level[2] = dct[0][1];
    level[3] = dct[1][1];
}
Laurent Aimar's avatar
Laurent Aimar committed
39

40 41 42 43 44 45 46 47 48 49 50 51 52
/* Shared prologue of the two 2x2 chroma DC inverse-transform + dequant helpers.
 * Requires `dct`, `dequant_mf` and `i_qp` to be in scope and declares:
 *   d0..d3 - first stage (row sums and differences) of the 2x2 transform;
 *   dmf    - dequant multiplier taken from position [0][0] of the table for
 *            i_qp%6, already shifted left when i_qp/6 exceeds 5;
 *   qbits  - i_qp/6 - 5, forced to 0 once the left shift has been folded into
 *            dmf, so it is always <= 0 and users shift right by -qbits. */
#define IDCT_DEQUANT_START \
    int d0 = dct[0][0] + dct[0][1]; \
    int d1 = dct[1][0] + dct[1][1]; \
    int d2 = dct[0][0] - dct[0][1]; \
    int d3 = dct[1][0] - dct[1][1]; \
    int dmf = dequant_mf[i_qp%6][0][0]; \
    int qbits = i_qp/6 - 5; \
    if( qbits > 0 ) \
    { \
        dmf <<= qbits; \
        qbits = 0; \
    }

/* Inverse 2x2 transform and dequantization of the chroma DC coefficients.
 *
 * dct:        quantized 2x2 DC block (read only).
 * dct4x4:     four 4x4 blocks; only element [0][0] of each block is written,
 *             with the reconstructed DC value for that block.
 * dequant_mf: dequant tables indexed by i_qp%6; only entry [0][0] is used.
 * i_qp:       quantizer; selects the table (i_qp%6) and the shift (i_qp/6). */
static inline void idct_dequant_2x2_dc( int16_t dct[2][2], int16_t dct4x4[4][4][4], int dequant_mf[6][4][4], int i_qp )
{
    IDCT_DEQUANT_START
    dct4x4[0][0][0] = (d0 + d1) * dmf >> -qbits;
    dct4x4[1][0][0] = (d0 - d1) * dmf >> -qbits;
    dct4x4[2][0][0] = (d2 + d3) * dmf >> -qbits;
    dct4x4[3][0][0] = (d2 - d3) * dmf >> -qbits;
}

/* Same computation as idct_dequant_2x2_dc, but the four reconstructed DC
 * values are written back into `dct` itself instead of into 4x4 blocks.
 * All inputs are read by the prologue before any element is overwritten. */
static inline void idct_dequant_2x2_dconly( int16_t dct[2][2], int dequant_mf[6][4][4], int i_qp )
{
    IDCT_DEQUANT_START
    dct[0][0] = (d0 + d1) * dmf >> -qbits;
    dct[0][1] = (d0 - d1) * dmf >> -qbits;
    dct[1][0] = (d2 + d3) * dmf >> -qbits;
    dct[1][1] = (d2 - d3) * dmf >> -qbits;
}

Fiona Glaser's avatar
Fiona Glaser committed
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
/* Forward 2x2 transform of the chroma DC coefficients.
 *
 * Takes the DC term (element [0][0]) of each of the four 4x4 blocks in
 * dct4x4, writes the four transformed values to d, and then clears those
 * DC terms in dct4x4. */
static inline void dct2x2dc( int16_t d[2][2], int16_t dct4x4[4][4][4] )
{
    const int dc0 = dct4x4[0][0][0];
    const int dc1 = dct4x4[1][0][0];
    const int dc2 = dct4x4[2][0][0];
    const int dc3 = dct4x4[3][0][0];
    const int sum01  = dc0 + dc1;
    const int sum23  = dc2 + dc3;
    const int diff01 = dc0 - dc1;
    const int diff23 = dc2 - dc3;
    int blk;

    d[0][0] = sum01  + sum23;
    d[0][1] = sum01  - sum23;
    d[1][0] = diff01 + diff23;
    d[1][1] = diff01 - diff23;

    /* The DC terms now live in d; remove them from the 4x4 blocks. */
    for( blk = 0; blk < 4; blk++ )
        dct4x4[blk][0][0] = 0;
}

87
/* Quantize one 4x4 luma block, using trellis quantization when it is enabled.
 *
 * h:             encoder context; h->mb.b_trellis selects the quantizer.
 * dct:           4x4 coefficient block handed to the quantizer.
 * i_qp:          quantizer, used to index the quant tables.
 * i_ctxBlockCat: block category, forwarded to the trellis quantizer only.
 * b_intra:       non-zero selects the intra luma matrix (CQM_4IY),
 *                zero the inter luma matrix (CQM_4PY).
 * idx:           block index, forwarded to the trellis quantizer only.
 *
 * Returns the value of the selected quantizer; callers in this file store it
 * as the block's non-zero flag. */
static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, int16_t dct[4][4], int i_qp, int i_ctxBlockCat, int b_intra, int idx )
{
    int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
    if( h->mb.b_trellis )
        return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, i_ctxBlockCat, b_intra, idx );
    else
        return h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
}

96
/* Quantize one 8x8 luma block, using trellis quantization when it is enabled.
 *
 * h:       encoder context; h->mb.b_trellis selects the quantizer.
 * dct:     8x8 coefficient block handed to the quantizer.
 * i_qp:    quantizer, used to index the quant tables.
 * b_intra: non-zero selects the intra luma matrix (CQM_8IY),
 *          zero the inter luma matrix (CQM_8PY).
 * idx:     block index, forwarded to the trellis quantizer only.
 *
 * Returns the value of the selected quantizer; callers in this file treat it
 * as the block's non-zero flag. */
static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, int16_t dct[8][8], int i_qp, int b_intra, int idx )
{
    int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
    if( h->mb.b_trellis )
        return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra, idx );
    else
        return h->quantf.quant_8x8( dct, h->quant8_mf[i_quant_cat][i_qp], h->quant8_bias[i_quant_cat][i_qp] );
}

105 106 107 108 109 110 111 112 113 114 115
/* All encoding functions must output the correct CBP and NNZ values.
 * The entropy coding functions will check CBP first, then NNZ, before
 * actually reading the DCT coefficients.  NNZ still must be correct even
 * if CBP is zero because of the use of NNZ values for context selection.
 * "NNZ" need only be 0 or 1 rather than the exact coefficient count because
 * that is only needed in CAVLC, and will be calculated by CAVLC's residual
 * coding and stored as necessary. */

/* This means that decimation can be done merely by adjusting the CBP and NNZ
 * rather than memsetting the coefficients. */

116
/* Encode one intra 4x4 luma block of the current macroblock.
 *
 * h:    encoder context. p_fdec is used both as the second input of the
 *       residual computation and as the reconstruction target.
 * idx:  index of the 4x4 block (0..15); idx>>2 selects the luma CBP bit.
 * i_qp: quantizer for this block.
 *
 * Updates h->dct.luma4x4[idx], the NNZ cache entry for the block and
 * h->mb.i_cbp_luma (bits are only ever set here, never cleared). */
void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
{
    int nz;
    uint8_t *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
    uint8_t *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
    DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );

    if( h->mb.b_lossless )
    {
        /* Lossless: the residual is stored directly, with no transform,
         * quantization or reconstruction step. */
        h->zigzagf.sub_4x4( h->dct.luma4x4[idx], p_src, p_dst );
        nz = array_non_zero( h->dct.luma4x4[idx] );
        h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
        h->mb.i_cbp_luma |= nz<<(idx>>2);
        return;
    }

    h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );

    nz = x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 1, idx );
    h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
    if( nz )
    {
        h->mb.i_cbp_luma |= 1<<(idx>>2);
        /* Scan the quantized levels out before dequant overwrites dct4x4. */
        h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 );
        h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qp );
        h->dctf.add4x4_idct( p_dst, dct4x4 );
    }
}

/* Set the NNZ cache entries of the four 4x4 blocks that make up 8x8 block
 * `idx` to `nz`. Each 16-bit store writes two adjacent cache bytes at once
 * (nz * 0x0101 replicates nz into both bytes), so nz is expected to be 0 or 1.
 * Uses the variable `h` from the enclosing scope. */
#define STORE_8x8_NNZ(idx,nz)\
{\
    *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[(idx)*4+0]] = (nz) * 0x0101;\
    *(uint16_t*)&h->mb.cache.non_zero_count[x264_scan8[(idx)*4+2]] = (nz) * 0x0101;\
}

151
/* Encode one intra 8x8 luma block of the current macroblock.
 *
 * h:    encoder context. p_fdec is used both as the second input of the
 *       residual computation and as the reconstruction target.
 * idx:  index of the 8x8 block (0..3); bit 0 selects the column and
 *       bit 1 the row. It is also the luma CBP bit for the block.
 * i_qp: quantizer for this block.
 *
 * Updates h->dct.luma8x8[idx], the four NNZ cache entries covered by the
 * block and h->mb.i_cbp_luma (bits are only ever set here). */
void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
{
    int x = 8 * (idx&1);
    int y = 8 * (idx>>1);
    int nz;
    uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
    uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
    DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );

    if( h->mb.b_lossless )
    {
        /* Lossless: the residual is stored directly, with no transform,
         * quantization or reconstruction step. */
        h->zigzagf.sub_8x8( h->dct.luma8x8[idx], p_src, p_dst );
        nz = array_non_zero( h->dct.luma8x8[idx] );
        STORE_8x8_NNZ(idx,nz);
        h->mb.i_cbp_luma |= nz<<idx;
        return;
    }

    h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );

    nz = x264_quant_8x8( h, dct8x8, i_qp, 1, idx );
    if( nz )
    {
        h->mb.i_cbp_luma |= 1<<idx;
        /* Scan the quantized levels out before dequant overwrites dct8x8. */
        h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 );
        h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp );
        h->dctf.add8x8_idct8( p_dst, dct8x8 );
        STORE_8x8_NNZ(idx,1);
    }
    else
        STORE_8x8_NNZ(idx,0);
}

184
/* Encode the luma of an intra 16x16 macroblock.
 *
 * h:    encoder context. p_fdec[0] is used both as the second input of the
 *       residual computation and as the reconstruction target.
 * i_qp: quantizer for the macroblock.
 *
 * The DC coefficient of each of the 16 4x4 blocks is split off into a separate
 * 4x4 DC block that is transformed and quantized on its own; the remaining AC
 * coefficients are quantized per block.
 *
 * Updates h->dct.luma4x4[0..15], h->dct.luma16x16_dc, the NNZ cache entries
 * of the 16 blocks and of the DC block (x264_scan8[24]), and
 * h->mb.i_cbp_luma, which ends up as either 0 or 0xf. */
static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
{
    uint8_t  *p_src = h->mb.pic.p_fenc[0];
    uint8_t  *p_dst = h->mb.pic.p_fdec[0];

    DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
    DECLARE_ALIGNED_16( int16_t dct_dc4x4[4][4] );

    int i, nz;
    int b_decimate = h->sh.i_type == SLICE_TYPE_B || (h->param.analyse.b_dct_decimate && h->sh.i_type == SLICE_TYPE_P);
    /* Starting at 9 (>= 6) disables both the score accumulation and the
     * decimation test below when decimation is not wanted. */
    int decimate_score = b_decimate ? 0 : 9;

    if( h->mb.b_lossless )
    {
        for( i = 0; i < 16; i++ )
        {
            int oe = block_idx_xy_fenc[i];
            int od = block_idx_xy_fdec[i];
            h->zigzagf.sub_4x4( h->dct.luma4x4[i], p_src+oe, p_dst+od );
            /* Move the first coefficient of each block into the DC block. */
            dct_dc4x4[0][block_idx_yx_1d[i]] = h->dct.luma4x4[i][0];
            h->dct.luma4x4[i][0] = 0;
            nz = array_non_zero( h->dct.luma4x4[i] );
            h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
            h->mb.i_cbp_luma |= nz;
        }
        /* i_cbp_luma is 0 or 1 here; expand it to 0 or 0xf. */
        h->mb.i_cbp_luma *= 0xf;
        h->mb.cache.non_zero_count[x264_scan8[24]] = array_non_zero( dct_dc4x4 );
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );
        return;
    }

    h->dctf.sub16x16_dct( dct4x4, p_src, p_dst );

    for( i = 0; i < 16; i++ )
    {
        /* copy dc coeff */
        dct_dc4x4[0][block_idx_xy_1d[i]] = dct4x4[i][0][0];
        dct4x4[i][0][0] = 0;

        /* quant/scan/dequant */
        nz = x264_quant_4x4( h, dct4x4[i], i_qp, DCT_LUMA_AC, 1, i );
        h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
        if( nz )
        {
            h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] );
            h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp );
            /* Stop accumulating once the threshold has been reached. */
            if( decimate_score < 6 ) decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[i] );
            h->mb.i_cbp_luma = 0xf;
        }
    }

    /* Writing the 16 CBFs in an i16x16 block is quite costly, so decimation can save many bits. */
    /* More useful with CAVLC, but still useful with CABAC. */
    if( decimate_score < 6 )
    {
        /* Drop all AC coefficients by clearing the CBP and the 16 NNZ entries
         * (four 32-bit stores, each covering one row of four cache bytes). */
        h->mb.i_cbp_luma = 0;
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = 0;
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = 0;
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = 0;
        *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = 0;
    }

    h->dctf.dct4x4dc( dct_dc4x4 );
    if( h->mb.b_trellis )
        nz = x264_quant_dc_trellis( h, (int16_t*)dct_dc4x4, CQM_4IY, i_qp, DCT_LUMA_DC, 1);
    else
        nz = h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );

    h->mb.cache.non_zero_count[x264_scan8[24]] = nz;
    if( nz )
    {
        h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );

        /* output samples to fdec */
        h->dctf.idct4x4dc( dct_dc4x4 );
        h->quantf.dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp );  /* XXX not inversed */
        /* The DC terms only need to go back into the 4x4 blocks when the full
         * inverse transform below is going to run. */
        if( h->mb.i_cbp_luma )
            for( i = 0; i < 16; i++ )
                dct4x4[i][0][0] = dct_dc4x4[0][block_idx_xy_1d[i]];
    }

    /* put pixels to fdec */
    if( h->mb.i_cbp_luma )
        h->dctf.add16x16_idct( p_dst, dct4x4 );
    else if( nz )
        h->dctf.add16x16_idct_dc( p_dst, dct_dc4x4 );
}

272
/* Encode both 8x8 chroma planes of the current macroblock.
 *
 * h:       encoder context. p_fdec[1] and p_fdec[2] are used both as the
 *          second input of the residual computation and as the
 *          reconstruction targets.
 * b_inter: non-zero for inter macroblocks. It is added to CQM_4IC to select
 *          the quant matrix, so it is presumably expected to be exactly
 *          0 or 1 - confirm against callers.
 * i_qp:    chroma quantizer.
 *
 * Updates h->dct.luma4x4[16..23] (chroma AC), h->dct.chroma_dc, the chroma
 * NNZ cache entries and h->mb.i_cbp_chroma, which ends up as
 * 0 (nothing coded), 1 (DC only) or 2 (DC and AC). */
void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
{
    int i, ch, nz, nz_dc;
    int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
    h->mb.i_cbp_chroma = 0;

    for( ch = 0; ch < 2; ch++ )
    {
        uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
        uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];
        int i_decimate_score = 0;
        int nz_ac = 0;

        DECLARE_ALIGNED_16( int16_t dct2x2[2][2]  );
        DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );

        if( h->mb.b_lossless )
        {
            for( i = 0; i < 4; i++ )
            {
                int oe = block_idx_x[i]*4 + block_idx_y[i]*4*FENC_STRIDE;
                int od = block_idx_x[i]*4 + block_idx_y[i]*4*FDEC_STRIDE;
                h->zigzagf.sub_4x4( h->dct.luma4x4[16+i+ch*4], p_src+oe, p_dst+od );
                /* Move the first coefficient of each block into the DC list. */
                h->dct.chroma_dc[ch][i] = h->dct.luma4x4[16+i+ch*4][0];
                h->dct.luma4x4[16+i+ch*4][0] = 0;
                nz = array_non_zero( h->dct.luma4x4[16+i+ch*4] );
                h->mb.cache.non_zero_count[x264_scan8[16+i+ch*4]] = nz;
                h->mb.i_cbp_chroma |= nz;
            }
            h->mb.cache.non_zero_count[x264_scan8[25]+ch] = array_non_zero( h->dct.chroma_dc[ch] );
            continue;
        }

        h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
        /* Split off the four DC terms; dct4x4 keeps only AC coefficients. */
        dct2x2dc( dct2x2, dct4x4 );
        /* calculate dct coeffs */
        for( i = 0; i < 4; i++ )
        {
            if( h->mb.b_trellis )
                nz = x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IC+b_inter, i_qp, DCT_CHROMA_AC, !b_inter, 0 );
            else
                nz = h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
            h->mb.cache.non_zero_count[x264_scan8[16+i+ch*4]] = nz;
            if( nz )
            {
                nz_ac = 1;
                h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );
                h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qp );
                if( b_decimate )
                    i_decimate_score += h->quantf.decimate_score15( h->dct.luma4x4[16+i+ch*4] );
            }
        }

        if( h->mb.b_trellis )
            nz_dc = x264_quant_dc_trellis( h, (int16_t*)dct2x2, CQM_4IC+b_inter, i_qp, DCT_CHROMA_DC, !b_inter );
        else
            nz_dc = h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );

        h->mb.cache.non_zero_count[x264_scan8[25]+ch] = nz_dc;

        if( (b_decimate && i_decimate_score < 7) || !nz_ac )
        {
            /* Decimate the block */
            h->mb.cache.non_zero_count[x264_scan8[16+0]+24*ch] = 0;
            h->mb.cache.non_zero_count[x264_scan8[16+1]+24*ch] = 0;
            h->mb.cache.non_zero_count[x264_scan8[16+2]+24*ch] = 0;
            h->mb.cache.non_zero_count[x264_scan8[16+3]+24*ch] = 0;
            if( !nz_dc ) /* Whole block is empty */
                continue;
            /* DC-only */
            zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
            idct_dequant_2x2_dconly( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
            h->dctf.add8x8_idct_dc( p_dst, dct2x2 );
        }
        else
        {
            h->mb.i_cbp_chroma = 1;
            if( nz_dc )
            {
                zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
                /* Puts the reconstructed DC terms back into dct4x4. */
                idct_dequant_2x2_dc( dct2x2, dct4x4, h->dequant4_mf[CQM_4IC + b_inter], i_qp );
            }
            h->dctf.add8x8_idct( p_dst, dct4x4 );
        }
    }

    /* Up to here i_cbp_chroma is only a flag for "some AC is coded";
     * convert it to the final 0/1/2 value. */
    if( h->mb.i_cbp_chroma )
        h->mb.i_cbp_chroma = 2;    /* dc+ac (we can't do only ac) */
    else if( h->mb.cache.non_zero_count[x264_scan8[25]] |
             h->mb.cache.non_zero_count[x264_scan8[26]] )
        h->mb.i_cbp_chroma = 1;    /* dc only */
}

365 366 367 368
/* Mark the current macroblock as having no coded residual: clear the luma
 * and chroma CBP, zero the whole NNZ cache, and store a zero CBP for this
 * macroblock in the per-macroblock cbp array. */
static void x264_macroblock_encode_skip( x264_t *h )
{
    h->mb.i_cbp_luma = 0x00;
    h->mb.i_cbp_chroma = 0x00;
    memset( h->mb.cache.non_zero_count, 0, X264_SCAN8_SIZE );
    /* store cbp */
    h->mb.cbp[h->mb.i_mb_xy] = 0;
}

Laurent Aimar's avatar
Laurent Aimar committed
374 375 376 377
/*****************************************************************************
 * x264_macroblock_encode_pskip:
 *  Encode an already marked skip block
 *****************************************************************************/
Loic Le Loarer's avatar
Loic Le Loarer committed
378
static void x264_macroblock_encode_pskip( x264_t *h )
Laurent Aimar's avatar
Laurent Aimar committed
379
{
380 381 382 383
    const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
                                h->mb.mv_min[0], h->mb.mv_max[0] );
    const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
                                h->mb.mv_min[1], h->mb.mv_max[1] );
Laurent Aimar's avatar
Laurent Aimar committed
384

385
    /* don't do pskip motion compensation if it was already done in macroblock_analyse */
386
    if( !h->mb.b_skip_mc )
387 388 389 390
    {
        h->mc.mc_luma( h->mb.pic.p_fdec[0],    FDEC_STRIDE,
                       h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
                       mvx, mvy, 16, 16 );
Laurent Aimar's avatar
Laurent Aimar committed
391

392 393 394
        h->mc.mc_chroma( h->mb.pic.p_fdec[1],       FDEC_STRIDE,
                         h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
                         mvx, mvy, 8, 8 );
Laurent Aimar's avatar
Laurent Aimar committed
395

396 397 398 399
        h->mc.mc_chroma( h->mb.pic.p_fdec[2],       FDEC_STRIDE,
                         h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
                         mvx, mvy, 8, 8 );
    }
Laurent Aimar's avatar
Laurent Aimar committed
400

401
    x264_macroblock_encode_skip( h );
Laurent Aimar's avatar
Laurent Aimar committed
402 403
}

404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
/*****************************************************************************
 * Intra prediction for predictive lossless mode.
 *****************************************************************************/

/* Note that these functions take a shortcut (mc.copy instead of actual pixel prediction) which assumes
 * that the edge pixels of the reconstructed frame are the same as that of the source frame.  This means
 * they will only work correctly if the neighboring blocks are losslessly coded.  In practice, this means
 * lossless mode cannot be mixed with lossy mode within a frame. */
/* This can be resolved by explicitly copying the edge pixels after doing the mc.copy, but this doesn't
 * need to be done unless we decide to allow mixing lossless and lossy compression. */

/* Lossless intra prediction for both 8x8 chroma planes.
 * Vertical and horizontal modes copy the source plane shifted by one row or
 * one column respectively; every other mode uses the regular predictor. */
void x264_predict_lossless_8x8_chroma( x264_t *h, int i_mode )
{
    const int stride = h->fenc->i_stride[1] << h->mb.b_interlaced;
    int back;   /* how far before the block the copy source starts */
    int plane;

    if( i_mode == I_PRED_CHROMA_V )
        back = stride;      /* one row up */
    else if( i_mode == I_PRED_CHROMA_H )
        back = 1;           /* one pixel left */
    else
    {
        h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
        h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
        return;
    }

    for( plane = 1; plane <= 2; plane++ )
        h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[plane], FDEC_STRIDE,
                               h->mb.pic.p_fenc_plane[plane]-back, stride, 8 );
}

/* Lossless intra prediction for 4x4 luma block idx, written to p_dst.
 * Vertical and horizontal modes copy the source block shifted by one row or
 * one column respectively; every other mode uses the regular predictor. */
void x264_predict_lossless_4x4( x264_t *h, uint8_t *p_dst, int idx, int i_mode )
{
    const int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    uint8_t *p_src = h->mb.pic.p_fenc_plane[0] + block_idx_x[idx]*4 + block_idx_y[idx]*4 * stride;

    if( i_mode == I_PRED_4x4_V )
    {
        h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, p_src-stride, stride, 4 );
        return;
    }
    if( i_mode == I_PRED_4x4_H )
    {
        h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, p_src-1, stride, 4 );
        return;
    }
    h->predict_4x4[i_mode]( p_dst );
}

/* Lossless intra prediction for 8x8 luma block idx, written to p_dst.
 * Vertical and horizontal modes copy the source block shifted by one row or
 * one column respectively; every other mode uses the regular predictor,
 * which is the only path that reads `edge`. */
void x264_predict_lossless_8x8( x264_t *h, uint8_t *p_dst, int idx, int i_mode, uint8_t edge[33] )
{
    const int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    uint8_t *p_src = h->mb.pic.p_fenc_plane[0] + (idx&1)*8 + (idx>>1)*8*stride;

    if( i_mode == I_PRED_8x8_V )
    {
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src-stride, stride, 8 );
        return;
    }
    if( i_mode == I_PRED_8x8_H )
    {
        h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src-1, stride, 8 );
        return;
    }
    h->predict_8x8[i_mode]( p_dst, edge );
}

/* Lossless intra prediction for the 16x16 luma block.
 * Vertical mode copies the source shifted up by one row; horizontal mode
 * copies it shifted left by one pixel, through the unaligned copy routine.
 * Every other mode uses the regular predictor. */
void x264_predict_lossless_16x16( x264_t *h, int i_mode )
{
    const int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
    uint8_t *p_out = h->mb.pic.p_fdec[0];

    if( i_mode == I_PRED_16x16_V )
    {
        h->mc.copy[PIXEL_16x16]( p_out, FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-stride, stride, 16 );
        return;
    }
    if( i_mode == I_PRED_16x16_H )
    {
        h->mc.copy_16x16_unaligned( p_out, FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-1, stride, 16 );
        return;
    }
    h->predict_16x16[i_mode]( p_out );
}

Laurent Aimar's avatar
Laurent Aimar committed
472 473 474 475 476 477
/*****************************************************************************
 * x264_macroblock_encode:
 *****************************************************************************/
void x264_macroblock_encode( x264_t *h )
{
    int i_cbp_dc = 0;
478
    int i_qp = h->mb.i_qp;
479
    int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
480
    int b_force_no_skip = 0;
481 482 483
    int i,idx,nz;
    h->mb.i_cbp_luma = 0;
    h->mb.cache.non_zero_count[x264_scan8[24]] = 0;
Laurent Aimar's avatar
Laurent Aimar committed
484

485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
    if( h->sh.b_mbaff
        && h->mb.i_mb_xy == h->sh.i_first_mb + h->mb.i_mb_stride
        && IS_SKIP(h->mb.type[h->sh.i_first_mb]) )
    {
        /* The first skip is predicted to be a frame mb pair.
         * We don't yet support the aff part of mbaff, so force it to non-skip
         * so that we can pick the aff flag. */
        b_force_no_skip = 1;
        if( IS_SKIP(h->mb.i_type) )
        {
            if( h->mb.i_type == P_SKIP )
                h->mb.i_type = P_L0;
            else if( h->mb.i_type == B_SKIP )
                h->mb.i_type = B_DIRECT;
        }
    }

Laurent Aimar's avatar
Laurent Aimar committed
502 503 504 505 506 507
    if( h->mb.i_type == P_SKIP )
    {
        /* A bit special */
        x264_macroblock_encode_pskip( h );
        return;
    }
508 509
    if( h->mb.i_type == B_SKIP )
    {
510
        /* don't do bskip motion compensation if it was already done in macroblock_analyse */
511
        if( !h->mb.b_skip_mc )
512
            x264_mb_mc( h );
513 514 515
        x264_macroblock_encode_skip( h );
        return;
    }
Laurent Aimar's avatar
Laurent Aimar committed
516 517 518 519

    if( h->mb.i_type == I_16x16 )
    {
        const int i_mode = h->mb.i_intra16x16_pred_mode;
520
        h->mb.b_transform_8x8 = 0;
521 522 523 524 525

        if( h->mb.b_lossless )
            x264_predict_lossless_16x16( h, i_mode );
        else
            h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
Laurent Aimar's avatar
Laurent Aimar committed
526 527

        /* encode the 16x16 macroblock */
528
        x264_mb_encode_i16x16( h, i_qp );
Laurent Aimar's avatar
Laurent Aimar committed
529
    }
530 531
    else if( h->mb.i_type == I_8x8 )
    {
Loren Merritt's avatar
Loren Merritt committed
532
        DECLARE_ALIGNED_16( uint8_t edge[33] );
533
        h->mb.b_transform_8x8 = 1;
534 535 536 537
        /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 );
538 539 540 541 542
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = h->mb.pic.i8x8_nnz_buf[0];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = h->mb.pic.i8x8_nnz_buf[1];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = h->mb.pic.i8x8_nnz_buf[2];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = h->mb.pic.i8x8_nnz_buf[3];
            h->mb.i_cbp_luma = h->mb.pic.i8x8_cbp;
543 544 545 546 547
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
                h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) );
        }
        for( i = h->mb.i_skip_intra ? 3 : 0 ; i < 4; i++ )
548
        {
549
            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
550
            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
551
            x264_predict_8x8_filter( p_dst, edge, h->mb.i_neighbour8[i], x264_pred_i4x4_neighbors[i_mode] );
552 553 554 555 556 557

            if( h->mb.b_lossless )
                x264_predict_lossless_8x8( h, p_dst, i, i_mode, edge );
            else
                h->predict_8x8[i_mode]( p_dst, edge );

558
            x264_mb_encode_i8x8( h, i, i_qp );
559 560
        }
    }
Laurent Aimar's avatar
Laurent Aimar committed
561 562
    else if( h->mb.i_type == I_4x4 )
    {
563
        h->mb.b_transform_8x8 = 0;
564 565 566 567
        /* If we already encoded 15 of the 16 i4x4 blocks, we don't have to do them again. */
        if( h->mb.i_skip_intra )
        {
            h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
568 569 570 571 572
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = h->mb.pic.i4x4_nnz_buf[0];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = h->mb.pic.i4x4_nnz_buf[1];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = h->mb.pic.i4x4_nnz_buf[2];
            *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = h->mb.pic.i4x4_nnz_buf[3];
            h->mb.i_cbp_luma = h->mb.pic.i4x4_cbp;
573 574
            /* In RD mode, restore the now-overwritten DCT data. */
            if( h->mb.i_skip_intra == 2 )
575
                h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
576 577
        }
        for( i = h->mb.i_skip_intra ? 15 : 0 ; i < 16; i++ )
Laurent Aimar's avatar
Laurent Aimar committed
578
        {
579
            uint8_t  *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[i]];
Laurent Aimar's avatar
Laurent Aimar committed
580 581
            int      i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];

582 583
            if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
                /* emulate missing topright samples */
584
                *(uint32_t*) &p_dst[4-FDEC_STRIDE] = p_dst[3-FDEC_STRIDE] * 0x01010101U;
585

586 587 588 589
            if( h->mb.b_lossless )
                x264_predict_lossless_4x4( h, p_dst, i, i_mode );
            else
                h->predict_4x4[i_mode]( p_dst );
590
            x264_mb_encode_i4x4( h, i, i_qp );
Laurent Aimar's avatar
Laurent Aimar committed
591 592 593 594
        }
    }
    else    /* Inter MB */
    {
595
        int i8x8, i4x4;
Laurent Aimar's avatar
Laurent Aimar committed
596 597
        int i_decimate_mb = 0;

598 599 600
        /* Don't repeat motion compensation if it was already done in non-RD transform analysis */
        if( !h->mb.b_skip_mc )
            x264_mb_mc( h );
Laurent Aimar's avatar
Laurent Aimar committed
601

Loren Merritt's avatar
Loren Merritt committed
602 603
        if( h->mb.b_lossless )
        {
604 605 606 607 608 609 610 611
            if( h->mb.b_transform_8x8 )
                for( i8x8 = 0; i8x8 < 4; i8x8++ )
                {
                    int x = 8*(i8x8&1);
                    int y = 8*(i8x8>>1);
                    h->zigzagf.sub_8x8( h->dct.luma8x8[i8x8],
                                        h->mb.pic.p_fenc[0]+x+y*FENC_STRIDE,
                                        h->mb.pic.p_fdec[0]+x+y*FDEC_STRIDE );
612 613 614
                    nz = array_non_zero( h->dct.luma8x8[i8x8] );
                    STORE_8x8_NNZ(i8x8,nz);
                    h->mb.i_cbp_luma |= nz << i8x8;
615 616 617 618 619 620 621
                }
            else
                for( i4x4 = 0; i4x4 < 16; i4x4++ )
                {
                    h->zigzagf.sub_4x4( h->dct.luma4x4[i4x4],
                                        h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4x4],
                                        h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4x4] );
622 623 624
                    nz = array_non_zero( h->dct.luma4x4[i4x4] );
                    h->mb.cache.non_zero_count[x264_scan8[i4x4]] = nz;
                    h->mb.i_cbp_luma |= nz << (i4x4>>2);
625
                }
Loren Merritt's avatar
Loren Merritt committed
626 627
        }
        else if( h->mb.b_transform_8x8 )
Laurent Aimar's avatar
Laurent Aimar committed
628
        {
Loren Merritt's avatar
Loren Merritt committed
629
            DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
630
            b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
631
            h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
Fiona Glaser's avatar
Fiona Glaser committed
632
            h->nr_count[1] += h->mb.b_noise_reduction * 4;
Laurent Aimar's avatar
Laurent Aimar committed
633

634
            for( idx = 0; idx < 4; idx++ )
Laurent Aimar's avatar
Laurent Aimar committed
635
            {
636
                if( h->mb.b_noise_reduction )
Loren Merritt's avatar
Loren Merritt committed
637
                    h->quantf.denoise_dct( *dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
638
                nz = x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );
Laurent Aimar's avatar
Laurent Aimar committed
639

640
                if( nz )
641
                {
642 643 644 645 646 647 648 649 650 651
                    h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8[idx] );
                    if( b_decimate )
                    {
                        int i_decimate_8x8 = h->quantf.decimate_score64( h->dct.luma8x8[idx] );
                        i_decimate_mb += i_decimate_8x8;
                        if( i_decimate_8x8 >= 4 )
                            h->mb.i_cbp_luma |= 1<<idx;
                    }
                    else
                        h->mb.i_cbp_luma |= 1<<idx;
652
                }
Laurent Aimar's avatar
Laurent Aimar committed
653 654
            }

655
            if( i_decimate_mb < 6 && b_decimate )
656 657 658 659 660 661 662
            {
                h->mb.i_cbp_luma = 0;
                *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = 0;
                *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = 0;
                *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = 0;
                *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = 0;
            }
663
            else
664 665
            {
                for( idx = 0; idx < 4; idx++ )
666 667
                {
                    if( h->mb.i_cbp_luma&(1<<idx) )
668 669
                    {
                        h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
670
                        h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[0][(idx&1)*8 + (idx>>1)*8*FDEC_STRIDE], dct8x8[idx] );
671
                        STORE_8x8_NNZ(idx,1);
672
                    }
673 674 675
                    else
                        STORE_8x8_NNZ(idx,0);
                }
676
            }
677 678 679
        }
        else
        {
Loren Merritt's avatar
Loren Merritt committed
680
            DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
681
            h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
Fiona Glaser's avatar
Fiona Glaser committed
682
            h->nr_count[0] += h->mb.b_noise_reduction * 16;
683 684

            for( i8x8 = 0; i8x8 < 4; i8x8++ )
Laurent Aimar's avatar
Laurent Aimar committed
685
            {
686 687
                int i_decimate_8x8 = 0;
                int cbp = 0;
688 689

                /* encode one 4x4 block */
Laurent Aimar's avatar
Laurent Aimar committed
690 691 692
                for( i4x4 = 0; i4x4 < 4; i4x4++ )
                {
                    idx = i8x8 * 4 + i4x4;
693

694
                    if( h->mb.b_noise_reduction )
Loren Merritt's avatar
Loren Merritt committed
695
                        h->quantf.denoise_dct( *dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
696 697
                    nz = x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );
                    h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
Loren Merritt's avatar
Loren Merritt committed
698

699 700 701 702 703 704 705 706
                    if( nz )
                    {
                        h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );
                        h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp );
                        if( b_decimate && i_decimate_8x8 < 6 )
                            i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[idx] );
                        cbp = 1;
                    }
Laurent Aimar's avatar
Laurent Aimar committed
707 708
                }

709 710
                /* decimate this 8x8 block */
                i_decimate_mb += i_decimate_8x8;
711 712 713 714 715 716 717 718 719 720 721 722
                if( b_decimate )
                {
                    if( i_decimate_8x8 < 4 )
                        STORE_8x8_NNZ(i8x8,0)
                    else
                        h->mb.i_cbp_luma |= 1<<i8x8;
                }
                else if( cbp )
                {
                    h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
                    h->mb.i_cbp_luma |= 1<<i8x8;
                }
Laurent Aimar's avatar
Laurent Aimar committed
723
            }
724

725
            if( b_decimate )
726
            {
727 728 729 730 731 732 733 734 735 736 737 738 739 740
                if( i_decimate_mb < 6 )
                {
                    h->mb.i_cbp_luma = 0;
                    *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 0]] = 0;
                    *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 2]] = 0;
                    *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[ 8]] = 0;
                    *(uint32_t*)&h->mb.cache.non_zero_count[x264_scan8[10]] = 0;
                }
                else
                {
                    for( i8x8 = 0; i8x8 < 4; i8x8++ )
                        if( h->mb.i_cbp_luma&(1<<i8x8) )
                            h->dctf.add8x8_idct( &h->mb.pic.p_fdec[0][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
                }
741
            }
Laurent Aimar's avatar
Laurent Aimar committed
742 743 744 745 746 747 748
        }
    }

    /* encode chroma */
    if( IS_INTRA( h->mb.i_type ) )
    {
        const int i_mode = h->mb.i_chroma_pred_mode;
749 750 751 752 753 754 755
        if( h->mb.b_lossless )
            x264_predict_lossless_8x8_chroma( h, i_mode );
        else
        {
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
            h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
        }
Laurent Aimar's avatar
Laurent Aimar committed
756 757 758
    }

    /* encode the 8x8 blocks */
759
    x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );
Laurent Aimar's avatar
Laurent Aimar committed
760 761 762

    if( h->param.b_cabac )
    {
763 764 765
        i_cbp_dc = h->mb.cache.non_zero_count[x264_scan8[24]]
                 | h->mb.cache.non_zero_count[x264_scan8[25]] << 1
                 | h->mb.cache.non_zero_count[x264_scan8[26]] << 2;
Laurent Aimar's avatar
Laurent Aimar committed
766 767 768 769 770 771 772 773
    }

    /* store cbp */
    h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;

    /* Check for P_SKIP
     * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
     *      (if multiple mv give same result)*/
774
    if( !b_force_no_skip )
Laurent Aimar's avatar
Laurent Aimar committed
775
    {
776
        if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
Loren Merritt's avatar
Loren Merritt committed
777
            !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) &&
778 779
            *(uint32_t*)h->mb.cache.mv[0][x264_scan8[0]] == *(uint32_t*)h->mb.cache.pskip_mv
            && h->mb.cache.ref[0][x264_scan8[0]] == 0 )
780
        {
781
            h->mb.i_type = P_SKIP;
Laurent Aimar's avatar
Laurent Aimar committed
782
        }
783

784
        /* Check for B_SKIP */
785
        if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) )
786 787 788
        {
            h->mb.i_type = B_SKIP;
        }
789
    }
Laurent Aimar's avatar
Laurent Aimar committed
790 791 792
}

/*****************************************************************************
793 794
 * x264_macroblock_probe_skip:
 *  Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
Laurent Aimar's avatar
Laurent Aimar committed
795 796
 *  the previous QP)
 *****************************************************************************/
797
int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
Laurent Aimar's avatar
Laurent Aimar committed
798
{
Fiona Glaser's avatar
Fiona Glaser committed
799
    DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
Loren Merritt's avatar
Loren Merritt committed
800 801
    DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
    DECLARE_ALIGNED_16( int16_t dctscan[16] );
Laurent Aimar's avatar
Laurent Aimar committed
802

803
    int i_qp = h->mb.i_qp;
Laurent Aimar's avatar
Laurent Aimar committed
804
    int mvp[2];
805
    int ch, thresh;
Laurent Aimar's avatar
Laurent Aimar committed
806 807 808 809

    int i8x8, i4x4;
    int i_decimate_mb;

810 811 812
    if( !b_bidir )
    {
        /* Get the MV */
813 814
        mvp[0] = x264_clip3( h->mb.cache.pskip_mv[0], h->mb.mv_min[0], h->mb.mv_max[0] );
        mvp[1] = x264_clip3( h->mb.cache.pskip_mv[1], h->mb.mv_min[1], h->mb.mv_max[1] );
Laurent Aimar's avatar
Laurent Aimar committed
815

816
        /* Motion compensation */
817 818
        h->mc.mc_luma( h->mb.pic.p_fdec[0],    FDEC_STRIDE,
                       h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
819
                       mvp[0], mvp[1], 16, 16 );
820
    }
Laurent Aimar's avatar
Laurent Aimar committed
821 822 823

    for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
    {
Fiona Glaser's avatar
Fiona Glaser committed
824 825 826 827 828
        int fenc_offset = (i8x8&1) * 8 + (i8x8>>1) * FENC_STRIDE * 8;
        int fdec_offset = (i8x8&1) * 8 + (i8x8>>1) * FDEC_STRIDE * 8;
        /* get luma diff */
        h->dctf.sub8x8_dct( dct4x4, h->mb.pic.p_fenc[0] + fenc_offset,
                                    h->mb.pic.p_fdec[0] + fdec_offset );
Laurent Aimar's avatar
Laurent Aimar committed
829 830 831
        /* encode one 4x4 block */
        for( i4x4 = 0; i4x4 < 4; i4x4++ )
        {
832
            if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ) )
Fiona Glaser's avatar
Fiona Glaser committed
833 834
                continue;
            h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
835
            i_decimate_mb += h->quantf.decimate_score16( dctscan );
Laurent Aimar's avatar
Laurent Aimar committed
836 837 838 839 840 841
            if( i_decimate_mb >= 6 )
                return 0;
        }
    }

    /* encode chroma */
842
    i_qp = h->mb.i_chroma_qp;
843
    thresh = (x264_lambda2_tab[i_qp] + 32) >> 6;
Laurent Aimar's avatar
Laurent Aimar committed
844 845 846 847 848 849

    for( ch = 0; ch < 2; ch++ )
    {
        uint8_t  *p_src = h->mb.pic.p_fenc[1+ch];
        uint8_t  *p_dst = h->mb.pic.p_fdec[1+ch];

850 851
        if( !b_bidir )
        {
852 853
            h->mc.mc_chroma( h->mb.pic.p_fdec[1+ch],       FDEC_STRIDE,
                             h->mb.pic.p_fref[0][0][4+ch], h->mb.pic.i_stride[1+ch],
854
                             mvp[0], mvp[1], 8, 8 );
855
        }
Laurent Aimar's avatar
Laurent Aimar committed
856

857 858 859 860 861
        /* there is almost never a termination during chroma, but we can't avoid the check entirely */
        /* so instead we check SSD and skip the actual check if the score is low enough. */
        if( h->pixf.ssd[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) < thresh )
            continue;

862
        h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
Laurent Aimar's avatar
Laurent Aimar committed
863 864

        /* calculate dct DC */
Fiona Glaser's avatar
Fiona Glaser committed
865
        dct2x2dc( dct2x2, dct4x4 );
866
        if( h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4PC][i_qp][0]>>1, h->quant4_bias[CQM_4PC][i_qp][0]<<1 ) )
Laurent Aimar's avatar
Laurent Aimar committed
867 868 869 870 871
            return 0;

        /* calculate dct coeffs */
        for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
        {
872
            if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] ) )
Fiona Glaser's avatar
Fiona Glaser committed
873
                continue;
874
            h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
875
            i_decimate_mb += h->quantf.decimate_score15( dctscan );